diff options
Diffstat (limited to 'gnulib-local/lib/libxml/parser.c')
-rw-r--r-- | gnulib-local/lib/libxml/parser.c | 5533 |
1 files changed, 3847 insertions, 1686 deletions
diff --git a/gnulib-local/lib/libxml/parser.c b/gnulib-local/lib/libxml/parser.c index bd44585..c5741e3 100644 --- a/gnulib-local/lib/libxml/parser.c +++ b/gnulib-local/lib/libxml/parser.c @@ -17,7 +17,7 @@ * parserInternals.c to reduce this file size. * As much as possible the functions are associated with their relative * production in the XML specification. A few productions defining the - * different ranges of character are actually implanted either in + * different ranges of character are actually implanted either in * parserInternals.h or parserInternals.c * The DOM tree build is realized from the default SAX callbacks in * the module SAX.c. @@ -40,6 +40,7 @@ #endif #include <stdlib.h> +#include <limits.h> #include <string.h> #include <stdarg.h> #include <libxml/xmlmemory.h> @@ -79,36 +80,193 @@ #ifdef HAVE_ZLIB_H #include <zlib.h> #endif +#ifdef HAVE_LZMA_H +#include <lzma.h> +#endif + +#include "buf.h" +#include "enc.h" + +static void +xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); + +static xmlParserCtxtPtr +xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, + const xmlChar *base, xmlParserCtxtPtr pctx); + +static void xmlHaltParser(xmlParserCtxtPtr ctxt); + +/************************************************************************ + * * + * Arbitrary limits set in the parser. See XML_PARSE_HUGE * + * * + ************************************************************************/ + +#define XML_PARSER_BIG_ENTITY 1000 +#define XML_PARSER_LOT_ENTITY 5000 + +/* + * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity + * replacement over the size in byte of the input indicates that you have + * and eponential behaviour. A value of 10 correspond to at least 3 entity + * replacement per byte of input. + */ +#define XML_PARSER_NON_LINEAR 10 + +/* + * xmlParserEntityCheck + * + * Function to check non-linear entity expansion behaviour + * This is here to detect and stop exponential linear entity expansion + * This is not a limitation of the parser but a safety + * boundary feature. It can be disabled with the XML_PARSE_HUGE + * parser option. + */ +static int +xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, + xmlEntityPtr ent, size_t replacement) +{ + size_t consumed = 0; + + if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) + return (0); + if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) + return (1); + + /* + * This may look absurd but is needed to detect + * entities problems + */ + if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && + (ent->content != NULL) && (ent->checked == 0)) { + unsigned long oldnbent = ctxt->nbentities; + xmlChar *rep; + + ent->checked = 1; + + rep = xmlStringDecodeEntities(ctxt, ent->content, + XML_SUBSTITUTE_REF, 0, 0, 0); + + ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; + if (rep != NULL) { + if (xmlStrchr(rep, '<')) + ent->checked |= 1; + xmlFree(rep); + rep = NULL; + } + } + if (replacement != 0) { + if (replacement < XML_MAX_TEXT_LENGTH) + return(0); + + /* + * If the volume of entity copy reaches 10 times the + * amount of parsed data and over the large text threshold + * then that's very likely to be an abuse. + */ + if (ctxt->input != NULL) { + consumed = ctxt->input->consumed + + (ctxt->input->cur - ctxt->input->base); + } + consumed += ctxt->sizeentities; + + if (replacement < XML_PARSER_NON_LINEAR * consumed) + return(0); + } else if (size != 0) { + /* + * Do the check based on the replacement size of the entity + */ + if (size < XML_PARSER_BIG_ENTITY) + return(0); + + /* + * A limit on the amount of text data reasonably used + */ + if (ctxt->input != NULL) { + consumed = ctxt->input->consumed + + (ctxt->input->cur - ctxt->input->base); + } + consumed += ctxt->sizeentities; + + if ((size < XML_PARSER_NON_LINEAR * consumed) && + (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) + return (0); + } else if (ent != NULL) { + /* + * use the number of parsed entities in the replacement + */ + size = ent->checked / 2; + + /* + * The amount of data parsed counting entities size only once + */ + if (ctxt->input != NULL) { + consumed = ctxt->input->consumed + + (ctxt->input->cur - ctxt->input->base); + } + consumed += ctxt->sizeentities; + + /* + * Check the density of entities for the amount of data + * knowing an entity reference will take at least 3 bytes + */ + if (size * 3 < consumed * XML_PARSER_NON_LINEAR) + return (0); + } else { + /* + * strange we got no data for checking + */ + if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && + (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || + (ctxt->nbentities <= 10000)) + return (0); + } + xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); + return (1); +} /** * xmlParserMaxDepth: * - * arbitrary depth limit for the XML documents that we allow to - * process. This is not a limitation of the parser but a safety - * boundary feature. + * arbitrary depth limit for the XML documents that we allow to + * process. This is not a limitation of the parser but a safety + * boundary feature. It can be disabled with the XML_PARSE_HUGE + * parser option. */ -unsigned int xmlParserMaxDepth = 1024; +unsigned int xmlParserMaxDepth = 256; -#define SAX2 1 + +#define SAX2 1 #define XML_PARSER_BIG_BUFFER_SIZE 300 #define XML_PARSER_BUFFER_SIZE 100 - #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" +/** + * XML_PARSER_CHUNK_SIZE + * + * When calling GROW that's the minimal amount of data + * the parser expected to have received. It is not a hard + * limit but an optimization when reading strings like Names + * It is not strictly needed as long as inputs available characters + * are followed by 0, which should be provided by the I/O level + */ +#define XML_PARSER_CHUNK_SIZE 100 + /* * List of XML prefixed PI allowed by W3C specs */ static const char *xmlW3CPIs[] = { "xml-stylesheet", + "xml-model", NULL }; /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ -xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, - const xmlChar **str); +static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, + const xmlChar **str); static xmlParserErrors xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, @@ -116,6 +274,9 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, void *user_data, int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list); +static int +xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, + const char *encoding); #ifdef LIBXML_LEGACY_ENABLED static void xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, @@ -126,9 +287,12 @@ static xmlParserErrors xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, const xmlChar *string, void *user_data, xmlNodePtr *lst); +static int +xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); + /************************************************************************ * * - * Some factorized error routines * + * Some factorized error routines * * * ************************************************************************/ @@ -149,14 +313,15 @@ xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, return; if (ctxt != NULL) ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; + if (prefix == NULL) __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, - ctxt->errNo, XML_ERR_FATAL, NULL, 0, + XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, (const char *) localname, NULL, NULL, 0, 0, "Attribute %s redefined\n", localname); else __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, - ctxt->errNo, XML_ERR_FATAL, NULL, 0, + XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, (const char *) prefix, (const char *) localname, NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, localname); @@ -179,193 +344,201 @@ static void xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) { const char *errmsg; + char errstr[129] = ""; if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; switch (error) { case XML_ERR_INVALID_HEX_CHARREF: - errmsg = "CharRef: invalid hexadecimal value\n"; + errmsg = "CharRef: invalid hexadecimal value"; break; case XML_ERR_INVALID_DEC_CHARREF: - errmsg = "CharRef: invalid decimal value\n"; + errmsg = "CharRef: invalid decimal value"; break; case XML_ERR_INVALID_CHARREF: - errmsg = "CharRef: invalid value\n"; + errmsg = "CharRef: invalid value"; break; case XML_ERR_INTERNAL_ERROR: errmsg = "internal error"; break; case XML_ERR_PEREF_AT_EOF: - errmsg = "PEReference at end of document\n"; + errmsg = "PEReference at end of document"; break; case XML_ERR_PEREF_IN_PROLOG: - errmsg = "PEReference in prolog\n"; + errmsg = "PEReference in prolog"; break; case XML_ERR_PEREF_IN_EPILOG: - errmsg = "PEReference in epilog\n"; + errmsg = "PEReference in epilog"; break; case XML_ERR_PEREF_NO_NAME: - errmsg = "PEReference: no name\n"; + errmsg = "PEReference: no name"; break; case XML_ERR_PEREF_SEMICOL_MISSING: - errmsg = "PEReference: expecting ';'\n"; + errmsg = "PEReference: expecting ';'"; break; case XML_ERR_ENTITY_LOOP: - errmsg = "Detected an entity reference loop\n"; + errmsg = "Detected an entity reference loop"; break; case XML_ERR_ENTITY_NOT_STARTED: - errmsg = "EntityValue: \" or ' expected\n"; + errmsg = "EntityValue: \" or ' expected"; break; case XML_ERR_ENTITY_PE_INTERNAL: - errmsg = "PEReferences forbidden in internal subset\n"; + errmsg = "PEReferences forbidden in internal subset"; break; case XML_ERR_ENTITY_NOT_FINISHED: - errmsg = "EntityValue: \" or ' expected\n"; + errmsg = "EntityValue: \" or ' expected"; break; case XML_ERR_ATTRIBUTE_NOT_STARTED: - errmsg = "AttValue: \" or ' expected\n"; + errmsg = "AttValue: \" or ' expected"; break; case XML_ERR_LT_IN_ATTRIBUTE: - errmsg = "Unescaped '<' not allowed in attributes values\n"; + errmsg = "Unescaped '<' not allowed in attributes values"; break; case XML_ERR_LITERAL_NOT_STARTED: - errmsg = "SystemLiteral \" or ' expected\n"; + errmsg = "SystemLiteral \" or ' expected"; break; case XML_ERR_LITERAL_NOT_FINISHED: - errmsg = "Unfinished System or Public ID \" or ' expected\n"; + errmsg = "Unfinished System or Public ID \" or ' expected"; break; case XML_ERR_MISPLACED_CDATA_END: - errmsg = "Sequence ']]>' not allowed in content\n"; + errmsg = "Sequence ']]>' not allowed in content"; break; case XML_ERR_URI_REQUIRED: - errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; + errmsg = "SYSTEM or PUBLIC, the URI is missing"; break; case XML_ERR_PUBID_REQUIRED: - errmsg = "PUBLIC, the Public Identifier is missing\n"; + errmsg = "PUBLIC, the Public Identifier is missing"; break; case XML_ERR_HYPHEN_IN_COMMENT: - errmsg = "Comment must not contain '--' (double-hyphen)\n"; + errmsg = "Comment must not contain '--' (double-hyphen)"; break; case XML_ERR_PI_NOT_STARTED: - errmsg = "xmlParsePI : no target name\n"; + errmsg = "xmlParsePI : no target name"; break; case XML_ERR_RESERVED_XML_NAME: - errmsg = "Invalid PI name\n"; + errmsg = "Invalid PI name"; break; case XML_ERR_NOTATION_NOT_STARTED: - errmsg = "NOTATION: Name expected here\n"; + errmsg = "NOTATION: Name expected here"; break; case XML_ERR_NOTATION_NOT_FINISHED: - errmsg = "'>' required to close NOTATION declaration\n"; + errmsg = "'>' required to close NOTATION declaration"; break; case XML_ERR_VALUE_REQUIRED: - errmsg = "Entity value required\n"; + errmsg = "Entity value required"; break; case XML_ERR_URI_FRAGMENT: errmsg = "Fragment not allowed"; break; case XML_ERR_ATTLIST_NOT_STARTED: - errmsg = "'(' required to start ATTLIST enumeration\n"; + errmsg = "'(' required to start ATTLIST enumeration"; break; case XML_ERR_NMTOKEN_REQUIRED: - errmsg = "NmToken expected in ATTLIST enumeration\n"; + errmsg = "NmToken expected in ATTLIST enumeration"; break; case XML_ERR_ATTLIST_NOT_FINISHED: - errmsg = "')' required to finish ATTLIST enumeration\n"; + errmsg = "')' required to finish ATTLIST enumeration"; break; case XML_ERR_MIXED_NOT_STARTED: - errmsg = "MixedContentDecl : '|' or ')*' expected\n"; + errmsg = "MixedContentDecl : '|' or ')*' expected"; break; case XML_ERR_PCDATA_REQUIRED: - errmsg = "MixedContentDecl : '#PCDATA' expected\n"; + errmsg = "MixedContentDecl : '#PCDATA' expected"; break; case XML_ERR_ELEMCONTENT_NOT_STARTED: - errmsg = "ContentDecl : Name or '(' expected\n"; + errmsg = "ContentDecl : Name or '(' expected"; break; case XML_ERR_ELEMCONTENT_NOT_FINISHED: - errmsg = "ContentDecl : ',' '|' or ')' expected\n"; + errmsg = "ContentDecl : ',' '|' or ')' expected"; break; case XML_ERR_PEREF_IN_INT_SUBSET: errmsg = - "PEReference: forbidden within markup decl in internal subset\n"; + "PEReference: forbidden within markup decl in internal subset"; break; case XML_ERR_GT_REQUIRED: - errmsg = "expected '>'\n"; + errmsg = "expected '>'"; break; case XML_ERR_CONDSEC_INVALID: - errmsg = "XML conditional section '[' expected\n"; + errmsg = "XML conditional section '[' expected"; break; case XML_ERR_EXT_SUBSET_NOT_FINISHED: - errmsg = "Content error in the external subset\n"; + errmsg = "Content error in the external subset"; break; case XML_ERR_CONDSEC_INVALID_KEYWORD: errmsg = - "conditional section INCLUDE or IGNORE keyword expected\n"; + "conditional section INCLUDE or IGNORE keyword expected"; break; case XML_ERR_CONDSEC_NOT_FINISHED: - errmsg = "XML conditional section not closed\n"; + errmsg = "XML conditional section not closed"; break; case XML_ERR_XMLDECL_NOT_STARTED: - errmsg = "Text declaration '<?xml' required\n"; + errmsg = "Text declaration '<?xml' required"; break; case XML_ERR_XMLDECL_NOT_FINISHED: - errmsg = "parsing XML declaration: '?>' expected\n"; + errmsg = "parsing XML declaration: '?>' expected"; break; case XML_ERR_EXT_ENTITY_STANDALONE: - errmsg = "external parsed entities cannot be standalone\n"; + errmsg = "external parsed entities cannot be standalone"; break; case XML_ERR_ENTITYREF_SEMICOL_MISSING: - errmsg = "EntityRef: expecting ';'\n"; + errmsg = "EntityRef: expecting ';'"; break; case XML_ERR_DOCTYPE_NOT_FINISHED: - errmsg = "DOCTYPE improperly terminated\n"; + errmsg = "DOCTYPE improperly terminated"; break; case XML_ERR_LTSLASH_REQUIRED: - errmsg = "EndTag: '</' not found\n"; + errmsg = "EndTag: '</' not found"; break; case XML_ERR_EQUAL_REQUIRED: - errmsg = "expected '='\n"; + errmsg = "expected '='"; break; case XML_ERR_STRING_NOT_CLOSED: - errmsg = "String not closed expecting \" or '\n"; + errmsg = "String not closed expecting \" or '"; break; case XML_ERR_STRING_NOT_STARTED: - errmsg = "String not started expecting ' or \"\n"; + errmsg = "String not started expecting ' or \""; break; case XML_ERR_ENCODING_NAME: - errmsg = "Invalid XML encoding name\n"; + errmsg = "Invalid XML encoding name"; break; case XML_ERR_STANDALONE_VALUE: - errmsg = "standalone accepts only 'yes' or 'no'\n"; + errmsg = "standalone accepts only 'yes' or 'no'"; break; case XML_ERR_DOCUMENT_EMPTY: - errmsg = "Document is empty\n"; + errmsg = "Document is empty"; break; case XML_ERR_DOCUMENT_END: - errmsg = "Extra content at the end of the document\n"; + errmsg = "Extra content at the end of the document"; break; case XML_ERR_NOT_WELL_BALANCED: - errmsg = "chunk is not well balanced\n"; + errmsg = "chunk is not well balanced"; break; case XML_ERR_EXTRA_CONTENT: - errmsg = "extra content at the end of well balanced chunk\n"; + errmsg = "extra content at the end of well balanced chunk"; break; case XML_ERR_VERSION_MISSING: - errmsg = "Malformed declaration expecting version\n"; + errmsg = "Malformed declaration expecting version"; + break; + case XML_ERR_NAME_TOO_LONG: + errmsg = "Name too long use XML_PARSE_HUGE option"; break; #if 0 case: - errmsg = "\n"; + errmsg = ""; break; #endif default: - errmsg = "Unregistered error message\n"; + errmsg = "Unregistered error message"; } + if (info == NULL) + snprintf(errstr, 128, "%s\n", errmsg); + else + snprintf(errstr, 128, "%s: %%s\n", errmsg); if (ctxt != NULL) ctxt->errNo = error; __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, - XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, + XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0], info); if (ctxt != NULL) { ctxt->wellFormed = 0; @@ -392,7 +565,7 @@ xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, if (ctxt != NULL) ctxt->errNo = error; __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, - XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg); + XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); if (ctxt != NULL) { ctxt->wellFormed = 0; if (ctxt->recovery == 0) @@ -415,20 +588,28 @@ xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, const xmlChar *str1, const xmlChar *str2) { xmlStructuredErrorFunc schannel = NULL; - + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; if ((ctxt != NULL) && (ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) schannel = ctxt->sax->serror; - __xmlRaiseError(schannel, + if (ctxt != NULL) { + __xmlRaiseError(schannel, (ctxt->sax) ? ctxt->sax->warning : NULL, ctxt->userData, ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING, NULL, 0, (const char *) str1, (const char *) str2, NULL, 0, 0, msg, (const char *) str1, (const char *) str2); + } else { + __xmlRaiseError(schannel, NULL, NULL, + ctxt, NULL, XML_FROM_PARSER, error, + XML_ERR_WARNING, NULL, 0, + (const char *) str1, (const char *) str2, NULL, 0, 0, + msg, (const char *) str1, (const char *) str2); + } } /** @@ -442,7 +623,7 @@ xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, */ static void xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar *str1) + const char *msg, const xmlChar *str1, const xmlChar *str2) { xmlStructuredErrorFunc schannel = NULL; @@ -454,14 +635,20 @@ xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) schannel = ctxt->sax->serror; } - __xmlRaiseError(schannel, + if (ctxt != NULL) { + __xmlRaiseError(schannel, ctxt->vctxt.error, ctxt->vctxt.userData, ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR, NULL, 0, (const char *) str1, - NULL, NULL, 0, 0, - msg, (const char *) str1); - if (ctxt != NULL) { + (const char *) str2, NULL, 0, 0, + msg, (const char *) str1, (const char *) str2); ctxt->valid = 0; + } else { + __xmlRaiseError(schannel, NULL, NULL, + ctxt, NULL, XML_FROM_DTD, error, + XML_ERR_ERROR, NULL, 0, (const char *) str1, + (const char *) str2, NULL, 0, 0, + msg, (const char *) str1, (const char *) str2); } } @@ -506,7 +693,7 @@ xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, */ static void xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar *str1, int val, + const char *msg, const xmlChar *str1, int val, const xmlChar *str2) { if ((ctxt != NULL) && (ctxt->disableSAX != 0) && @@ -607,9 +794,34 @@ xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, ctxt->nsWellFormed = 0; } +/** + * xmlNsWarn + * @ctxt: an XML parser context + * @error: the error number + * @msg: the message + * @info1: extra information string + * @info2: extra information string + * + * Handle a namespace warning error + */ +static void +xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, + const char *msg, + const xmlChar * info1, const xmlChar * info2, + const xmlChar * info3) +{ + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, + XML_ERR_WARNING, NULL, 0, (const char *) info1, + (const char *) info2, (const char *) info3, 0, 0, msg, + info1, info2, info3); +} + /************************************************************************ * * - * Library wide options * + * Library wide options * * * ************************************************************************/ @@ -806,13 +1018,25 @@ xmlHasFeature(xmlFeature feature) return(1); #else return(0); -#endif +#endif case XML_WITH_ZLIB: #ifdef LIBXML_ZLIB_ENABLED return(1); #else return(0); #endif + case XML_WITH_LZMA: +#ifdef LIBXML_LZMA_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_ICU: +#ifdef LIBXML_ICU_ENABLED + return(1); +#else + return(0); +#endif default: break; } @@ -821,7 +1045,7 @@ xmlHasFeature(xmlFeature feature) /************************************************************************ * * - * SAX2 defaulted attributes handling * + * SAX2 defaulted attributes handling * * * ************************************************************************/ @@ -845,8 +1069,8 @@ xmlDetectSAX2(xmlParserCtxtPtr ctxt) { ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); - if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || - (ctxt->str_xml_ns == NULL)) { + if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || + (ctxt->str_xml_ns == NULL)) { xmlErrMemory(ctxt, NULL); } } @@ -856,10 +1080,108 @@ typedef xmlDefAttrs *xmlDefAttrsPtr; struct _xmlDefAttrs { int nbAttrs; /* number of defaulted attributes on that element */ int maxAttrs; /* the size of the array */ - const xmlChar *values[4]; /* array of localname/prefix/values */ + const xmlChar *values[5]; /* array of localname/prefix/values/external */ }; /** + * xmlAttrNormalizeSpace: + * @src: the source string + * @dst: the target string + * + * Normalize the space in non CDATA attribute values: + * If the attribute type is not CDATA, then the XML processor MUST further + * process the normalized attribute value by discarding any leading and + * trailing space (#x20) characters, and by replacing sequences of space + * (#x20) characters by a single space (#x20) character. + * Note that the size of dst need to be at least src, and if one doesn't need + * to preserve dst (and it doesn't come from a dictionary or read-only) then + * passing src as dst is just fine. + * + * Returns a pointer to the normalized value (dst) or NULL if no conversion + * is needed. + */ +static xmlChar * +xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) +{ + if ((src == NULL) || (dst == NULL)) + return(NULL); + + while (*src == 0x20) src++; + while (*src != 0) { + if (*src == 0x20) { + while (*src == 0x20) src++; + if (*src != 0) + *dst++ = 0x20; + } else { + *dst++ = *src++; + } + } + *dst = 0; + if (dst == src) + return(NULL); + return(dst); +} + +/** + * xmlAttrNormalizeSpace2: + * @src: the source string + * + * Normalize the space in non CDATA attribute values, a slightly more complex + * front end to avoid allocation problems when running on attribute values + * coming from the input. + * + * Returns a pointer to the normalized value (dst) or NULL if no conversion + * is needed. + */ +static const xmlChar * +xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) +{ + int i; + int remove_head = 0; + int need_realloc = 0; + const xmlChar *cur; + + if ((ctxt == NULL) || (src == NULL) || (len == NULL)) + return(NULL); + i = *len; + if (i <= 0) + return(NULL); + + cur = src; + while (*cur == 0x20) { + cur++; + remove_head++; + } + while (*cur != 0) { + if (*cur == 0x20) { + cur++; + if ((*cur == 0x20) || (*cur == 0)) { + need_realloc = 1; + break; + } + } else + cur++; + } + if (need_realloc) { + xmlChar *ret; + + ret = xmlStrndup(src + remove_head, i - remove_head + 1); + if (ret == NULL) { + xmlErrMemory(ctxt, NULL); + return(NULL); + } + xmlAttrNormalizeSpace(ret, ret); + *len = (int) strlen((const char *)ret); + return(ret); + } else if (remove_head) { + *len -= remove_head; + memmove(src, src + remove_head, 1 + *len); + return(src); + } + return(NULL); +} + +/** * xmlAddDefAttrs: * @ctxt: an XML parser context * @fullname: the element fullname @@ -878,6 +1200,14 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, const xmlChar *name; const xmlChar *prefix; + /* + * Allows to detect attribute redefinitions + */ + if (ctxt->attsSpecial != NULL) { + if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) + return; + } + if (ctxt->attsDefault == NULL) { ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); if (ctxt->attsDefault == NULL) @@ -903,22 +1233,30 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); if (defaults == NULL) { defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + - (4 * 4) * sizeof(const xmlChar *)); + (4 * 5) * sizeof(const xmlChar *)); if (defaults == NULL) goto mem_error; defaults->nbAttrs = 0; defaults->maxAttrs = 4; - xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); + if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, + defaults, NULL) < 0) { + xmlFree(defaults); + goto mem_error; + } } else if (defaults->nbAttrs >= defaults->maxAttrs) { xmlDefAttrsPtr temp; temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + - (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *)); + (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); if (temp == NULL) goto mem_error; defaults = temp; defaults->maxAttrs *= 2; - xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); + if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, + defaults, NULL) < 0) { + xmlFree(defaults); + goto mem_error; + } } /* @@ -934,13 +1272,17 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, prefix = xmlDictLookup(ctxt->dict, fullattr, len); } - defaults->values[4 * defaults->nbAttrs] = name; - defaults->values[4 * defaults->nbAttrs + 1] = prefix; + defaults->values[5 * defaults->nbAttrs] = name; + defaults->values[5 * defaults->nbAttrs + 1] = prefix; /* intern the string and precompute the end */ len = xmlStrlen(value); value = xmlDictLookup(ctxt->dict, value, len); - defaults->values[4 * defaults->nbAttrs + 2] = value; - defaults->values[4 * defaults->nbAttrs + 3] = value + len; + defaults->values[5 * defaults->nbAttrs + 2] = value; + defaults->values[5 * defaults->nbAttrs + 3] = value + len; + if (ctxt->external) + defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; + else + defaults->values[5 * defaults->nbAttrs + 4] = NULL; defaults->nbAttrs++; return; @@ -957,7 +1299,7 @@ mem_error: * @fullattr: the attribute fullname * @type: the attribute type * - * Register that this attribute is not CDATA + * Register this attribute type */ static void xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, @@ -971,6 +1313,9 @@ xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, goto mem_error; } + if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) + return; + xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, (void *) (long) type); return; @@ -981,6 +1326,45 @@ mem_error: } /** + * xmlCleanSpecialAttrCallback: + * + * Removes CDATA attributes from the special attribute table + */ +static void +xmlCleanSpecialAttrCallback(void *payload, void *data, + const xmlChar *fullname, const xmlChar *fullattr, + const xmlChar *unused ATTRIBUTE_UNUSED) { + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; + + if (((long) payload) == XML_ATTRIBUTE_CDATA) { + xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); + } +} + +/** + * xmlCleanSpecialAttr: + * @ctxt: an XML parser context + * + * Trim the list of attributes defined to remove all those of type + * CDATA as they are not special. This call should be done when finishing + * to parse the DTD and before starting to parse the document root. + */ +static void +xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) +{ + if (ctxt->attsSpecial == NULL) + return; + + xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); + + if (xmlHashSize(ctxt->attsSpecial) == 0) { + xmlHashFree(ctxt->attsSpecial, NULL); + ctxt->attsSpecial = NULL; + } + return; +} + +/** * xmlCheckLanguageID: * @lang: pointer to the string value * @@ -996,70 +1380,192 @@ mem_error: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ * [38] Subcode ::= ([a-z] | [A-Z])+ * + * The current REC reference the sucessors of RFC 1766, currently 5646 + * + * http://www.rfc-editor.org/rfc/rfc5646.txt + * langtag = language + * ["-" script] + * ["-" region] + * *("-" variant) + * *("-" extension) + * ["-" privateuse] + * language = 2*3ALPHA ; shortest ISO 639 code + * ["-" extlang] ; sometimes followed by + * ; extended language subtags + * / 4ALPHA ; or reserved for future use + * / 5*8ALPHA ; or registered language subtag + * + * extlang = 3ALPHA ; selected ISO 639 codes + * *2("-" 3ALPHA) ; permanently reserved + * + * script = 4ALPHA ; ISO 15924 code + * + * region = 2ALPHA ; ISO 3166-1 code + * / 3DIGIT ; UN M.49 code + * + * variant = 5*8alphanum ; registered variants + * / (DIGIT 3alphanum) + * + * extension = singleton 1*("-" (2*8alphanum)) + * + * ; Single alphanumerics + * ; "x" reserved for private use + * singleton = DIGIT ; 0 - 9 + * / %x41-57 ; A - W + * / %x59-5A ; Y - Z + * / %x61-77 ; a - w + * / %x79-7A ; y - z + * + * it sounds right to still allow Irregular i-xxx IANA and user codes too + * The parser below doesn't try to cope with extension or privateuse + * that could be added but that's not interoperable anyway + * * Returns 1 if correct 0 otherwise **/ int xmlCheckLanguageID(const xmlChar * lang) { - const xmlChar *cur = lang; + const xmlChar *cur = lang, *nxt; if (cur == NULL) return (0); if (((cur[0] == 'i') && (cur[1] == '-')) || - ((cur[0] == 'I') && (cur[1] == '-'))) { + ((cur[0] == 'I') && (cur[1] == '-')) || + ((cur[0] == 'x') && (cur[1] == '-')) || + ((cur[0] == 'X') && (cur[1] == '-'))) { /* - * IANA code + * Still allow IANA code and user code which were coming + * from the previous version of the XML-1.0 specification + * it's deprecated but we should not fail */ cur += 2; - while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ + while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || ((cur[0] >= 'a') && (cur[0] <= 'z'))) cur++; - } else if (((cur[0] == 'x') && (cur[1] == '-')) || - ((cur[0] == 'X') && (cur[1] == '-'))) { - /* - * User code - */ - cur += 2; - while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ - ((cur[0] >= 'a') && (cur[0] <= 'z'))) - cur++; - } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || - ((cur[0] >= 'a') && (cur[0] <= 'z'))) { + return(cur[0] == 0); + } + nxt = cur; + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || + ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) + nxt++; + if (nxt - cur >= 4) { /* - * ISO639 + * Reserved */ - cur++; - if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || - ((cur[0] >= 'a') && (cur[0] <= 'z'))) - cur++; - else - return (0); - } else - return (0); - while (cur[0] != 0) { /* non input consuming */ - if (cur[0] != '-') - return (0); - cur++; - if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || - ((cur[0] >= 'a') && (cur[0] <= 'z'))) - cur++; - else - return (0); - while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ - ((cur[0] >= 'a') && (cur[0] <= 'z'))) - cur++; + if ((nxt - cur > 8) || (nxt[0] != 0)) + return(0); + return(1); } + if (nxt - cur < 2) + return(0); + /* we got an ISO 639 code */ + if (nxt[0] == 0) + return(1); + if (nxt[0] != '-') + return(0); + + nxt++; + cur = nxt; + /* now we can have extlang or script or region or variant */ + if ((nxt[0] >= '0') && (nxt[0] <= '9')) + goto region_m49; + + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || + ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) + nxt++; + if (nxt - cur == 4) + goto script; + if (nxt - cur == 2) + goto region; + if ((nxt - cur >= 5) && (nxt - cur <= 8)) + goto variant; + if (nxt - cur != 3) + return(0); + /* we parsed an extlang */ + if (nxt[0] == 0) + return(1); + if (nxt[0] != '-') + return(0); + + nxt++; + cur = nxt; + /* now we can have script or region or variant */ + if ((nxt[0] >= '0') && (nxt[0] <= '9')) + goto region_m49; + + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || + ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) + nxt++; + if (nxt - cur == 2) + goto region; + if ((nxt - cur >= 5) && (nxt - cur <= 8)) + goto variant; + if (nxt - cur != 4) + return(0); + /* we parsed a script */ +script: + if (nxt[0] == 0) + return(1); + if (nxt[0] != '-') + return(0); + + nxt++; + cur = nxt; + /* now we can have region or variant */ + if ((nxt[0] >= '0') && (nxt[0] <= '9')) + goto region_m49; + + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || + ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) + nxt++; + + if ((nxt - cur >= 5) && (nxt - cur <= 8)) + goto variant; + if (nxt - cur != 2) + return(0); + /* we parsed a region */ +region: + if (nxt[0] == 0) + return(1); + if (nxt[0] != '-') + return(0); + + nxt++; + cur = nxt; + /* now we can just have a variant */ + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || + ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) + nxt++; + + if ((nxt - cur < 5) || (nxt - cur > 8)) + return(0); + + /* we parsed a variant */ +variant: + if (nxt[0] == 0) + return(1); + if (nxt[0] != '-') + return(0); + /* extensions and private use subtags not checked */ return (1); + +region_m49: + if (((nxt[1] >= '0') && (nxt[1] <= '9')) && + ((nxt[2] >= '0') && (nxt[2] <= '9'))) { + nxt += 3; + goto region; + } + return(0); } /************************************************************************ * * - * Parser stacks related functions and macros * + * Parser stacks related functions and macros * * * ************************************************************************/ -xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, - const xmlChar ** str); +static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, + const xmlChar ** str); #ifdef SAX2 /** @@ -1078,7 +1584,7 @@ nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) { if (ctxt->options & XML_PARSE_NSCLEAN) { int i; - for (i = 0;i < ctxt->nsNr;i += 2) { + for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { if (ctxt->nsTab[i] == prefix) { /* in scope */ if (ctxt->nsTab[i + 1] == URL) @@ -1099,15 +1605,16 @@ nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) return (-1); } } else if (ctxt->nsNr >= ctxt->nsMax) { + const xmlChar ** tmp; ctxt->nsMax *= 2; - ctxt->nsTab = (const xmlChar **) - xmlRealloc((char *) ctxt->nsTab, - ctxt->nsMax * sizeof(ctxt->nsTab[0])); - if (ctxt->nsTab == NULL) { + tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, + ctxt->nsMax * sizeof(ctxt->nsTab[0])); + if (tmp == NULL) { xmlErrMemory(ctxt, NULL); ctxt->nsMax /= 2; return (-1); } + ctxt->nsTab = tmp; } ctxt->nsTab[ctxt->nsNr++] = prefix; ctxt->nsTab[ctxt->nsNr++] = URL; @@ -1134,7 +1641,7 @@ nsPop(xmlParserCtxtPtr ctxt, int nr) } if (ctxt->nsNr <= 0) return (0); - + for (i = 0;i < nr;i++) { ctxt->nsNr--; ctxt->nsTab[ctxt->nsNr] = NULL; @@ -1184,13 +1691,13 @@ mem_error: * * Pushes a new parser input on top of the input stack * - * Returns 0 in case of error, the index in the stack otherwise + * Returns -1 in case of error, the index in the stack otherwise */ int inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) { if ((ctxt == NULL) || (value == NULL)) - return(0); + return(-1); if (ctxt->inputNr >= ctxt->inputMax) { ctxt->inputMax *= 2; ctxt->inputTab = @@ -1199,7 +1706,10 @@ inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) sizeof(ctxt->inputTab[0])); if (ctxt->inputTab == NULL) { xmlErrMemory(ctxt, NULL); - return (0); + xmlFreeInputStream(value); + ctxt->inputMax /= 2; + value = NULL; + return (-1); } } ctxt->inputTab[ctxt->inputNr] = value; @@ -1239,7 +1749,7 @@ inputPop(xmlParserCtxtPtr ctxt) * * Pushes a new element node on top of the node stack * - * Returns 0 in case of error, the index in the stack otherwise + * Returns -1 in case of error, the index in the stack otherwise */ int nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) @@ -1253,22 +1763,24 @@ nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) sizeof(ctxt->nodeTab[0])); if (tmp == NULL) { xmlErrMemory(ctxt, NULL); - return (0); + return (-1); } ctxt->nodeTab = tmp; ctxt->nodeMax *= 2; } - if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) { + if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, - "Excessive depth in document: change xmlParserMaxDepth = %d\n", + "Excessive depth in document: %d use XML_PARSE_HUGE option\n", xmlParserMaxDepth); - ctxt->instate = XML_PARSER_EOF; - return(0); + xmlHaltParser(ctxt); + return(-1); } ctxt->nodeTab[ctxt->nodeNr] = value; ctxt->node = value; return (ctxt->nodeNr++); } + /** * nodePop: * @ctxt: an XML parser context @@ -1384,15 +1896,14 @@ namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) if (ctxt->nameNr >= ctxt->nameMax) { const xmlChar * *tmp; - ctxt->nameMax *= 2; tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, - ctxt->nameMax * + ctxt->nameMax * 2 * sizeof(ctxt->nameTab[0])); if (tmp == NULL) { - ctxt->nameMax /= 2; goto mem_error; } ctxt->nameTab = tmp; + ctxt->nameMax *= 2; } ctxt->nameTab[ctxt->nameNr] = value; ctxt->name = value; @@ -1428,13 +1939,17 @@ namePop(xmlParserCtxtPtr ctxt) static int spacePush(xmlParserCtxtPtr ctxt, int val) { if (ctxt->spaceNr >= ctxt->spaceMax) { + int *tmp; + ctxt->spaceMax *= 2; - ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, - ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); - if (ctxt->spaceTab == NULL) { + tmp = (int *) xmlRealloc(ctxt->spaceTab, + ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); + if (tmp == NULL) { xmlErrMemory(ctxt, NULL); - return(0); + ctxt->spaceMax /=2; + return(-1); } + ctxt->spaceTab = tmp; } ctxt->spaceTab[ctxt->spaceNr] = val; ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; @@ -1474,7 +1989,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) { * to compare on ASCII based substring. * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined * strings without newlines within the parser. - * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII + * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII * defined char within the parser. * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding * @@ -1523,10 +2038,10 @@ static int spacePop(xmlParserCtxtPtr ctxt) { #define SKIPL(val) do { \ int skipl; \ for(skipl=0; skipl<val; skipl++) { \ - if (*(ctxt->input->cur) == '\n') { \ + if (*(ctxt->input->cur) == '\n') { \ ctxt->input->line++; ctxt->input->col = 1; \ - } else ctxt->input->col++; \ - ctxt->nbChars++; \ + } else ctxt->input->col++; \ + ctxt->nbChars++; \ ctxt->input->cur++; \ } \ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ @@ -1552,8 +2067,25 @@ static void xmlSHRINK (xmlParserCtxtPtr ctxt) { xmlGROW (ctxt); static void xmlGROW (xmlParserCtxtPtr ctxt) { + unsigned long curEnd = ctxt->input->end - ctxt->input->cur; + unsigned long curBase = ctxt->input->cur - ctxt->input->base; + + if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) || + (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) && + ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); + xmlHaltParser(ctxt); + return; + } xmlParserInputGrow(ctxt->input, INPUT_CHUNK); - if ((*ctxt->input->cur == 0) && + if ((ctxt->input->cur > ctxt->input->end) || + (ctxt->input->cur < ctxt->input->base)) { + xmlHaltParser(ctxt); + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound"); + return; + } + if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) xmlPopInput(ctxt); } @@ -1612,6 +2144,8 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { while (IS_BLANK_CH(*cur)) { if (*cur == '\n') { ctxt->input->line++; ctxt->input->col = 1; + } else { + ctxt->input->col++; } cur++; res++; @@ -1626,7 +2160,8 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { int cur; do { cur = CUR; - while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ + while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */ + (ctxt->instate != XML_PARSER_EOF))) { NEXT; cur = CUR; res++; @@ -1640,7 +2175,8 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { * Need to handle support of entities branching here */ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); - } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ + } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */ + (ctxt->instate != XML_PARSER_EOF)); } return(res); } @@ -1680,10 +2216,12 @@ xmlPopInput(xmlParserCtxtPtr ctxt) { * * xmlPushInput: switch to a new input stream which is stacked on top * of the previous one(s). + * Returns -1 in case of error or the index in the input stack */ -void +int xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { - if (input == NULL) return; + int ret; + if (input == NULL) return(-1); if (xmlParserDebugEntities) { if ((ctxt->input != NULL) && (ctxt->input->filename)) @@ -1693,8 +2231,11 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { xmlGenericError(xmlGenericErrorContext, "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); } - inputPush(ctxt, input); + ret = inputPush(ctxt, input); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); GROW; + return(ret); } /** @@ -1708,7 +2249,7 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { * * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. * * Returns the value parsed (as an int), 0 in case of error */ @@ -1729,8 +2270,10 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { if (count++ > 20) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(0); } - if ((RAW >= '0') && (RAW <= '9')) + if ((RAW >= '0') && (RAW <= '9')) val = val * 16 + (CUR - '0'); else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) val = val * 16 + (CUR - 'a') + 10; @@ -1760,8 +2303,10 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { if (count++ > 20) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(0); } - if ((RAW >= '0') && (RAW <= '9')) + if ((RAW >= '0') && (RAW <= '9')) val = val * 10 + (CUR - '0'); else { xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); @@ -1787,7 +2332,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { /* * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. */ if ((IS_CHAR(val) && (outofrange == 0))) { return(val); @@ -1812,7 +2357,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { * * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. * * Returns the value parsed (as an int), 0 in case of error, str will be * updated to the current value of the index @@ -1831,7 +2376,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { ptr += 3; cur = *ptr; while (cur != ';') { /* Non input consuming loop */ - if ((cur >= '0') && (cur <= '9')) + if ((cur >= '0') && (cur <= '9')) val = val * 16 + (cur - '0'); else if ((cur >= 'a') && (cur <= 'f')) val = val * 16 + (cur - 'a') + 10; @@ -1854,7 +2399,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { ptr += 2; cur = *ptr; while (cur != ';') { /* Non input consuming loops */ - if ((cur >= '0') && (cur <= '9')) + if ((cur >= '0') && (cur <= '9')) val = val * 10 + (cur - '0'); else { xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); @@ -1878,7 +2423,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { /* * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. */ if ((IS_CHAR(val) && (outofrange == 0))) { return(val); @@ -1900,9 +2445,9 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { * * Returns the new input stream or NULL */ - + static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} - + static xmlParserInputPtr xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { xmlParserInputPtr input; @@ -1924,7 +2469,8 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { buffer = xmlMallocAtomic(length); if (buffer == NULL) { xmlErrMemory(ctxt, NULL); - return(NULL); + xmlFree(input); + return(NULL); } buffer [0] = ' '; buffer [1] = '%'; @@ -1943,12 +2489,12 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { /** * xmlParserHandlePEReference: * @ctxt: the parser context - * + * * [69] PEReference ::= '%' Name ';' * * [ WFC: No Recursion ] * A parsed entity must not contain a recursive - * reference to itself, either directly or indirectly. + * reference to itself, either directly or indirectly. * * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an internal DTD @@ -1966,9 +2512,9 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { * NOTE: misleading but this is handled. * * A PEReference may have been detected in the current input stream - * the handling is done accordingly to + * the handling is done accordingly to * http://www.w3.org/TR/REC-xml#entproc - * i.e. + * i.e. * - Included in literal in entity values * - Included as Parameter Entity reference within DTDs */ @@ -2045,8 +2591,10 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { NEXT; if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) entity = ctxt->sax->getParameterEntity(ctxt->userData, name); + if (ctxt->instate == XML_PARSER_EOF) + return; if (entity == NULL) { - + /* * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an @@ -2071,16 +2619,18 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, "PEReference: %%%s; not found\n", - name); - } else + name, NULL); + } else xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, "PEReference: %%%s; not found\n", name, NULL); ctxt->valid = 0; } + xmlParserEntityCheck(ctxt, 0, NULL, 0); } else if (ctxt->input->free != deallocblankswrapper) { input = xmlNewBlanksWrapperInputStream(ctxt, entity); - xmlPushInput(ctxt, input); + if (xmlPushInput(ctxt, input) < 0) + return; } else { if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { @@ -2088,14 +2638,32 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { xmlCharEncoding enc; /* + * Note: external parameter entities will not be loaded, it + * is not required for a non-validating parser, unless the + * option of validating, or substituting entities were + * given. Doing so is far more secure as the parser will + * only process data coming from the document entity by + * default. + */ + if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && + ((ctxt->options & XML_PARSE_NOENT) == 0) && + ((ctxt->options & XML_PARSE_DTDVALID) == 0) && + ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && + ((ctxt->options & XML_PARSE_DTDATTR) == 0) && + (ctxt->replaceEntities == 0) && + (ctxt->validate == 0)) + return; + + /* * handle the extra spaces added before and after * c.f. http://www.w3.org/TR/REC-xml#as-PE * this is done independently. */ input = xmlNewEntityInputStream(ctxt, entity); - xmlPushInput(ctxt, input); + if (xmlPushInput(ctxt, input) < 0) + return; - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -2105,6 +2673,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * the amount of data in the buffer. */ GROW + if (ctxt->instate == XML_PARSER_EOF) + return; if ((ctxt->input->end - ctxt->input->cur)>=4) { start[0] = RAW; start[1] = NXT(1); @@ -2135,14 +2705,17 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { /* * Macro used to grow the current buffer. + * buffer##_size is expected to be a size_t + * mem_error: is expected to handle memory allocation failures */ -#define growBuffer(buffer) { \ +#define growBuffer(buffer, n) { \ xmlChar *tmp; \ - buffer##_size *= 2; \ - tmp = (xmlChar *) \ - xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ + size_t new_size = buffer##_size * 2 + n; \ + if (new_size < buffer##_size) goto mem_error; \ + tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ if (tmp == NULL) goto mem_error; \ buffer = tmp; \ + buffer##_size = new_size; \ } /** @@ -2154,7 +2727,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * @end: an end marker xmlChar, 0 if none * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none - * + * * Takes a entity string content and process to do the adequate substitutions. * * [67] Reference ::= EntityRef | CharRef @@ -2168,19 +2741,22 @@ xmlChar * xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, int what, xmlChar end, xmlChar end2, xmlChar end3) { xmlChar *buffer = NULL; - int buffer_size = 0; + size_t buffer_size = 0; + size_t nbchars = 0; xmlChar *current = NULL; + xmlChar *rep = NULL; const xmlChar *last; xmlEntityPtr ent; int c,l; - int nbchars = 0; if ((ctxt == NULL) || (str == NULL) || (len < 0)) return(NULL); last = str + len; - if (ctxt->depth > 40) { + if (((ctxt->depth > 40) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) || + (ctxt->depth > 1024)) { xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); return(NULL); } @@ -2189,7 +2765,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, * allocate a translation buffer. */ buffer_size = XML_PARSER_BIG_BUFFER_SIZE; - buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); + buffer = (xmlChar *) xmlMallocAtomic(buffer_size); if (buffer == NULL) goto mem_error; /* @@ -2209,8 +2785,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, if (val != 0) { COPY_BUF(0,buffer,nbchars,val); } - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer); + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { if (xmlParserDebugEntities) @@ -2218,42 +2794,53 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, "String decoding Entity Reference: %.30s\n", str); ent = xmlParseStringEntityRef(ctxt, &str); + if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || + (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) + goto int_error; + xmlParserEntityCheck(ctxt, 0, ent, 0); + if (ent != NULL) + ctxt->nbentities += ent->checked / 2; if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { if (ent->content != NULL) { COPY_BUF(0,buffer,nbchars,ent->content[0]); - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer); + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } else { xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "predefined entity has no content\n"); } } else if ((ent != NULL) && (ent->content != NULL)) { - xmlChar *rep; - ctxt->depth++; rep = xmlStringDecodeEntities(ctxt, ent->content, what, 0, 0, 0); ctxt->depth--; + + if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || + (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) + goto int_error; + if (rep != NULL) { current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; - if (nbchars > - buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer); + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) + goto int_error; + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } xmlFree(rep); + rep = NULL; } } else if (ent != NULL) { int i = xmlStrlen(ent->name); const xmlChar *cur = ent->name; buffer[nbchars++] = '&'; - if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer); + if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); } for (;i > 0;i--) buffer[nbchars++] = *cur++; @@ -2264,9 +2851,15 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, xmlGenericError(xmlGenericErrorContext, "String decoding PE Reference: %.30s\n", str); ent = xmlParseStringPEReference(ctxt, &str); + if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) + goto int_error; + xmlParserEntityCheck(ctxt, 0, ent, 0); + if (ent != NULL) + ctxt->nbentities += ent->checked / 2; if (ent != NULL) { - xmlChar *rep; - + if (ent->content == NULL) { + xmlLoadEntityContent(ctxt, ent); + } ctxt->depth++; rep = xmlStringDecodeEntities(ctxt, ent->content, what, 0, 0, 0); @@ -2275,19 +2868,21 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; - if (nbchars > - buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer); + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) + goto int_error; + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } xmlFree(rep); + rep = NULL; } } } else { COPY_BUF(l,buffer,nbchars,c); str += l; - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer); + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } if (str < last) @@ -2295,11 +2890,16 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, else c = 0; } - buffer[nbchars++] = 0; + buffer[nbchars] = 0; return(buffer); mem_error: xmlErrMemory(ctxt, NULL); +int_error: + if (rep != NULL) + xmlFree(rep); + if (buffer != NULL) + xmlFree(buffer); return(NULL); } @@ -2311,7 +2911,7 @@ mem_error: * @end: an end marker xmlChar, 0 if none * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none - * + * * Takes a entity string content and process to do the adequate substitutions. * * [67] Reference ::= EntityRef | CharRef @@ -2414,7 +3014,7 @@ static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, * xmlSplitQName: * @ctxt: an XML parser context * @name: an XML parser context - * @prefix: a xmlChar ** + * @prefix: a xmlChar ** * * parse an UTF8 encoded XML qualified name string * @@ -2465,7 +3065,7 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { * for the processing speed. */ max = len * 2; - + buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); if (buffer == NULL) { xmlErrMemory(ctxt, NULL); @@ -2480,7 +3080,7 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { tmp = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); if (tmp == NULL) { - xmlFree(tmp); + xmlFree(buffer); xmlErrMemory(ctxt, NULL); return(NULL); } @@ -2491,7 +3091,7 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { } buffer[len] = 0; } - + if ((c == ':') && (*cur == 0)) { if (buffer != NULL) xmlFree(buffer); @@ -2544,7 +3144,7 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { * for the processing speed. */ max = len * 2; - + buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); if (buffer == NULL) { xmlErrMemory(ctxt, NULL); @@ -2570,7 +3170,7 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { } buffer[len] = 0; } - + if (buffer == NULL) ret = xmlStrndup(buf, len); else { @@ -2588,10 +3188,221 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { * * ************************************************************************/ -static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); +/************************************************************************ + * * + * Routines to parse Name, NCName and NmToken * + * * + ************************************************************************/ +#ifdef DEBUG +static unsigned long nbParseName = 0; +static unsigned long nbParseNmToken = 0; +static unsigned long nbParseNCName = 0; +static unsigned long nbParseNCNameComplex = 0; +static unsigned long nbParseNameComplex = 0; +static unsigned long nbParseStringName = 0; +#endif + +/* + * The two following functions are related to the change of accepted + * characters for Name and NmToken in the Revision 5 of XML-1.0 + * They correspond to the modified production [4] and the new production [4a] + * changes in that revision. Also note that the macros used for the + * productions Letter, Digit, CombiningChar and Extender are not needed + * anymore. + * We still keep compatibility to pre-revision5 parsing semantic if the + * new XML_PARSE_OLD10 option is given to the parser. + */ +static int +xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { + if ((ctxt->options & XML_PARSE_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] amd [5] of the + * Update 5 of XML-1.0 + */ + if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + (c == '_') || (c == ':') || + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF)))) + return(1); + } else { + if (IS_LETTER(c) || (c == '_') || (c == ':')) + return(1); + } + return(0); +} + +static int +xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { + if ((ctxt->options & XML_PARSE_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] amd [5] of the + * Update 5 of XML-1.0 + */ + if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + ((c >= '0') && (c <= '9')) || /* !start */ + (c == '_') || (c == ':') || + (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x300) && (c <= 0x36F)) || /* !start */ + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF)))) + return(1); + } else { + if ((IS_LETTER(c)) || (IS_DIGIT(c)) || + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) + return(1); + } + return(0); +} + static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, int normalize); +static const xmlChar * +xmlParseNameComplex(xmlParserCtxtPtr ctxt) { + int len = 0, l; + int c; + int count = 0; + +#ifdef DEBUG + nbParseNameComplex++; +#endif + + /* + * Handler for more complex cases + */ + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + if ((ctxt->options & XML_PARSE_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] amd [5] of the + * Update 5 of XML-1.0 + */ + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!(((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + (c == '_') || (c == ':') || + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF))))) { + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + ((c >= '0') && (c <= '9')) || /* !start */ + (c == '_') || (c == ':') || + (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x300) && (c <= 0x36F)) || /* !start */ + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF)) + )) { + if (count++ > XML_PARSER_CHUNK_SIZE) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + } + } else { + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!IS_LETTER(c) && (c != '_') && + (c != ':'))) { + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + + while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ + ((IS_LETTER(c)) || (IS_DIGIT(c)) || + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c)))) { + if (count++ > XML_PARSER_CHUNK_SIZE) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + } + } + } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); + return(NULL); + } + if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) + return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); + return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); +} + /** * xmlParseName: * @ctxt: an XML parser context @@ -2616,6 +3427,10 @@ xmlParseName(xmlParserCtxtPtr ctxt) { GROW; +#ifdef DEBUG + nbParseName++; +#endif + /* * Accelerator for simple ASCII names */ @@ -2632,6 +3447,11 @@ xmlParseName(xmlParserCtxtPtr ctxt) { in++; if ((*in > 0) && (*in < 0x80)) { count = in - ctxt->input->cur; + if ((count > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); + return(NULL); + } ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; ctxt->nbChars += count; @@ -2641,9 +3461,136 @@ xmlParseName(xmlParserCtxtPtr ctxt) { return(ret); } } + /* accelerator for special cases */ return(xmlParseNameComplex(ctxt)); } +static const xmlChar * +xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { + int len = 0, l; + int c; + int count = 0; + const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */ + +#ifdef DEBUG + nbParseNCNameComplex++; +#endif + + /* + * Handler for more complex cases + */ + GROW; + end = ctxt->input->cur; + c = CUR_CHAR(l); + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { + return(NULL); + } + + while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ + (xmlIsNameChar(ctxt, c) && (c != ':'))) { + if (count++ > XML_PARSER_CHUNK_SIZE) { + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + } + len += l; + NEXTL(l); + end = ctxt->input->cur; + c = CUR_CHAR(l); + if (c == 0) { + count = 0; + /* + * when shrinking to extend the buffer we really need to preserve + * the part of the name we already parsed. Hence rolling back + * by current lenght. + */ + ctxt->input->cur -= l; + GROW; + ctxt->input->cur += l; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + end = ctxt->input->cur; + c = CUR_CHAR(l); + } + } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } + return(xmlDictLookup(ctxt->dict, end - len, len)); +} + +/** + * xmlParseNCName: + * @ctxt: an XML parser context + * @len: length of the string parsed + * + * parse an XML name. + * + * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | + * CombiningChar | Extender + * + * [5NS] NCName ::= (Letter | '_') (NCNameChar)* + * + * Returns the Name parsed or NULL + */ + +static const xmlChar * +xmlParseNCName(xmlParserCtxtPtr ctxt) { + const xmlChar *in, *e; + const xmlChar *ret; + int count = 0; + +#ifdef DEBUG + nbParseNCName++; +#endif + + /* + * Accelerator for simple ASCII names + */ + in = ctxt->input->cur; + e = ctxt->input->end; + if ((((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + (*in == '_')) && (in < e)) { + in++; + while ((((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + ((*in >= 0x30) && (*in <= 0x39)) || + (*in == '_') || (*in == '-') || + (*in == '.')) && (in < e)) + in++; + if (in >= e) + goto complex; + if ((*in > 0) && (*in < 0x80)) { + count = in - ctxt->input->cur; + if ((count > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } + ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); + ctxt->input->cur = in; + ctxt->nbChars += count; + ctxt->input->col += count; + if (ret == NULL) { + xmlErrMemory(ctxt, NULL); + } + return(ret); + } + } +complex: + return(xmlParseNCNameComplex(ctxt)); +} + /** * xmlParseNameAndCompare: * @ctxt: an XML parser context @@ -2662,15 +3609,17 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { const xmlChar *ret; GROW; - + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + in = ctxt->input->cur; while (*in != 0 && *in == *cmp) { - ++in; + ++in; ++cmp; ctxt->input->col++; } if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { - /* success */ + /* success */ ctxt->input->cur = in; return (const xmlChar*) 1; } @@ -2683,42 +3632,6 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { return ret; } -static const xmlChar * -xmlParseNameComplex(xmlParserCtxtPtr ctxt) { - int len = 0, l; - int c; - int count = 0; - - /* - * Handler for more complex cases - */ - GROW; - c = CUR_CHAR(l); - if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ - (!IS_LETTER(c) && (c != '_') && - (c != ':'))) { - return(NULL); - } - - while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ - ((IS_LETTER(c)) || (IS_DIGIT(c)) || - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c)))) { - if (count++ > 100) { - count = 0; - GROW; - } - len += l; - NEXTL(l); - c = CUR_CHAR(l); - } - if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); -} - /** * xmlParseStringName: * @ctxt: an XML parser context @@ -2733,7 +3646,7 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { * * [6] Names ::= Name (#x20 Name)* * - * Returns the Name parsed or NULL. The @str pointer + * Returns the Name parsed or NULL. The @str pointer * is updated to the current location in the string. */ @@ -2744,17 +3657,19 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { int len = 0, l; int c; +#ifdef DEBUG + nbParseStringName++; +#endif + c = CUR_SCHAR(cur, l); - if (!IS_LETTER(c) && (c != '_') && - (c != ':')) { + if (!xmlIsNameStartChar(ctxt, c)) { return(NULL); } - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) { + COPY_BUF(l,buf,len,c); + cur += l; + c = CUR_SCHAR(cur, l); + while (xmlIsNameChar(ctxt, c)) { COPY_BUF(l,buf,len,c); cur += l; c = CUR_SCHAR(cur, l); @@ -2765,21 +3680,23 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { */ xmlChar *buffer; int max = len * 2; - + buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); if (buffer == NULL) { xmlErrMemory(ctxt, NULL); return(NULL); } memcpy(buffer, buf, len); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || - /* test bigentname.xml */ - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) { + while (xmlIsNameChar(ctxt, c)) { if (len + 10 > max) { xmlChar *tmp; + + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + xmlFree(buffer); + return(NULL); + } max *= 2; tmp = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); @@ -2799,6 +3716,11 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { return(buffer); } } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } *str = cur; return(xmlStrndup(buf, len)); } @@ -2806,7 +3728,7 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { /** * xmlParseNmtoken: * @ctxt: an XML parser context - * + * * parse an XML Nmtoken. * * [7] Nmtoken ::= (NameChar)+ @@ -2823,21 +3745,30 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { int c; int count = 0; +#ifdef DEBUG + nbParseNmToken++; +#endif + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); c = CUR_CHAR(l); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) { - if (count++ > 100) { + while (xmlIsNameChar(ctxt, c)) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; } COPY_BUF(l,buf,len,c); NEXTL(l); c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + } if (len >= XML_MAX_NAMELEN) { /* * Okay someone managed to make a huge token, so he's ready to pay @@ -2845,25 +3776,31 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { */ xmlChar *buffer; int max = len * 2; - + buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); if (buffer == NULL) { xmlErrMemory(ctxt, NULL); return(NULL); } memcpy(buffer, buf, len); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) { - if (count++ > 100) { + while (xmlIsNameChar(ctxt, c)) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buffer); + return(NULL); + } } if (len + 10 > max) { xmlChar *tmp; + if ((max > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); + xmlFree(buffer); + return(NULL); + } max *= 2; tmp = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); @@ -2884,6 +3821,11 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { } if (len == 0) return(NULL); + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); + return(NULL); + } return(xmlStrndup(buf, len)); } @@ -2930,6 +3872,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { ctxt->instate = XML_PARSER_ENTITY_VALUE; input = ctxt->input; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } NEXT; c = CUR_CHAR(l); /* @@ -2937,12 +3883,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { * When a parameter entity reference appears in a literal entity * value, ... a single or double quote character in the replacement * text is always treated as a normal data character and will not - * terminate the literal. + * terminate the literal. * In practice it means we stop the loop only when back at parsing * the initial entity and the quote is found */ - while ((IS_CHAR(c)) && ((c != stop) || /* checked */ - (ctxt->input != input))) { + while (((IS_CHAR(c)) && ((c != stop) || /* checked */ + (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { if (len + 5 >= size) { xmlChar *tmp; @@ -2971,6 +3917,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { } } buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } /* * Raise problem w.r.t. '&' and '%' being used in non-entities @@ -3018,12 +3968,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { */ ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 0, 0, 0); - if (orig != NULL) + if (orig != NULL) *orig = buf; else xmlFree(buf); } - + return(ret); } @@ -3043,8 +3993,9 @@ static xmlChar * xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { xmlChar limit = 0; xmlChar *buf = NULL; - int len = 0; - int buf_size = 0; + xmlChar *rep = NULL; + size_t len = 0; + size_t buf_size = 0; int c, l, in_space = 0; xmlChar *current = NULL; xmlEntityPtr ent; @@ -3061,20 +4012,31 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); return(NULL); } - + /* * allocate a translation buffer. */ buf_size = XML_PARSER_BUFFER_SIZE; - buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); + buf = (xmlChar *) xmlMallocAtomic(buf_size); if (buf == NULL) goto mem_error; /* * OK loop until we reach one of the ending char or a size limit. */ c = CUR_CHAR(l); - while ((NXT(0) != limit) && /* checked */ - (c != '<')) { + while (((NXT(0) != limit) && /* checked */ + (IS_CHAR(c)) && (c != '<')) && + (ctxt->instate != XML_PARSER_EOF)) { + /* + * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE + * special option is given + */ + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + goto mem_error; + } if (c == 0) break; if (c == '&') { in_space = 0; @@ -3083,8 +4045,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { if (val == '&') { if (ctxt->replaceEntities) { - if (len > buf_size - 10) { - growBuffer(buf); + if (len + 10 > buf_size) { + growBuffer(buf, 10); } buf[len++] = '&'; } else { @@ -3092,8 +4054,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { * The reparsing will be done in xmlStringGetNodeList() * called by the attribute() function in SAX.c */ - if (len > buf_size - 10) { - growBuffer(buf); + if (len + 10 > buf_size) { + growBuffer(buf, 10); } buf[len++] = '&'; buf[len++] = '#'; @@ -3101,18 +4063,21 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { buf[len++] = '8'; buf[len++] = ';'; } - } else { - if (len > buf_size - 10) { - growBuffer(buf); + } else if (val != 0) { + if (len + 10 > buf_size) { + growBuffer(buf, 10); } len += xmlCopyChar(0, &buf[len], val); } } else { ent = xmlParseEntityRef(ctxt); + ctxt->nbentities++; + if (ent != NULL) + ctxt->nbentities += ent->owner; if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { - if (len > buf_size - 10) { - growBuffer(buf); + if (len + 10 > buf_size) { + growBuffer(buf, 10); } if ((ctxt->replaceEntities == 0) && (ent->content[0] == '&')) { @@ -3124,10 +4089,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { } else { buf[len++] = ent->content[0]; } - } else if ((ent != NULL) && + } else if ((ent != NULL) && (ctxt->replaceEntities != 0)) { - xmlChar *rep; - if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { rep = xmlStringDecodeEntities(ctxt, ent->content, XML_SUBSTITUTE_REF, @@ -3135,16 +4098,22 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { if (rep != NULL) { current = rep; while (*current != 0) { /* non input consuming */ - buf[len++] = *current++; - if (len > buf_size - 10) { - growBuffer(buf); + if ((*current == 0xD) || (*current == 0xA) || + (*current == 0x9)) { + buf[len++] = 0x20; + current++; + } else + buf[len++] = *current++; + if (len + 10 > buf_size) { + growBuffer(buf, 10); } } xmlFree(rep); + rep = NULL; } } else { - if (len > buf_size - 10) { - growBuffer(buf); + if (len + 10 > buf_size) { + growBuffer(buf, 10); } if (ent->content != NULL) buf[len++] = ent->content[0]; @@ -3158,20 +4127,27 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { * entities problems */ if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && - (ent->content != NULL)) { - xmlChar *rep; + (ent->content != NULL) && (ent->checked == 0)) { + unsigned long oldnbent = ctxt->nbentities; + rep = xmlStringDecodeEntities(ctxt, ent->content, - XML_SUBSTITUTE_REF, 0, 0, 0); - if (rep != NULL) + XML_SUBSTITUTE_REF, 0, 0, 0); + + ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; + if (rep != NULL) { + if (xmlStrchr(rep, '<')) + ent->checked |= 1; xmlFree(rep); + rep = NULL; + } } /* * Just output the reference */ buf[len++] = '&'; - if (len > buf_size - i - 10) { - growBuffer(buf); + while (len + i + 10 > buf_size) { + growBuffer(buf, i + 10); } for (;i > 0;i--) buf[len++] = *cur++; @@ -3183,8 +4159,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { if ((len != 0) || (!normalize)) { if ((!normalize) || (!in_space)) { COPY_BUF(l,buf,len,0x20); - if (len > buf_size - 10) { - growBuffer(buf); + while (len + 10 > buf_size) { + growBuffer(buf, 10); } } in_space = 1; @@ -3192,8 +4168,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { } else { in_space = 0; COPY_BUF(l,buf,len,c); - if (len > buf_size - 10) { - growBuffer(buf); + if (len + 10 > buf_size) { + growBuffer(buf, 10); } } NEXTL(l); @@ -3201,22 +4177,46 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { GROW; c = CUR_CHAR(l); } + if (ctxt->instate == XML_PARSER_EOF) + goto error; + if ((in_space) && (normalize)) { - while (buf[len - 1] == 0x20) len--; + while ((len > 0) && (buf[len - 1] == 0x20)) len--; } buf[len] = 0; if (RAW == '<') { xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); } else if (RAW != limit) { - xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, - "AttValue: ' expected\n"); + if ((c != 0) && (!IS_CHAR(c))) { + xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, + "invalid character in attribute value\n"); + } else { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue: ' expected\n"); + } } else NEXT; - if (attlen != NULL) *attlen = len; + + /* + * There we potentially risk an overflow, don't allow attribute value of + * length more than INT_MAX it is a very reasonnable assumption ! + */ + if (len >= INT_MAX) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + goto mem_error; + } + + if (attlen != NULL) *attlen = (int) len; return(buf); mem_error: xmlErrMemory(ctxt, NULL); +error: + if (buf != NULL) + xmlFree(buf); + if (rep != NULL) + xmlFree(rep); return(NULL); } @@ -3233,20 +4233,20 @@ mem_error: * * 3.3.3 Attribute-Value Normalization: * Before the value of an attribute is passed to the application or - * checked for validity, the XML processor must normalize it as follows: + * checked for validity, the XML processor must normalize it as follows: * - a character reference is processed by appending the referenced * character to the attribute value * - an entity reference is processed by recursively processing the - * replacement text of the entity + * replacement text of the entity * - a whitespace character (#x20, #xD, #xA, #x9) is processed by * appending #x20 to the normalized value, except that only a single * #x20 is appended for a "#xD#xA" sequence that is part of an external - * parsed entity or the literal entity value of an internal parsed entity - * - other characters are processed by appending them to the normalized value + * parsed entity or the literal entity value of an internal parsed entity + * - other characters are processed by appending them to the normalized value * If the declared value is not CDATA, then the XML processor must further * process the normalized attribute value by discarding any leading and * trailing space (#x20) characters, and by replacing sequences of space - * (#x20) characters by a single space (#x20) character. + * (#x20) characters by a single space (#x20) character. * All attributes for which no declaration has been read should be treated * by a non-validating parser as if declared CDATA. * @@ -3263,7 +4263,7 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) { /** * xmlParseSystemLiteral: * @ctxt: an XML parser context - * + * * parse an XML Literal * * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") @@ -3292,7 +4292,7 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); return(NULL); } - + buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); if (buf == NULL) { xmlErrMemory(ctxt, NULL); @@ -3304,6 +4304,13 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { if (len + 5 >= size) { xmlChar *tmp; + if ((size > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); + xmlFree(buf); + ctxt->instate = (xmlParserInputState) state; + return(NULL); + } size *= 2; tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (tmp == NULL) { @@ -3318,6 +4325,10 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } } COPY_BUF(l,buf,len,cur); NEXTL(l); @@ -3381,6 +4392,12 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { if (len + 1 >= size) { xmlChar *tmp; + if ((size > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); + xmlFree(buf); + return(NULL); + } size *= 2; tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (tmp == NULL) { @@ -3395,6 +4412,10 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } } NEXT; cur = CUR; @@ -3414,7 +4435,7 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { return(buf); } -void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); +static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); /* * used for the test in the inner loop of the char data testing @@ -3465,7 +4486,7 @@ static const unsigned char test_char_data[256] = { * The right angle bracket (>) may be represented using the string ">", * and must, for compatibility, be escaped using ">" or a character * reference when it appears in the string "]]>" in content, when that - * string is not marking the end of a CDATA section. + * string is not marking the end of a CDATA section. * * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */ @@ -3488,7 +4509,7 @@ xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { in = ctxt->input->cur; do { get_more_space: - while (*in == 0x20) in++; + while (*in == 0x20) { in++; ctxt->input->col++; } if (*in == 0xA) { do { ctxt->input->line++; ctxt->input->col = 1; @@ -3578,6 +4599,9 @@ get_more: line = ctxt->input->line; col = ctxt->input->col; } + /* something really bad happened in the SAX callback */ + if (ctxt->instate != XML_PARSER_CONTENT) + return; } ctxt->input->cur = in; if (*in == 0xD) { @@ -3598,6 +4622,8 @@ get_more: } SHRINK; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return; in = ctxt->input->cur; } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); nbchar = 0; @@ -3616,7 +4642,7 @@ get_more: * of xmlParseCharData() when the parsing requires handling * of non-ASCII characters. */ -void +static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; int nbchar = 0; @@ -3627,7 +4653,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { GROW; cur = CUR_CHAR(l); while ((cur != '<') && /* checked */ - (cur != '&') && + (cur != '&') && (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { @@ -3658,11 +4684,16 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { } } nbchar = 0; + /* something really bad happened in the SAX callback */ + if (ctxt->instate != XML_PARSER_CONTENT) + return; } count++; if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) + return; } NEXTL(l); cur = CUR_CHAR(l); @@ -3755,7 +4786,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { } } else { /* - * We handle [83] so we return immediately, if + * We handle [83] so we return immediately, if * "S SystemLiteral" is not detected. From a purely parsing * point of view that's a nice mess. */ @@ -3764,7 +4795,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { ptr = CUR_PTR; if (!IS_BLANK_CH(*ptr)) return(NULL); - + while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ if ((*ptr != '\'') && (*ptr != '"')) return(NULL); } @@ -3792,12 +4823,15 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */ static void -xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { +xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, + size_t len, size_t size) { int q, ql; int r, rl; int cur, l; - xmlParserInputPtr input = ctxt->input; - int count = 0; + size_t count = 0; + int inputid; + + inputid = ctxt->input->id; if (buf == NULL) { len = 0; @@ -3808,13 +4842,28 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { return; } } + GROW; /* Assure there's enough input data */ q = CUR_CHAR(ql); if (q == 0) goto not_terminated; + if (!IS_CHAR(q)) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "xmlParseComment: invalid xmlChar value %d\n", + q); + xmlFree (buf); + return; + } NEXTL(ql); r = CUR_CHAR(rl); if (r == 0) goto not_terminated; + if (!IS_CHAR(r)) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "xmlParseComment: invalid xmlChar value %d\n", + q); + xmlFree (buf); + return; + } NEXTL(rl); cur = CUR_CHAR(l); if (cur == 0) @@ -3825,16 +4874,26 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { if ((r == '-') && (q == '-')) { xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment too big found", NULL); + xmlFree (buf); + return; + } if (len + 5 >= size) { xmlChar *new_buf; - size *= 2; - new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + size_t new_size; + + new_size = size * 2; + new_buf = (xmlChar *) xmlRealloc(buf, new_size); if (new_buf == NULL) { xmlFree (buf); xmlErrMemory(ctxt, NULL); return; } buf = new_buf; + size = new_size; } COPY_BUF(ql,buf,len,q); q = r; @@ -3846,6 +4905,10 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } } NEXTL(l); cur = CUR_CHAR(l); @@ -3856,12 +4919,15 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { } } buf[len] = 0; - if (!IS_CHAR(cur)) { + if (cur == 0) { xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, "Comment not terminated \n<!--%.50s\n", buf); - xmlFree(buf); + } else if (!IS_CHAR(cur)) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "xmlParseComment: invalid xmlChar value %d\n", + cur); } else { - if (input != ctxt->input) { + if (inputid != ctxt->input->id) { xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, "Comment doesn't start and stop in the same entity\n"); } @@ -3869,14 +4935,16 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && (!ctxt->disableSAX)) ctxt->sax->comment(ctxt->userData, buf); - xmlFree(buf); } + xmlFree(buf); return; not_terminated: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, "Comment not terminated\n", NULL); xmlFree(buf); + return; } + /** * xmlParseComment: * @ctxt: an XML parser context @@ -3890,20 +4958,22 @@ not_terminated: void xmlParseComment(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; - int size = XML_PARSER_BUFFER_SIZE; - int len = 0; + size_t size = XML_PARSER_BUFFER_SIZE; + size_t len = 0; xmlParserInputState state; const xmlChar *in; - int nbchar = 0, ccol; + size_t nbchar = 0; + int ccol; + int inputid; /* * Check that there is a comment right here. */ if ((RAW != '<') || (NXT(1) != '!') || (NXT(2) != '-') || (NXT(3) != '-')) return; - state = ctxt->instate; ctxt->instate = XML_PARSER_COMMENT; + inputid = ctxt->input->id; SKIP(4); SHRINK; GROW; @@ -3973,6 +5043,13 @@ get_more: buf[len] = 0; } } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment too big found", NULL); + xmlFree (buf); + return; + } ctxt->input->cur = in; if (*in == 0xA) { in++; @@ -3990,10 +5067,18 @@ get_more: } SHRINK; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } in = ctxt->input->cur; if (*in == '-') { if (in[1] == '-') { if (in[2] == '>') { + if (ctxt->input->id != inputid) { + xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, + "comment doesn't start and stop in the same entity\n"); + } SKIP(3); if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && (!ctxt->disableSAX)) { @@ -4004,16 +5089,18 @@ get_more: } if (buf != NULL) xmlFree(buf); - ctxt->instate = state; + if (ctxt->instate != XML_PARSER_EOF) + ctxt->instate = state; return; } - if (buf != NULL) - xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, - "Comment not terminated \n<!--%.50s\n", + if (buf != NULL) { + xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, + "Double hyphen within comment: " + "<!--%.50s\n", buf); - else - xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, - "Comment not terminated \n", NULL); + } else + xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, + "Double hyphen within comment\n", NULL); in++; ctxt->input->col++; } @@ -4031,7 +5118,7 @@ get_more: /** * xmlParsePITarget: * @ctxt: an XML parser context - * + * * parse the name of a PI * * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) @@ -4067,6 +5154,10 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) { "xmlParsePITarget: invalid name prefix 'xml'\n", NULL, NULL); } + if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { + xmlNsErr(ctxt, XML_NS_ERR_COLON, + "colons are forbidden from PI names '%s'\n", name, NULL, NULL); + } return(name); } @@ -4075,7 +5166,7 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) { * xmlParseCatalogPI: * @ctxt: an XML parser context * @catalog: the PI value string - * + * * parse an XML Catalog Processing Instruction. * * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> @@ -4135,7 +5226,7 @@ error: /** * xmlParsePI: * @ctxt: an XML parser context - * + * * parse an XML Processing Instruction. * * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' @@ -4146,8 +5237,8 @@ error: void xmlParsePI(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; - int len = 0; - int size = XML_PARSER_BUFFER_SIZE; + size_t len = 0; + size_t size = XML_PARSER_BUFFER_SIZE; int cur, l; const xmlChar *target; xmlParserInputState state; @@ -4183,7 +5274,8 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { (ctxt->sax->processingInstruction != NULL)) ctxt->sax->processingInstruction(ctxt->userData, target, NULL); - ctxt->instate = state; + if (ctxt->instate != XML_PARSER_EOF) + ctxt->instate = state; return; } buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); @@ -4203,9 +5295,8 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { ((cur != '?') || (NXT(1) != '>'))) { if (len + 5 >= size) { xmlChar *tmp; - - size *= 2; - tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + size_t new_size = size * 2; + tmp = (xmlChar *) xmlRealloc(buf, new_size); if (tmp == NULL) { xmlErrMemory(ctxt, NULL); xmlFree(buf); @@ -4213,11 +5304,24 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { return; } buf = tmp; + size = new_size; } count++; if (count > 50) { GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } count = 0; + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, + "PI %s too big found", target); + xmlFree(buf); + ctxt->instate = state; + return; + } } COPY_BUF(l,buf,len,cur); NEXTL(l); @@ -4228,6 +5332,14 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { cur = CUR_CHAR(l); } } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, + "PI %s too big found", target); + xmlFree(buf); + ctxt->instate = state; + return; + } buf[len] = 0; if (cur != '?') { xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, @@ -4263,7 +5375,8 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { } else { xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); } - ctxt->instate = state; + if (ctxt->instate != XML_PARSER_EOF) + ctxt->instate = state; } } @@ -4288,7 +5401,7 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { const xmlChar *name; xmlChar *Pubid; xmlChar *Systemid; - + if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { xmlParserInputPtr input = ctxt->input; SHRINK; @@ -4310,6 +5423,11 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { "Space required after the NOTATION name'\n"); return; } + if (xmlStrchr(name, ':') != NULL) { + xmlNsErr(ctxt, XML_NS_ERR_COLON, + "colons are forbidden from notation names '%s'\n", + name, NULL, NULL); + } SKIP_BLANKS; /* @@ -4366,7 +5484,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { int isParameter = 0; xmlChar *orig = NULL; int skipped; - + /* GROW; done in the caller */ if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { xmlParserInputPtr input = ctxt->input; @@ -4394,6 +5512,11 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { "xmlParseEntityDecl: no name\n"); return; } + if (xmlStrchr(name, ':') != NULL) { + xmlNsErr(ctxt, XML_NS_ERR_COLON, + "colons are forbidden from entities names '%s'\n", + name, NULL, NULL); + } skipped = SKIP_BLANKS; if (skipped == 0) { xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, @@ -4465,6 +5588,11 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { if (ctxt->myDoc == NULL) { ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); + if (ctxt->myDoc == NULL) { + xmlErrMemory(ctxt, "New Doc failed"); + return; + } + ctxt->myDoc->properties = XML_DOC_INTERNAL; } if (ctxt->myDoc->intSubset == NULL) ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, @@ -4533,6 +5661,11 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { if (ctxt->myDoc == NULL) { ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); + if (ctxt->myDoc == NULL) { + xmlErrMemory(ctxt, "New Doc failed"); + return; + } + ctxt->myDoc->properties = XML_DOC_INTERNAL; } if (ctxt->myDoc->intSubset == NULL) @@ -4545,10 +5678,13 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { } } } + if (ctxt->instate == XML_PARSER_EOF) + return; SKIP_BLANKS; if (RAW != '>') { xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, "xmlParseEntityDecl: entity %s not terminated\n", name); + xmlHaltParser(ctxt); } else { if (input != ctxt->input) { xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, @@ -4608,13 +5744,13 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { * * [ VC: Fixed Attribute Default ] * if an attribute has a default value declared with the #FIXED - * keyword, instances of that attribute must match the default value. + * keyword, instances of that attribute must match the default value. * * [ WFC: No < in Attribute Values ] * handled in xmlParseAttValue() * * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED - * or XML_ATTRIBUTE_FIXED. + * or XML_ATTRIBUTE_FIXED. */ int @@ -4663,7 +5799,7 @@ xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { * * [ VC: Notation Attributes ] * Values of this type must match one of the notation names included - * in the declaration; all notation names in the declaration must be declared. + * in the declaration; all notation names in the declaration must be declared. * * Returns: the notation attribute tree built while parsing */ @@ -4671,7 +5807,7 @@ xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { xmlEnumerationPtr xmlParseNotationType(xmlParserCtxtPtr ctxt) { const xmlChar *name; - xmlEnumerationPtr ret = NULL, last = NULL, cur; + xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; if (RAW != '(') { xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); @@ -4685,22 +5821,39 @@ xmlParseNotationType(xmlParserCtxtPtr ctxt) { if (name == NULL) { xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, "Name expected in NOTATION declaration\n"); - return(ret); + xmlFreeEnumeration(ret); + return(NULL); } - cur = xmlCreateEnumeration(name); - if (cur == NULL) return(ret); - if (last == NULL) ret = last = cur; - else { - last->next = cur; - last = cur; + tmp = ret; + while (tmp != NULL) { + if (xmlStrEqual(name, tmp->name)) { + xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, + "standalone: attribute notation value token %s duplicated\n", + name, NULL); + if (!xmlDictOwns(ctxt->dict, name)) + xmlFree((xmlChar *) name); + break; + } + tmp = tmp->next; + } + if (tmp == NULL) { + cur = xmlCreateEnumeration(name); + if (cur == NULL) { + xmlFreeEnumeration(ret); + return(NULL); + } + if (last == NULL) ret = last = cur; + else { + last->next = cur; + last = cur; + } } SKIP_BLANKS; } while (RAW == '|'); if (RAW != ')') { xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); - if ((last != NULL) && (last != ret)) - xmlFreeEnumeration(last); - return(ret); + xmlFreeEnumeration(ret); + return(NULL); } NEXT; return(ret); @@ -4724,7 +5877,7 @@ xmlParseNotationType(xmlParserCtxtPtr ctxt) { xmlEnumerationPtr xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { xmlChar *name; - xmlEnumerationPtr ret = NULL, last = NULL, cur; + xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; if (RAW != '(') { xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); @@ -4739,13 +5892,31 @@ xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); return(ret); } - cur = xmlCreateEnumeration(name); - xmlFree(name); - if (cur == NULL) return(ret); - if (last == NULL) ret = last = cur; - else { - last->next = cur; - last = cur; + tmp = ret; + while (tmp != NULL) { + if (xmlStrEqual(name, tmp->name)) { + xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, + "standalone: attribute enumeration value token %s duplicated\n", + name, NULL); + if (!xmlDictOwns(ctxt->dict, name)) + xmlFree(name); + break; + } + tmp = tmp->next; + } + if (tmp == NULL) { + cur = xmlCreateEnumeration(name); + if (!xmlDictOwns(ctxt->dict, name)) + xmlFree(name); + if (cur == NULL) { + xmlFreeEnumeration(ret); + return(NULL); + } + if (last == NULL) ret = last = cur; + else { + last->next = cur; + last = cur; + } } SKIP_BLANKS; } while (RAW == '|'); @@ -4828,15 +5999,15 @@ xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { * [ VC: Entity Name ] * Values of type ENTITY must match the Name production, values * of type ENTITIES must match Names; each Entity Name must match the - * name of an unparsed entity declared in the DTD. + * name of an unparsed entity declared in the DTD. * * [ VC: Name Token ] * Values of type NMTOKEN must match the Nmtoken production; values - * of type NMTOKENS must match Nmtokens. + * of type NMTOKENS must match Nmtokens. * * Returns the attribute type */ -int +int xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { SHRINK; if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { @@ -4901,7 +6072,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { } SKIP_BLANKS; GROW; - while (RAW != '>') { + while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *check = CUR_PTR; int type; int def; @@ -4946,6 +6117,8 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { xmlFreeEnumeration(tree); break; } + if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) + xmlAttrNormalizeSpace(defaultValue, defaultValue); GROW; if (RAW != '>') { @@ -4977,11 +6150,11 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { xmlFreeEnumeration(tree); if ((ctxt->sax2) && (defaultValue != NULL) && - (def != XML_ATTRIBUTE_IMPLIED) && + (def != XML_ATTRIBUTE_IMPLIED) && (def != XML_ATTRIBUTE_REQUIRED)) { xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); } - if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) { + if (ctxt->sax2) { xmlAddSpecialAttr(ctxt, elemName, attrName, type); } if (defaultValue != NULL) @@ -4990,8 +6163,9 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { } if (RAW == '>') { if (input != ctxt->input) { - xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, - "Attribute list declaration doesn't start and stop in the same entity\n"); + xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, + "Attribute list declaration doesn't start and stop in the same entity\n", + NULL, NULL); } NEXT; } @@ -5005,7 +6179,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { * * parse the declaration for a Mixed Element content * The leading '(' and spaces have been skipped in xmlParseElementContentDecl - * + * * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | * '(' S? '#PCDATA' S? ')' * @@ -5013,7 +6187,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { * * [ VC: No Duplicate Types ] * The same name must not appear more than once in a single - * mixed-content declaration. + * mixed-content declaration. * * returns: the list of the xmlElementContentPtr describing the element choices */ @@ -5031,10 +6205,12 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { if ((ctxt->validate) && (ctxt->input->id != inputchk)) { xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, "Element content declaration doesn't start and stop in the same entity\n", - NULL); + NULL, NULL); } NEXT; ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); + if (ret == NULL) + return(NULL); if (RAW == '*') { ret->ocur = XML_ELEMENT_CONTENT_MULT; NEXT; @@ -5045,7 +6221,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); if (ret == NULL) return(NULL); } - while (RAW == '|') { + while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { NEXT; if (elem == NULL) { ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); @@ -5083,11 +6259,12 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { if (cur->c2 != NULL) cur->c2->parent = cur; } - ret->ocur = XML_ELEMENT_CONTENT_MULT; + if (ret != NULL) + ret->ocur = XML_ELEMENT_CONTENT_MULT; if ((ctxt->validate) && (ctxt->input->id != inputchk)) { xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, "Element content declaration doesn't start and stop in the same entity\n", - NULL); + NULL, NULL); } SKIP(2); } else { @@ -5103,13 +6280,14 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { } /** - * xmlParseElementChildrenContentDecl: + * xmlParseElementChildrenContentDeclPriv: * @ctxt: an XML parser context * @inputchk: the input used for the current entity, needed for boundary checks + * @depth: the level of recursion * * parse the declaration for a Mixed Element content * The leading '(' and spaces have been skipped in xmlParseElementContentDecl - * + * * * [47] children ::= (choice | seq) ('?' | '*' | '+')? * @@ -5130,15 +6308,23 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { * be empty, and neither the first nor last non-blank character of * the replacement text should be a connector (| or ,). * - * Returns the tree of xmlElementContentPtr describing the element + * Returns the tree of xmlElementContentPtr describing the element * hierarchy. */ -xmlElementContentPtr -xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { +static xmlElementContentPtr +xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, + int depth) { xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; const xmlChar *elem; xmlChar type = 0; + if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || + (depth > 2048)) { + xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, +"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", + depth); + return(NULL); + } SKIP_BLANKS; GROW; if (RAW == '(') { @@ -5147,7 +6333,8 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { /* Recurse on first child */ NEXT; SKIP_BLANKS; - cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid); + cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, + depth + 1); SKIP_BLANKS; GROW; } else { @@ -5178,7 +6365,7 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { } SKIP_BLANKS; SHRINK; - while (RAW != ')') { + while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { /* * Each loop we parse one separator and one element. */ @@ -5265,6 +6452,8 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { } } else { xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); + if ((last != NULL) && (last != ret)) + xmlFreeDocElementContent(ctxt->myDoc, last); if (ret != NULL) xmlFreeDocElementContent(ctxt->myDoc, ret); return(NULL); @@ -5277,7 +6466,8 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { /* Recurse on second child */ NEXT; SKIP_BLANKS; - last = xmlParseElementChildrenContentDecl(ctxt, inputid); + last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, + depth + 1); SKIP_BLANKS; } else { elem = xmlParseName(ctxt); @@ -5288,6 +6478,11 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { return(NULL); } last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); + if (last == NULL) { + if (ret != NULL) + xmlFreeDocElementContent(ctxt->myDoc, ret); + return(NULL); + } if (RAW == '?') { last->ocur = XML_ELEMENT_CONTENT_OPT; NEXT; @@ -5312,7 +6507,7 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { if ((ctxt->validate) && (ctxt->input->id != inputchk)) { xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, "Element content declaration doesn't start and stop in the same entity\n", - NULL); + NULL, NULL); } NEXT; if (RAW == '?') { @@ -5383,6 +6578,42 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { } /** + * xmlParseElementChildrenContentDecl: + * @ctxt: an XML parser context + * @inputchk: the input used for the current entity, needed for boundary checks + * + * parse the declaration for a Mixed Element content + * The leading '(' and spaces have been skipped in xmlParseElementContentDecl + * + * [47] children ::= (choice | seq) ('?' | '*' | '+')? + * + * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? + * + * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' + * + * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' + * + * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] + * TODO Parameter-entity replacement text must be properly nested + * with parenthesized groups. That is to say, if either of the + * opening or closing parentheses in a choice, seq, or Mixed + * construct is contained in the replacement text for a parameter + * entity, both must be contained in the same replacement text. For + * interoperability, if a parameter-entity reference appears in a + * choice, seq, or Mixed construct, its replacement text should not + * be empty, and neither the first nor last non-blank character of + * the replacement text should be a connector (| or ,). + * + * Returns the tree of xmlElementContentPtr describing the element + * hierarchy. + */ +xmlElementContentPtr +xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { + /* stub left for API/ABI compat */ + return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); +} + +/** * xmlParseElementContentDecl: * @ctxt: an XML parser context * @name: the name of the element being defined. @@ -5390,7 +6621,7 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { * * parse the declaration for an Element content either Mixed or Children, * the cases EMPTY and ANY are handled directly in xmlParseElementDecl - * + * * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children * * returns: the type of element content XML_ELEMENT_TYPE_xxx @@ -5413,12 +6644,14 @@ xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, } NEXT; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(-1); SKIP_BLANKS; if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { tree = xmlParseElementMixedContentDecl(ctxt, inputid); res = XML_ELEMENT_TYPE_MIXED; } else { - tree = xmlParseElementChildrenContentDecl(ctxt, inputid); + tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); res = XML_ELEMENT_TYPE_ELEMENT; } SKIP_BLANKS; @@ -5516,7 +6749,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, "Element declaration doesn't start and stop in the same entity\n"); } - + NEXT; if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && (ctxt->sax->elementDecl != NULL)) { @@ -5528,7 +6761,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { /* * this is a trick: if xmlAddElementDecl is called, * instead of copying the full tree it is plugged directly - * if called from the parser. Avoid duplicating the + * if called from the parser. Avoid duplicating the * interfaces or change the API/ABI */ xmlFreeDocElementContent(ctxt->myDoc, content); @@ -5545,8 +6778,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { * xmlParseConditionalSections * @ctxt: an XML parser context * - * [61] conditionalSect ::= includeSect | ignoreSect - * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' + * [61] conditionalSect ::= includeSect | ignoreSect + * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) @@ -5554,6 +6787,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { static void xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { + int id = ctxt->input->id; + SKIP(3); SKIP_BLANKS; if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { @@ -5561,7 +6796,14 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { SKIP_BLANKS; if (RAW != '[') { xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); + xmlHaltParser(ctxt); + return; } else { + if (ctxt->input->id != id) { + xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, + "All markup of the conditional section is not in the same entity\n", + NULL, NULL); + } NEXT; } if (xmlParserDebugEntities) { @@ -5573,8 +6815,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { "Entering INCLUDE Conditional Section\n"); } - while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || - (NXT(2) != '>'))) { + while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || + (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *check = CUR_PTR; unsigned int cons = ctxt->input->consumed; @@ -5616,7 +6858,14 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { SKIP_BLANKS; if (RAW != '[') { xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); + xmlHaltParser(ctxt); + return; } else { + if (ctxt->input->id != id) { + xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, + "All markup of the conditional section is not in the same entity\n", + NULL, NULL); + } NEXT; } if (xmlParserDebugEntities) { @@ -5637,7 +6886,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { if (ctxt->recovery == 0) ctxt->disableSAX = 1; ctxt->instate = XML_PARSER_IGNORE; - while ((depth >= 0) && (RAW != 0)) { + while (((depth >= 0) && (RAW != 0)) && + (ctxt->instate != XML_PARSER_EOF)) { if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { depth++; SKIP(3); @@ -5665,6 +6915,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { } else { xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); + xmlHaltParser(ctxt); + return; } if (RAW == 0) @@ -5673,14 +6925,21 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { if (RAW == 0) { xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); } else { - SKIP(3); + if (ctxt->input->id != id) { + xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, + "All markup of the conditional section is not in the same entity\n", + NULL, NULL); + } + if ((ctxt-> instate != XML_PARSER_EOF) && + ((ctxt->input->cur + 3) <= ctxt->input->end)) + SKIP(3); } } /** * xmlParseMarkupDecl: * @ctxt: an XML parser context - * + * * parse Markup declarations * * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | @@ -5697,7 +6956,7 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { * In the internal DTD subset, parameter-entity references can occur * only where markup declarations can occur, not within markup declarations. * (This does not apply to references that occur in external parameter - * entities or to the external subset.) + * entities or to the external subset.) */ void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { @@ -5728,6 +6987,14 @@ xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { xmlParsePI(ctxt); } } + + /* + * detect requirement to exit there and act accordingly + * and avoid having instate overriden later on + */ + if (ctxt->instate == XML_PARSER_EOF) + return; + /* * This is only for internal subset. On external entities, * the replacement is done before parsing stage @@ -5751,12 +7018,10 @@ xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { /** * xmlParseTextDecl: * @ctxt: an XML parser context - * + * * parse an XML declaration header for external entities * * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' - * - * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? */ void @@ -5828,7 +7093,7 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) { * @ctxt: an XML parser context * @ExternalID: the external identifier * @SystemID: the system identifier (or URL) - * + * * parse Markup declarations from an external subset * * [30] extSubset ::= textDecl? extSubsetDecl @@ -5840,18 +7105,38 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, const xmlChar *SystemID) { xmlDetectSAX2(ctxt); GROW; + + if ((ctxt->encoding == NULL) && + (ctxt->input->end - ctxt->input->cur >= 4)) { + xmlChar start[4]; + xmlCharEncoding enc; + + start[0] = RAW; + start[1] = NXT(1); + start[2] = NXT(2); + start[3] = NXT(3); + enc = xmlDetectCharEncoding(start, 4); + if (enc != XML_CHAR_ENCODING_NONE) + xmlSwitchEncoding(ctxt, enc); + } + if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { xmlParseTextDecl(ctxt); if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { /* * The XML REC instructs us to stop parsing right here */ - ctxt->instate = XML_PARSER_EOF; + xmlHaltParser(ctxt); return; } } if (ctxt->myDoc == NULL) { ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); + if (ctxt->myDoc == NULL) { + xmlErrMemory(ctxt, "New Doc failed"); + return; + } + ctxt->myDoc->properties = XML_DOC_INTERNAL; } if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); @@ -5885,7 +7170,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, break; } } - + if (RAW != 0) { xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); } @@ -5895,7 +7180,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, /** * xmlParseReference: * @ctxt: an XML parser context - * + * * parse and handle entity references in content, depending on the SAX * interface, this may end-up in a call to character() if this is a * CharRef, a predefined entity, if there is no reference() callback. @@ -5907,14 +7192,25 @@ void xmlParseReference(xmlParserCtxtPtr ctxt) { xmlEntityPtr ent; xmlChar *val; - if (RAW != '&') return; + int was_checked; + xmlNodePtr list = NULL; + xmlParserErrors ret = XML_ERR_OK; + + + if (RAW != '&') + return; + /* + * Simple case of a CharRef + */ if (NXT(1) == '#') { int i = 0; xmlChar out[10]; int hex = NXT(2); int value = xmlParseCharRef(ctxt); - + + if (value == 0) + return; if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { /* * So we are using non-UTF-8 buffers @@ -5946,359 +7242,367 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { (!ctxt->disableSAX)) ctxt->sax->characters(ctxt->userData, out, i); } - } else { - int was_checked; + return; + } - ent = xmlParseEntityRef(ctxt); - if (ent == NULL) return; - if (!ctxt->wellFormed) - return; - was_checked = ent->checked; - if ((ent->name != NULL) && - (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { - xmlNodePtr list = NULL; - xmlParserErrors ret = XML_ERR_OK; + /* + * We are seeing an entity reference + */ + ent = xmlParseEntityRef(ctxt); + if (ent == NULL) return; + if (!ctxt->wellFormed) + return; + was_checked = ent->checked; + + /* special case of predefined entities */ + if ((ent->name == NULL) || + (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { + val = ent->content; + if (val == NULL) return; + /* + * inline the entity. + */ + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); + return; + } + /* + * The first reference to the entity trigger a parsing phase + * where the ent->children is filled with the result from + * the parsing. + * Note: external parsed entities will not be loaded, it is not + * required for a non-validating parser, unless the parsing option + * of validating, or substituting entities were given. Doing so is + * far more secure as the parser will only process data coming from + * the document entity by default. + */ + if (((ent->checked == 0) || + ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) && + ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || + (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { + unsigned long oldnbent = ctxt->nbentities; - /* - * The first reference to the entity trigger a parsing phase - * where the ent->children is filled with the result from - * the parsing. - */ - if (ent->checked == 0) { - xmlChar *value; + /* + * This is a bit hackish but this seems the best + * way to make sure both SAX and DOM entity support + * behaves okay. + */ + void *user_data; + if (ctxt->userData == ctxt) + user_data = NULL; + else + user_data = ctxt->userData; - value = ent->content; + /* + * Check that this entity is well formed + * 4.3.2: An internal general parsed entity is well-formed + * if its replacement text matches the production labeled + * content. + */ + if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { + ctxt->depth++; + ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, + user_data, &list); + ctxt->depth--; + + } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { + ctxt->depth++; + ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, + user_data, ctxt->depth, ent->URI, + ent->ExternalID, &list); + ctxt->depth--; + } else { + ret = XML_ERR_ENTITY_PE_INTERNAL; + xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, + "invalid entity type found\n", NULL); + } - /* - * Check that this entity is well formed - */ - if ((value != NULL) && (value[0] != 0) && - (value[1] == 0) && (value[0] == '<') && - (xmlStrEqual(ent->name, BAD_CAST "lt"))) { - /* - * DONE: get definite answer on this !!! - * Lots of entity decls are used to declare a single - * char - * <!ENTITY lt "<"> - * Which seems to be valid since - * 2.4: The ampersand character (&) and the left angle - * bracket (<) may appear in their literal form only - * when used ... They are also legal within the literal - * entity value of an internal entity declaration;i - * see "4.3.2 Well-Formed Parsed Entities". - * IMHO 2.4 and 4.3.2 are directly in contradiction. - * Looking at the OASIS test suite and James Clark - * tests, this is broken. However the XML REC uses - * it. Is the XML REC not well-formed ???? - * This is a hack to avoid this problem - * - * ANSWER: since lt gt amp .. are already defined, - * this is a redefinition and hence the fact that the - * content is not well balanced is not a Wf error, this - * is lousy but acceptable. - */ - list = xmlNewDocText(ctxt->myDoc, value); - if (list != NULL) { - if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && - (ent->children == NULL)) { - ent->children = list; - ent->last = list; - ent->owner = 1; - list->parent = (xmlNodePtr) ent; - } else { - xmlFreeNodeList(list); - } - } else if (list != NULL) { - xmlFreeNodeList(list); - } - } else { - /* - * 4.3.2: An internal general parsed entity is well-formed - * if its replacement text matches the production labeled - * content. - */ + /* + * Store the number of entities needing parsing for this entity + * content and do checkings + */ + ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; + if ((ent->content != NULL) && (xmlStrchr(ent->content, '<'))) + ent->checked |= 1; + if (ret == XML_ERR_ENTITY_LOOP) { + xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); + xmlFreeNodeList(list); + return; + } + if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { + xmlFreeNodeList(list); + return; + } - void *user_data; + if ((ret == XML_ERR_OK) && (list != NULL)) { + if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || + (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& + (ent->children == NULL)) { + ent->children = list; + if (ctxt->replaceEntities) { /* - * This is a bit hackish but this seems the best - * way to make sure both SAX and DOM entity support - * behaves okay. + * Prune it directly in the generated document + * except for single text nodes. */ - if (ctxt->userData == ctxt) - user_data = NULL; - else - user_data = ctxt->userData; - - if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { - ctxt->depth++; - ret = xmlParseBalancedChunkMemoryInternal(ctxt, - value, user_data, &list); - ctxt->depth--; - } else if (ent->etype == - XML_EXTERNAL_GENERAL_PARSED_ENTITY) { - ctxt->depth++; - ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, - ctxt->sax, user_data, ctxt->depth, - ent->URI, ent->ExternalID, &list); - ctxt->depth--; + if (((list->type == XML_TEXT_NODE) && + (list->next == NULL)) || + (ctxt->parseMode == XML_PARSE_READER)) { + list->parent = (xmlNodePtr) ent; + list = NULL; + ent->owner = 1; } else { - ret = XML_ERR_ENTITY_PE_INTERNAL; - xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, - "invalid entity type found\n", NULL); - } - if (ret == XML_ERR_ENTITY_LOOP) { - xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); - return; - } else if ((ret == XML_ERR_OK) && (list != NULL)) { - if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || - (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& - (ent->children == NULL)) { - ent->children = list; - if (ctxt->replaceEntities) { - /* - * Prune it directly in the generated document - * except for single text nodes. - */ - if (((list->type == XML_TEXT_NODE) && - (list->next == NULL)) || - (ctxt->parseMode == XML_PARSE_READER)) { - list->parent = (xmlNodePtr) ent; - list = NULL; - ent->owner = 1; - } else { - ent->owner = 0; - while (list != NULL) { - list->parent = (xmlNodePtr) ctxt->node; - list->doc = ctxt->myDoc; - if (list->next == NULL) - ent->last = list; - list = list->next; - } - list = ent->children; + ent->owner = 0; + while (list != NULL) { + list->parent = (xmlNodePtr) ctxt->node; + list->doc = ctxt->myDoc; + if (list->next == NULL) + ent->last = list; + list = list->next; + } + list = ent->children; #ifdef LIBXML_LEGACY_ENABLED - if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) - xmlAddEntityReference(ent, list, NULL); + if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) + xmlAddEntityReference(ent, list, NULL); #endif /* LIBXML_LEGACY_ENABLED */ - } - } else { - ent->owner = 1; - while (list != NULL) { - list->parent = (xmlNodePtr) ent; - if (list->next == NULL) - ent->last = list; - list = list->next; - } - } - } else { - xmlFreeNodeList(list); - list = NULL; - } - } else if ((ret != XML_ERR_OK) && - (ret != XML_WAR_UNDECLARED_ENTITY)) { - xmlFatalErr(ctxt, ret, NULL); - } else if (list != NULL) { - xmlFreeNodeList(list); - list = NULL; + } + } else { + ent->owner = 1; + while (list != NULL) { + list->parent = (xmlNodePtr) ent; + xmlSetTreeDoc(list, ent->doc); + if (list->next == NULL) + ent->last = list; + list = list->next; } } - ent->checked = 1; + } else { + xmlFreeNodeList(list); + list = NULL; } + } else if ((ret != XML_ERR_OK) && + (ret != XML_WAR_UNDECLARED_ENTITY)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, + "Entity '%s' failed to parse\n", ent->name); + xmlParserEntityCheck(ctxt, 0, ent, 0); + } else if (list != NULL) { + xmlFreeNodeList(list); + list = NULL; + } + if (ent->checked == 0) + ent->checked = 2; + } else if (ent->checked != 1) { + ctxt->nbentities += ent->checked / 2; + } - if (ent->children == NULL) { - /* - * Probably running in SAX mode and the callbacks don't - * build the entity content. So unless we already went - * though parsing for first checking go though the entity - * content to generate callbacks associated to the entity - */ - if (was_checked == 1) { - void *user_data; - /* - * This is a bit hackish but this seems the best - * way to make sure both SAX and DOM entity support - * behaves okay. - */ - if (ctxt->userData == ctxt) - user_data = NULL; - else - user_data = ctxt->userData; - - if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { - ctxt->depth++; - ret = xmlParseBalancedChunkMemoryInternal(ctxt, - ent->content, user_data, NULL); - ctxt->depth--; - } else if (ent->etype == - XML_EXTERNAL_GENERAL_PARSED_ENTITY) { - ctxt->depth++; - ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, - ctxt->sax, user_data, ctxt->depth, - ent->URI, ent->ExternalID, NULL); - ctxt->depth--; - } else { - ret = XML_ERR_ENTITY_PE_INTERNAL; - xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, - "invalid entity type found\n", NULL); - } - if (ret == XML_ERR_ENTITY_LOOP) { - xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); - return; - } - } - if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && - (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { - /* - * Entity reference callback comes second, it's somewhat - * superfluous but a compatibility to historical behaviour - */ - ctxt->sax->reference(ctxt->userData, ent->name); - } + /* + * Now that the entity content has been gathered + * provide it to the application, this can take different forms based + * on the parsing modes. + */ + if (ent->children == NULL) { + /* + * Probably running in SAX mode and the callbacks don't + * build the entity content. So unless we already went + * though parsing for first checking go though the entity + * content to generate callbacks associated to the entity + */ + if (was_checked != 0) { + void *user_data; + /* + * This is a bit hackish but this seems the best + * way to make sure both SAX and DOM entity support + * behaves okay. + */ + if (ctxt->userData == ctxt) + user_data = NULL; + else + user_data = ctxt->userData; + + if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { + ctxt->depth++; + ret = xmlParseBalancedChunkMemoryInternal(ctxt, + ent->content, user_data, NULL); + ctxt->depth--; + } else if (ent->etype == + XML_EXTERNAL_GENERAL_PARSED_ENTITY) { + ctxt->depth++; + ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, + ctxt->sax, user_data, ctxt->depth, + ent->URI, ent->ExternalID, NULL); + ctxt->depth--; + } else { + ret = XML_ERR_ENTITY_PE_INTERNAL; + xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, + "invalid entity type found\n", NULL); + } + if (ret == XML_ERR_ENTITY_LOOP) { + xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); return; } - if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && - (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { + } + if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && + (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { + /* + * Entity reference callback comes second, it's somewhat + * superfluous but a compatibility to historical behaviour + */ + ctxt->sax->reference(ctxt->userData, ent->name); + } + return; + } + + /* + * If we didn't get any children for the entity being built + */ + if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && + (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { + /* + * Create a node. + */ + ctxt->sax->reference(ctxt->userData, ent->name); + return; + } + + if ((ctxt->replaceEntities) || (ent->children == NULL)) { + /* + * There is a problem on the handling of _private for entities + * (bug 155816): Should we copy the content of the field from + * the entity (possibly overwriting some value set by the user + * when a copy is created), should we leave it alone, or should + * we try to take care of different situations? The problem + * is exacerbated by the usage of this field by the xmlReader. + * To fix this bug, we look at _private on the created node + * and, if it's NULL, we copy in whatever was in the entity. + * If it's not NULL we leave it alone. This is somewhat of a + * hack - maybe we should have further tests to determine + * what to do. + */ + if ((ctxt->node != NULL) && (ent->children != NULL)) { + /* + * Seems we are generating the DOM content, do + * a simple tree copy for all references except the first + * In the first occurrence list contains the replacement. + */ + if (((list == NULL) && (ent->owner == 0)) || + (ctxt->parseMode == XML_PARSE_READER)) { + xmlNodePtr nw = NULL, cur, firstChild = NULL; + /* - * Create a node. + * We are copying here, make sure there is no abuse */ - ctxt->sax->reference(ctxt->userData, ent->name); - return; - } - if ((ctxt->replaceEntities) || (ent->children == NULL)) { + ctxt->sizeentcopy += ent->length + 5; + if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) + return; + /* - * There is a problem on the handling of _private for entities - * (bug 155816): Should we copy the content of the field from - * the entity (possibly overwriting some value set by the user - * when a copy is created), should we leave it alone, or should - * we try to take care of different situations? The problem - * is exacerbated by the usage of this field by the xmlReader. - * To fix this bug, we look at _private on the created node - * and, if it's NULL, we copy in whatever was in the entity. - * If it's not NULL we leave it alone. This is somewhat of a - * hack - maybe we should have further tests to determine - * what to do. + * when operating on a reader, the entities definitions + * are always owning the entities subtree. + if (ctxt->parseMode == XML_PARSE_READER) + ent->owner = 1; */ - if ((ctxt->node != NULL) && (ent->children != NULL)) { - /* - * Seems we are generating the DOM content, do - * a simple tree copy for all references except the first - * In the first occurrence list contains the replacement. - * progressive == 2 means we are operating on the Reader - * and since nodes are discarded we must copy all the time. - */ - if (((list == NULL) && (ent->owner == 0)) || - (ctxt->parseMode == XML_PARSE_READER)) { - xmlNodePtr nw = NULL, cur, firstChild = NULL; + cur = ent->children; + while (cur != NULL) { + nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); + if (nw != NULL) { + if (nw->_private == NULL) + nw->_private = cur->_private; + if (firstChild == NULL){ + firstChild = nw; + } + nw = xmlAddChild(ctxt->node, nw); + } + if (cur == ent->last) { /* - * when operating on a reader, the entities definitions - * are always owning the entities subtree. - if (ctxt->parseMode == XML_PARSE_READER) - ent->owner = 1; + * needed to detect some strange empty + * node cases in the reader tests */ + if ((ctxt->parseMode == XML_PARSE_READER) && + (nw != NULL) && + (nw->type == XML_ELEMENT_NODE) && + (nw->children == NULL)) + nw->extra = 1; - cur = ent->children; - while (cur != NULL) { - nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); - if (nw != NULL) { - if (nw->_private == NULL) - nw->_private = cur->_private; - if (firstChild == NULL){ - firstChild = nw; - } - nw = xmlAddChild(ctxt->node, nw); - } - if (cur == ent->last) { - /* - * needed to detect some strange empty - * node cases in the reader tests - */ - if ((ctxt->parseMode == XML_PARSE_READER) && - (nw != NULL) && - (nw->type == XML_ELEMENT_NODE) && - (nw->children == NULL)) - nw->extra = 1; - - break; - } - cur = cur->next; - } + break; + } + cur = cur->next; + } #ifdef LIBXML_LEGACY_ENABLED - if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) - xmlAddEntityReference(ent, firstChild, nw); + if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) + xmlAddEntityReference(ent, firstChild, nw); #endif /* LIBXML_LEGACY_ENABLED */ - } else if (list == NULL) { - xmlNodePtr nw = NULL, cur, next, last, - firstChild = NULL; - /* - * Copy the entity child list and make it the new - * entity child list. The goal is to make sure any - * ID or REF referenced will be the one from the - * document content and not the entity copy. - */ - cur = ent->children; - ent->children = NULL; - last = ent->last; - ent->last = NULL; - while (cur != NULL) { - next = cur->next; - cur->next = NULL; - cur->parent = NULL; - nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); - if (nw != NULL) { - if (nw->_private == NULL) - nw->_private = cur->_private; - if (firstChild == NULL){ - firstChild = cur; - } - xmlAddChild((xmlNodePtr) ent, nw); - xmlAddChild(ctxt->node, cur); - } - if (cur == last) - break; - cur = next; + } else if ((list == NULL) || (ctxt->inputNr > 0)) { + xmlNodePtr nw = NULL, cur, next, last, + firstChild = NULL; + + /* + * We are copying here, make sure there is no abuse + */ + ctxt->sizeentcopy += ent->length + 5; + if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) + return; + + /* + * Copy the entity child list and make it the new + * entity child list. The goal is to make sure any + * ID or REF referenced will be the one from the + * document content and not the entity copy. + */ + cur = ent->children; + ent->children = NULL; + last = ent->last; + ent->last = NULL; + while (cur != NULL) { + next = cur->next; + cur->next = NULL; + cur->parent = NULL; + nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); + if (nw != NULL) { + if (nw->_private == NULL) + nw->_private = cur->_private; + if (firstChild == NULL){ + firstChild = cur; } - ent->owner = 1; + xmlAddChild((xmlNodePtr) ent, nw); + xmlAddChild(ctxt->node, cur); + } + if (cur == last) + break; + cur = next; + } + if (ent->owner == 0) + ent->owner = 1; #ifdef LIBXML_LEGACY_ENABLED - if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) - xmlAddEntityReference(ent, firstChild, nw); + if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) + xmlAddEntityReference(ent, firstChild, nw); #endif /* LIBXML_LEGACY_ENABLED */ - } else { - const xmlChar *nbktext; - - /* - * the name change is to avoid coalescing of the - * node with a possible previous text one which - * would make ent->children a dangling pointer - */ - nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", - -1); - if (ent->children->type == XML_TEXT_NODE) - ent->children->name = nbktext; - if ((ent->last != ent->children) && - (ent->last->type == XML_TEXT_NODE)) - ent->last->name = nbktext; - xmlAddChildList(ctxt->node, ent->children); - } + } else { + const xmlChar *nbktext; - /* - * This is to avoid a nasty side effect, see - * characters() in SAX.c - */ - ctxt->nodemem = 0; - ctxt->nodelen = 0; - return; - } + /* + * the name change is to avoid coalescing of the + * node with a possible previous text one which + * would make ent->children a dangling pointer + */ + nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", + -1); + if (ent->children->type == XML_TEXT_NODE) + ent->children->name = nbktext; + if ((ent->last != ent->children) && + (ent->last->type == XML_TEXT_NODE)) + ent->last->name = nbktext; + xmlAddChildList(ctxt->node, ent->children); } - } else { - val = ent->content; - if (val == NULL) return; + /* - * inline the entity. + * This is to avoid a nasty side effect, see + * characters() in SAX.c */ - if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && - (!ctxt->disableSAX)) - ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); + ctxt->nodemem = 0; + ctxt->nodelen = 0; + return; } } } @@ -6337,132 +7641,153 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { xmlEntityPtr ent = NULL; GROW; - - if (RAW == '&') { - NEXT; - name = xmlParseName(ctxt); - if (name == NULL) { - xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, - "xmlParseEntityRef: no name\n"); - } else { - if (RAW == ';') { - NEXT; - /* - * Ask first SAX for entity resolution, otherwise try the - * predefined set. - */ - if (ctxt->sax != NULL) { - if (ctxt->sax->getEntity != NULL) - ent = ctxt->sax->getEntity(ctxt->userData, name); - if ((ctxt->wellFormed == 1 ) && (ent == NULL)) - ent = xmlGetPredefinedEntity(name); - if ((ctxt->wellFormed == 1 ) && (ent == NULL) && - (ctxt->userData==ctxt)) { - ent = xmlSAX2GetEntity(ctxt, name); - } - } - /* - * [ WFC: Entity Declared ] - * In a document without any DTD, a document with only an - * internal DTD subset which contains no parameter entity - * references, or a document with "standalone='yes'", the - * Name given in the entity reference must match that in an - * entity declaration, except that well-formed documents - * need not declare any of the following entities: amp, lt, - * gt, apos, quot. - * The declaration of a parameter entity must precede any - * reference to it. - * Similarly, the declaration of a general entity must - * precede any reference to it which appears in a default - * value in an attribute-list declaration. Note that if - * entities are declared in the external subset or in - * external parameter entities, a non-validating processor - * is not obligated to read and process their declarations; - * for such documents, the rule that an entity must be - * declared is a well-formedness constraint only if - * standalone='yes'. - */ - if (ent == NULL) { - if ((ctxt->standalone == 1) || - ((ctxt->hasExternalSubset == 0) && - (ctxt->hasPErefs == 0))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, - "Entity '%s' not defined\n", name); - } else { - xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, - "Entity '%s' not defined\n", name); - if ((ctxt->inSubset == 0) && - (ctxt->sax != NULL) && - (ctxt->sax->reference != NULL)) { - ctxt->sax->reference(ctxt->userData, name); - } - } - ctxt->valid = 0; - } - - /* - * [ WFC: Parsed Entity ] - * An entity reference must not contain the name of an - * unparsed entity - */ - else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, - "Entity reference to unparsed entity %s\n", name); - } + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); - /* - * [ WFC: No External Entity References ] - * Attribute values cannot contain direct or indirect - * entity references to external entities. - */ - else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && - (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { - xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, - "Attribute references external entity '%s'\n", name); - } - /* - * [ WFC: No < in Attribute Values ] - * The replacement text of any entity referred to directly or - * indirectly in an attribute value (other than "<") must - * not contain a <. - */ - else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && - (ent != NULL) && - (!xmlStrEqual(ent->name, BAD_CAST "lt")) && - (ent->content != NULL) && - (xmlStrchr(ent->content, '<'))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, - "'<' in entity '%s' is not allowed in attributes values\n", name); - } + if (RAW != '&') + return(NULL); + NEXT; + name = xmlParseName(ctxt); + if (name == NULL) { + xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, + "xmlParseEntityRef: no name\n"); + return(NULL); + } + if (RAW != ';') { + xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); + return(NULL); + } + NEXT; - /* - * Internal check, no parameter entities here ... - */ - else { - switch (ent->etype) { - case XML_INTERNAL_PARAMETER_ENTITY: - case XML_EXTERNAL_PARAMETER_ENTITY: - xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, - "Attempt to reference the parameter entity '%s'\n", - name); - break; - default: - break; - } - } + /* + * Predefined entities override any extra definition + */ + if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { + ent = xmlGetPredefinedEntity(name); + if (ent != NULL) + return(ent); + } - /* - * [ WFC: No Recursion ] - * A parsed entity must not contain a recursive reference - * to itself, either directly or indirectly. - * Done somewhere else - */ + /* + * Increase the number of entity references parsed + */ + ctxt->nbentities++; - } else { - xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); + /* + * Ask first SAX for entity resolution, otherwise try the + * entities which may have stored in the parser context. + */ + if (ctxt->sax != NULL) { + if (ctxt->sax->getEntity != NULL) + ent = ctxt->sax->getEntity(ctxt->userData, name); + if ((ctxt->wellFormed == 1 ) && (ent == NULL) && + (ctxt->options & XML_PARSE_OLDSAX)) + ent = xmlGetPredefinedEntity(name); + if ((ctxt->wellFormed == 1 ) && (ent == NULL) && + (ctxt->userData==ctxt)) { + ent = xmlSAX2GetEntity(ctxt, name); + } + } + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", the + * Name given in the entity reference must match that in an + * entity declaration, except that well-formed documents + * need not declare any of the following entities: amp, lt, + * gt, apos, quot. + * The declaration of a parameter entity must precede any + * reference to it. + * Similarly, the declaration of a general entity must + * precede any reference to it which appears in a default + * value in an attribute-list declaration. Note that if + * entities are declared in the external subset or in + * external parameter entities, a non-validating processor + * is not obligated to read and process their declarations; + * for such documents, the rule that an entity must be + * declared is a well-formedness constraint only if + * standalone='yes'. + */ + if (ent == NULL) { + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && + (ctxt->hasPErefs == 0))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, + "Entity '%s' not defined\n", name); + } else { + xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, + "Entity '%s' not defined\n", name); + if ((ctxt->inSubset == 0) && + (ctxt->sax != NULL) && + (ctxt->sax->reference != NULL)) { + ctxt->sax->reference(ctxt->userData, name); } } + xmlParserEntityCheck(ctxt, 0, ent, 0); + ctxt->valid = 0; + } + + /* + * [ WFC: Parsed Entity ] + * An entity reference must not contain the name of an + * unparsed entity + */ + else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, + "Entity reference to unparsed entity %s\n", name); } + + /* + * [ WFC: No External Entity References ] + * Attribute values cannot contain direct or indirect + * entity references to external entities. + */ + else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && + (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, + "Attribute references external entity '%s'\n", name); + } + /* + * [ WFC: No < in Attribute Values ] + * The replacement text of any entity referred to directly or + * indirectly in an attribute value (other than "<") must + * not contain a <. + */ + else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && + (ent != NULL) && + (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { + if (((ent->checked & 1) || (ent->checked == 0)) && + (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, + "'<' in entity '%s' is not allowed in attributes values\n", name); + } + } + + /* + * Internal check, no parameter entities here ... + */ + else { + switch (ent->etype) { + case XML_INTERNAL_PARAMETER_ENTITY: + case XML_EXTERNAL_PARAMETER_ENTITY: + xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, + "Attempt to reference the parameter entity '%s'\n", + name); + break; + default: + break; + } + } + + /* + * [ WFC: No Recursion ] + * A parsed entity must not contain a recursive reference + * to itself, either directly or indirectly. + * Done somewhere else + */ return(ent); } @@ -6497,7 +7822,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer * is updated to the current location in the string. */ -xmlEntityPtr +static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { xmlChar *name; const xmlChar *ptr; @@ -6508,129 +7833,156 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { return(NULL); ptr = *str; cur = *ptr; - if (cur == '&') { - ptr++; - cur = *ptr; - name = xmlParseStringName(ctxt, &ptr); - if (name == NULL) { - xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, - "xmlParseStringEntityRef: no name\n"); - } else { - if (*ptr == ';') { - ptr++; - /* - * Ask first SAX for entity resolution, otherwise try the - * predefined set. - */ - if (ctxt->sax != NULL) { - if (ctxt->sax->getEntity != NULL) - ent = ctxt->sax->getEntity(ctxt->userData, name); - if (ent == NULL) - ent = xmlGetPredefinedEntity(name); - if ((ent == NULL) && (ctxt->userData==ctxt)) { - ent = xmlSAX2GetEntity(ctxt, name); - } - } - /* - * [ WFC: Entity Declared ] - * In a document without any DTD, a document with only an - * internal DTD subset which contains no parameter entity - * references, or a document with "standalone='yes'", the - * Name given in the entity reference must match that in an - * entity declaration, except that well-formed documents - * need not declare any of the following entities: amp, lt, - * gt, apos, quot. - * The declaration of a parameter entity must precede any - * reference to it. - * Similarly, the declaration of a general entity must - * precede any reference to it which appears in a default - * value in an attribute-list declaration. Note that if - * entities are declared in the external subset or in - * external parameter entities, a non-validating processor - * is not obligated to read and process their declarations; - * for such documents, the rule that an entity must be - * declared is a well-formedness constraint only if - * standalone='yes'. - */ - if (ent == NULL) { - if ((ctxt->standalone == 1) || - ((ctxt->hasExternalSubset == 0) && - (ctxt->hasPErefs == 0))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, - "Entity '%s' not defined\n", name); - } else { - xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, - "Entity '%s' not defined\n", - name); - } - /* TODO ? check regressions ctxt->valid = 0; */ - } + if (cur != '&') + return(NULL); - /* - * [ WFC: Parsed Entity ] - * An entity reference must not contain the name of an - * unparsed entity - */ - else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, - "Entity reference to unparsed entity %s\n", name); - } + ptr++; + name = xmlParseStringName(ctxt, &ptr); + if (name == NULL) { + xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, + "xmlParseStringEntityRef: no name\n"); + *str = ptr; + return(NULL); + } + if (*ptr != ';') { + xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); + xmlFree(name); + *str = ptr; + return(NULL); + } + ptr++; - /* - * [ WFC: No External Entity References ] - * Attribute values cannot contain direct or indirect - * entity references to external entities. - */ - else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && - (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { - xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, - "Attribute references external entity '%s'\n", name); - } - /* - * [ WFC: No < in Attribute Values ] - * The replacement text of any entity referred to directly or - * indirectly in an attribute value (other than "<") must - * not contain a <. - */ - else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && - (ent != NULL) && - (!xmlStrEqual(ent->name, BAD_CAST "lt")) && - (ent->content != NULL) && - (xmlStrchr(ent->content, '<'))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, - "'<' in entity '%s' is not allowed in attributes values\n", - name); - } - /* - * Internal check, no parameter entities here ... - */ - else { - switch (ent->etype) { - case XML_INTERNAL_PARAMETER_ENTITY: - case XML_EXTERNAL_PARAMETER_ENTITY: - xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, - "Attempt to reference the parameter entity '%s'\n", - name); - break; - default: - break; - } - } + /* + * Predefined entities override any extra definition + */ + if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { + ent = xmlGetPredefinedEntity(name); + if (ent != NULL) { + xmlFree(name); + *str = ptr; + return(ent); + } + } - /* - * [ WFC: No Recursion ] - * A parsed entity must not contain a recursive reference - * to itself, either directly or indirectly. - * Done somewhere else - */ + /* + * Increate the number of entity references parsed + */ + ctxt->nbentities++; - } else { - xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); - } - xmlFree(name); + /* + * Ask first SAX for entity resolution, otherwise try the + * entities which may have stored in the parser context. + */ + if (ctxt->sax != NULL) { + if (ctxt->sax->getEntity != NULL) + ent = ctxt->sax->getEntity(ctxt->userData, name); + if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) + ent = xmlGetPredefinedEntity(name); + if ((ent == NULL) && (ctxt->userData==ctxt)) { + ent = xmlSAX2GetEntity(ctxt, name); + } + } + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(name); + return(NULL); + } + + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", the + * Name given in the entity reference must match that in an + * entity declaration, except that well-formed documents + * need not declare any of the following entities: amp, lt, + * gt, apos, quot. + * The declaration of a parameter entity must precede any + * reference to it. + * Similarly, the declaration of a general entity must + * precede any reference to it which appears in a default + * value in an attribute-list declaration. Note that if + * entities are declared in the external subset or in + * external parameter entities, a non-validating processor + * is not obligated to read and process their declarations; + * for such documents, the rule that an entity must be + * declared is a well-formedness constraint only if + * standalone='yes'. + */ + if (ent == NULL) { + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && + (ctxt->hasPErefs == 0))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, + "Entity '%s' not defined\n", name); + } else { + xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, + "Entity '%s' not defined\n", + name); + } + xmlParserEntityCheck(ctxt, 0, ent, 0); + /* TODO ? check regressions ctxt->valid = 0; */ + } + + /* + * [ WFC: Parsed Entity ] + * An entity reference must not contain the name of an + * unparsed entity + */ + else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, + "Entity reference to unparsed entity %s\n", name); + } + + /* + * [ WFC: No External Entity References ] + * Attribute values cannot contain direct or indirect + * entity references to external entities. + */ + else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && + (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, + "Attribute references external entity '%s'\n", name); + } + /* + * [ WFC: No < in Attribute Values ] + * The replacement text of any entity referred to directly or + * indirectly in an attribute value (other than "<") must + * not contain a <. + */ + else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && + (ent != NULL) && (ent->content != NULL) && + (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && + (xmlStrchr(ent->content, '<'))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, + "'<' in entity '%s' is not allowed in attributes values\n", + name); + } + + /* + * Internal check, no parameter entities here ... + */ + else { + switch (ent->etype) { + case XML_INTERNAL_PARAMETER_ENTITY: + case XML_EXTERNAL_PARAMETER_ENTITY: + xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, + "Attempt to reference the parameter entity '%s'\n", + name); + break; + default: + break; } } + + /* + * [ WFC: No Recursion ] + * A parsed entity must not contain a recursive reference + * to itself, either directly or indirectly. + * Done somewhere else + */ + + xmlFree(name); *str = ptr; return(ent); } @@ -6647,7 +7999,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { * * [ WFC: No Recursion ] * A parsed entity must not contain a recursive - * reference to itself, either directly or indirectly. + * reference to itself, either directly or indirectly. * * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an internal DTD @@ -6671,90 +8023,200 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) xmlEntityPtr entity = NULL; xmlParserInputPtr input; - if (RAW == '%') { - NEXT; - name = xmlParseName(ctxt); - if (name == NULL) { - xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, - "xmlParsePEReference: no name\n"); - } else { - if (RAW == ';') { - NEXT; - if ((ctxt->sax != NULL) && - (ctxt->sax->getParameterEntity != NULL)) - entity = ctxt->sax->getParameterEntity(ctxt->userData, - name); - if (entity == NULL) { - /* - * [ WFC: Entity Declared ] - * In a document without any DTD, a document with only an - * internal DTD subset which contains no parameter entity - * references, or a document with "standalone='yes'", ... - * ... The declaration of a parameter entity must precede - * any reference to it... - */ - if ((ctxt->standalone == 1) || - ((ctxt->hasExternalSubset == 0) && - (ctxt->hasPErefs == 0))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", - name); - } else { - /* - * [ VC: Entity Declared ] - * In a document with an external subset or external - * parameter entities with "standalone='no'", ... - * ... The declaration of a parameter entity must - * precede any reference to it... - */ - xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", - name, NULL); - ctxt->valid = 0; - } - } else { - /* - * Internal checking in case the entity quest barfed - */ - if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && - (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { - xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, - "Internal: %%%s; is not a parameter entity\n", - name, NULL); - } else if (ctxt->input->free != deallocblankswrapper) { - input = - xmlNewBlanksWrapperInputStream(ctxt, entity); - xmlPushInput(ctxt, input); - } else { - /* - * TODO !!! - * handle the extra spaces added before and after - * c.f. http://www.w3.org/TR/REC-xml#as-PE - */ - input = xmlNewEntityInputStream(ctxt, entity); - xmlPushInput(ctxt, input); - if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && - (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && - (IS_BLANK_CH(NXT(5)))) { - xmlParseTextDecl(ctxt); - if (ctxt->errNo == - XML_ERR_UNSUPPORTED_ENCODING) { - /* - * The XML REC instructs us to stop parsing - * right here - */ - ctxt->instate = XML_PARSER_EOF; - return; - } - } - } - } - ctxt->hasPErefs = 1; - } else { - xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); + if (RAW != '%') + return; + NEXT; + name = xmlParseName(ctxt); + if (name == NULL) { + xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, + "xmlParsePEReference: no name\n"); + return; + } + if (RAW != ';') { + xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); + return; + } + + NEXT; + + /* + * Increate the number of entity references parsed + */ + ctxt->nbentities++; + + /* + * Request the entity from SAX + */ + if ((ctxt->sax != NULL) && + (ctxt->sax->getParameterEntity != NULL)) + entity = ctxt->sax->getParameterEntity(ctxt->userData, name); + if (ctxt->instate == XML_PARSER_EOF) + return; + if (entity == NULL) { + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", ... + * ... The declaration of a parameter entity must precede + * any reference to it... + */ + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && + (ctxt->hasPErefs == 0))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, + "PEReference: %%%s; not found\n", + name); + } else { + /* + * [ VC: Entity Declared ] + * In a document with an external subset or external + * parameter entities with "standalone='no'", ... + * ... The declaration of a parameter entity must + * precede any reference to it... + */ + xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, + "PEReference: %%%s; not found\n", + name, NULL); + ctxt->valid = 0; + } + xmlParserEntityCheck(ctxt, 0, NULL, 0); + } else { + /* + * Internal checking in case the entity quest barfed + */ + if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && + (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { + xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, + "Internal: %%%s; is not a parameter entity\n", + name, NULL); + } else if (ctxt->input->free != deallocblankswrapper) { + input = xmlNewBlanksWrapperInputStream(ctxt, entity); + if (xmlPushInput(ctxt, input) < 0) + return; + } else { + /* + * TODO !!! + * handle the extra spaces added before and after + * c.f. http://www.w3.org/TR/REC-xml#as-PE + */ + input = xmlNewEntityInputStream(ctxt, entity); + if (xmlPushInput(ctxt, input) < 0) + return; + if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && + (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && + (IS_BLANK_CH(NXT(5)))) { + xmlParseTextDecl(ctxt); + if (ctxt->errNo == + XML_ERR_UNSUPPORTED_ENCODING) { + /* + * The XML REC instructs us to stop parsing + * right here + */ + xmlHaltParser(ctxt); + return; + } + } + } + } + ctxt->hasPErefs = 1; +} + +/** + * xmlLoadEntityContent: + * @ctxt: an XML parser context + * @entity: an unloaded system entity + * + * Load the original content of the given system entity from the + * ExternalID/SystemID given. This is to be used for Included in Literal + * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references + * + * Returns 0 in case of success and -1 in case of failure + */ +static int +xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { + xmlParserInputPtr input; + xmlBufferPtr buf; + int l, c; + int count = 0; + + if ((ctxt == NULL) || (entity == NULL) || + ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && + (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || + (entity->content != NULL)) { + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, + "xmlLoadEntityContent parameter error"); + return(-1); + } + + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "Reading %s entity content input\n", entity->name); + + buf = xmlBufferCreate(); + if (buf == NULL) { + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, + "xmlLoadEntityContent parameter error"); + return(-1); + } + + input = xmlNewEntityInputStream(ctxt, entity); + if (input == NULL) { + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, + "xmlLoadEntityContent input error"); + xmlBufferFree(buf); + return(-1); + } + + /* + * Push the entity as the current input, read char by char + * saving to the buffer until the end of the entity or an error + */ + if (xmlPushInput(ctxt, input) < 0) { + xmlBufferFree(buf); + return(-1); + } + + GROW; + c = CUR_CHAR(l); + while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && + (IS_CHAR(c))) { + xmlBufferAdd(buf, ctxt->input->cur, l); + if (count++ > XML_PARSER_CHUNK_SIZE) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlBufferFree(buf); + return(-1); } - } + } + NEXTL(l); + c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlBufferFree(buf); + return(-1); + } + c = CUR_CHAR(l); + } } + + if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { + xmlPopInput(ctxt); + } else if (!IS_CHAR(c)) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "xmlLoadEntityContent: invalid char value %d\n", + c); + xmlBufferFree(buf); + return(-1); + } + entity->content = buf->content; + buf->content = NULL; + xmlBufferFree(buf); + + return(0); } /** @@ -6768,7 +8230,7 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) * * [ WFC: No Recursion ] * A parsed entity must not contain a recursive - * reference to itself, either directly or indirectly. + * reference to itself, either directly or indirectly. * * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an internal DTD @@ -6788,7 +8250,7 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) * Returns the string of the entity content. * str is updated to the current value of the index */ -xmlEntityPtr +static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { const xmlChar *ptr; xmlChar cur; @@ -6798,67 +8260,80 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { if ((str == NULL) || (*str == NULL)) return(NULL); ptr = *str; cur = *ptr; - if (cur == '%') { - ptr++; - cur = *ptr; - name = xmlParseStringName(ctxt, &ptr); - if (name == NULL) { - xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, - "xmlParseStringPEReference: no name\n"); + if (cur != '%') + return(NULL); + ptr++; + name = xmlParseStringName(ctxt, &ptr); + if (name == NULL) { + xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, + "xmlParseStringPEReference: no name\n"); + *str = ptr; + return(NULL); + } + cur = *ptr; + if (cur != ';') { + xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); + xmlFree(name); + *str = ptr; + return(NULL); + } + ptr++; + + /* + * Increate the number of entity references parsed + */ + ctxt->nbentities++; + + /* + * Request the entity from SAX + */ + if ((ctxt->sax != NULL) && + (ctxt->sax->getParameterEntity != NULL)) + entity = ctxt->sax->getParameterEntity(ctxt->userData, name); + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(name); + return(NULL); + } + if (entity == NULL) { + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", ... + * ... The declaration of a parameter entity must precede + * any reference to it... + */ + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, + "PEReference: %%%s; not found\n", name); } else { - cur = *ptr; - if (cur == ';') { - ptr++; - cur = *ptr; - if ((ctxt->sax != NULL) && - (ctxt->sax->getParameterEntity != NULL)) - entity = ctxt->sax->getParameterEntity(ctxt->userData, - name); - if (entity == NULL) { - /* - * [ WFC: Entity Declared ] - * In a document without any DTD, a document with only an - * internal DTD subset which contains no parameter entity - * references, or a document with "standalone='yes'", ... - * ... The declaration of a parameter entity must precede - * any reference to it... - */ - if ((ctxt->standalone == 1) || - ((ctxt->hasExternalSubset == 0) && - (ctxt->hasPErefs == 0))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", name); - } else { - /* - * [ VC: Entity Declared ] - * In a document with an external subset or external - * parameter entities with "standalone='no'", ... - * ... The declaration of a parameter entity must - * precede any reference to it... - */ - xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", - name, NULL); - ctxt->valid = 0; - } - } else { - /* - * Internal checking in case the entity quest barfed - */ - if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && - (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { - xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, - "%%%s; is not a parameter entity\n", - name, NULL); - } - } - ctxt->hasPErefs = 1; - } else { - xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); - } - xmlFree(name); + /* + * [ VC: Entity Declared ] + * In a document with an external subset or external + * parameter entities with "standalone='no'", ... + * ... The declaration of a parameter entity must + * precede any reference to it... + */ + xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, + "PEReference: %%%s; not found\n", + name, NULL); + ctxt->valid = 0; + } + xmlParserEntityCheck(ctxt, 0, NULL, 0); + } else { + /* + * Internal checking in case the entity quest barfed + */ + if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && + (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { + xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, + "%%%s; is not a parameter entity\n", + name, NULL); } } + ctxt->hasPErefs = 1; + xmlFree(name); *str = ptr; return(entity); } @@ -6869,12 +8344,12 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { * * parse a DOCTYPE declaration * - * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? + * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' * * [ VC: Root Element Type ] * The Name in the document type declaration must match the element - * type of the root element. + * type of the root element. */ void @@ -6921,6 +8396,8 @@ xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && (!ctxt->disableSAX)) ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); + if (ctxt->instate == XML_PARSER_EOF) + return; /* * Is there any internal subset declarations ? @@ -6956,11 +8433,11 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { ctxt->instate = XML_PARSER_DTD; NEXT; /* - * Parse the succession of Markup declarations and + * Parse the succession of Markup declarations and * PEReferences. * Subsequence (markupdecl | PEReference | S)* */ - while (RAW != ']') { + while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *check = CUR_PTR; unsigned int cons = ctxt->input->consumed; @@ -6980,7 +8457,7 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { break; } } - if (RAW == ']') { + if (RAW == ']') { NEXT; SKIP_BLANKS; } @@ -7011,8 +8488,8 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { * * [ WFC: No < in Attribute Values ] * The replacement text of any entity referred to directly or indirectly in - * an attribute value (other than "<") must not contain a <. - * + * an attribute value (other than "<") must not contain a <. + * * [ VC: Attribute Value Type ] * The attribute must have been declared; the value must be of the type * declared for it. @@ -7093,7 +8570,7 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { /** * xmlParseStartTag: * @ctxt: an XML parser context - * + * * parse a start of tag either for rule element or * EmptyElement. In both case we don't parse the tag closing chars. * @@ -7101,13 +8578,13 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { * * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same start-tag or - * empty-element tag. + * empty-element tag. * * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' * * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same start-tag or - * empty-element tag. + * empty-element tag. * * With namespace: * @@ -7146,9 +8623,9 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { SKIP_BLANKS; GROW; - while ((RAW != '>') && + while (((RAW != '>') && ((RAW != '/') || (NXT(1) != '>')) && - (IS_BYTE_CHAR(RAW))) { + (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *q = CUR_PTR; unsigned int cons = ctxt->input->consumed; @@ -7157,7 +8634,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { /* * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same - * start-tag or empty-element tag. + * start-tag or empty-element tag. */ for (i = 0; i < nbatts;i += 2) { if (xmlStrEqual(atts[i], attname)) { @@ -7206,7 +8683,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { xmlFree(attvalue); } -failed: +failed: GROW if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) @@ -7288,7 +8765,7 @@ xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { /* * [ WFC: Element Type Match ] * The Name in an element's end-tag must match the element type in the - * start-tag. + * start-tag. * */ if (name != (xmlChar*)1) { @@ -7335,38 +8812,6 @@ xmlParseEndTag(xmlParserCtxtPtr ctxt) { * * ************************************************************************/ -static const xmlChar * -xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { - int len = 0, l; - int c; - int count = 0; - - /* - * Handler for more complex cases - */ - GROW; - c = CUR_CHAR(l); - if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ - (!IS_LETTER(c) && (c != '_'))) { - return(NULL); - } - - while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ - ((IS_LETTER(c)) || (IS_DIGIT(c)) || - (c == '.') || (c == '-') || (c == '_') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c)))) { - if (count++ > 100) { - count = 0; - GROW; - } - len += l; - NEXTL(l); - c = CUR_CHAR(l); - } - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); -} - /* * xmlGetNamespace: * @ctxt: an XML parser context @@ -7392,56 +8837,6 @@ xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { } /** - * xmlParseNCName: - * @ctxt: an XML parser context - * @len: lenght of the string parsed - * - * parse an XML name. - * - * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | - * CombiningChar | Extender - * - * [5NS] NCName ::= (Letter | '_') (NCNameChar)* - * - * Returns the Name parsed or NULL - */ - -static const xmlChar * -xmlParseNCName(xmlParserCtxtPtr ctxt) { - const xmlChar *in; - const xmlChar *ret; - int count = 0; - - /* - * Accelerator for simple ASCII names - */ - in = ctxt->input->cur; - if (((*in >= 0x61) && (*in <= 0x7A)) || - ((*in >= 0x41) && (*in <= 0x5A)) || - (*in == '_')) { - in++; - while (((*in >= 0x61) && (*in <= 0x7A)) || - ((*in >= 0x41) && (*in <= 0x5A)) || - ((*in >= 0x30) && (*in <= 0x39)) || - (*in == '_') || (*in == '-') || - (*in == '.')) - in++; - if ((*in > 0) && (*in < 0x80)) { - count = in - ctxt->input->cur; - ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); - ctxt->input->cur = in; - ctxt->nbChars += count; - ctxt->input->col += count; - if (ret == NULL) { - xmlErrMemory(ctxt, NULL); - } - return(ret); - } - } - return(xmlParseNCNameComplex(ctxt)); -} - -/** * xmlParseQName: * @ctxt: an XML parser context * @prefix: pointer to store the prefix part @@ -7466,7 +8861,7 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { if (CUR == ':') { l = xmlParseName(ctxt); if (l != NULL) { - xmlNsErr(ctxt, XML_NS_ERR_QNAME, + xmlNsErr(ctxt, XML_NS_ERR_QNAME, "Failed to parse QName '%s'\n", l, NULL, NULL); *prefix = NULL; return(l); @@ -7483,7 +8878,13 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { xmlNsErr(ctxt, XML_NS_ERR_QNAME, "Failed to parse QName '%s:'\n", p, NULL, NULL); - tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); + l = xmlParseNmtoken(ctxt); + if (l == NULL) + tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); + else { + tmp = xmlBuildQName(l, p, NULL, 0); + xmlFree((char *)l); + } p = xmlDictLookup(ctxt->dict, tmp, -1); if (tmp != NULL) xmlFree(tmp); *prefix = NULL; @@ -7531,7 +8932,7 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { static const xmlChar * xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, xmlChar const *prefix) { - const xmlChar *cmp = name; + const xmlChar *cmp; const xmlChar *in; const xmlChar *ret; const xmlChar *prefix2; @@ -7540,10 +8941,10 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, GROW; in = ctxt->input->cur; - + cmp = prefix; while (*in != 0 && *in == *cmp) { - ++in; + ++in; ++cmp; } if ((*cmp == 0) && (*in == ':')) { @@ -7581,20 +8982,20 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, * * 3.3.3 Attribute-Value Normalization: * Before the value of an attribute is passed to the application or - * checked for validity, the XML processor must normalize it as follows: + * checked for validity, the XML processor must normalize it as follows: * - a character reference is processed by appending the referenced * character to the attribute value * - an entity reference is processed by recursively processing the - * replacement text of the entity + * replacement text of the entity * - a whitespace character (#x20, #xD, #xA, #x9) is processed by * appending #x20 to the normalized value, except that only a single * #x20 is appended for a "#xD#xA" sequence that is part of an external - * parsed entity or the literal entity value of an internal parsed entity - * - other characters are processed by appending them to the normalized value + * parsed entity or the literal entity value of an internal parsed entity + * - other characters are processed by appending them to the normalized value * If the declared value is not CDATA, then the XML processor must further * process the normalized attribute value by discarding any leading and * trailing space (#x20) characters, and by replacing sequences of space - * (#x20) characters by a single space (#x20) character. + * (#x20) characters by a single space (#x20) character. * All attributes for which no declaration has been read should be treated * by a non-validating parser as if declared CDATA. * @@ -7609,9 +9010,12 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, xmlChar limit = 0; const xmlChar *in = NULL, *start, *end, *last; xmlChar *ret = NULL; + int line, col; GROW; in = (xmlChar *) CUR_PTR; + line = ctxt->input->line; + col = ctxt->input->col; if (*in != '"' && *in != '\'') { xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); return (NULL); @@ -7624,6 +9028,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, * pure ASCII. */ limit = *in++; + col++; end = ctxt->input->end; start = in; if (in >= end) { @@ -7640,34 +9045,56 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, /* * Skip any leading spaces */ - while ((in < end) && (*in != limit) && + while ((in < end) && (*in != limit) && ((*in == 0x20) || (*in == 0x9) || (*in == 0xA) || (*in == 0xD))) { + if (*in == 0xA) { + line++; col = 1; + } else { + col++; + } in++; start = in; if (in >= end) { const xmlChar *oldbase = ctxt->input->base; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (oldbase != ctxt->input->base) { long delta = ctxt->input->base - oldbase; start = start + delta; in = in + delta; } end = ctxt->input->end; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + return(NULL); + } } } while ((in < end) && (*in != limit) && (*in >= 0x20) && (*in <= 0x7f) && (*in != '&') && (*in != '<')) { + col++; if ((*in++ == 0x20) && (*in == 0x20)) break; if (in >= end) { const xmlChar *oldbase = ctxt->input->base; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (oldbase != ctxt->input->base) { long delta = ctxt->input->base - oldbase; start = start + delta; in = in + delta; } end = ctxt->input->end; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + return(NULL); + } } } last = in; @@ -7675,13 +9102,20 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, * skip the trailing blanks */ while ((last[-1] == 0x20) && (last > start)) last--; - while ((in < end) && (*in != limit) && + while ((in < end) && (*in != limit) && ((*in == 0x20) || (*in == 0x9) || (*in == 0xA) || (*in == 0xD))) { + if (*in == 0xA) { + line++, col = 1; + } else { + col++; + } in++; if (in >= end) { const xmlChar *oldbase = ctxt->input->base; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (oldbase != ctxt->input->base) { long delta = ctxt->input->base - oldbase; start = start + delta; @@ -7689,28 +9123,56 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, last = last + delta; } end = ctxt->input->end; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + return(NULL); + } } } + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + return(NULL); + } if (*in != limit) goto need_complex; } else { while ((in < end) && (*in != limit) && (*in >= 0x20) && (*in <= 0x7f) && (*in != '&') && (*in != '<')) { in++; + col++; if (in >= end) { const xmlChar *oldbase = ctxt->input->base; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (oldbase != ctxt->input->base) { long delta = ctxt->input->base - oldbase; start = start + delta; in = in + delta; } end = ctxt->input->end; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + return(NULL); + } } } last = in; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + return(NULL); + } if (*in != limit) goto need_complex; } in++; + col++; if (len != NULL) { *len = last - start; ret = (xmlChar *) start; @@ -7719,6 +9181,8 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, ret = xmlStrndup(start, last - start); } CUR_PTR = in; + ctxt->input->line = line; + ctxt->input->col = col; if (alloc) *alloc = 0; return ret; need_complex: @@ -7743,9 +9207,10 @@ need_complex: static const xmlChar * xmlParseAttribute2(xmlParserCtxtPtr ctxt, - const xmlChar *pref, const xmlChar *elem, - const xmlChar **prefix, xmlChar **value, - int *len, int *alloc) { + const xmlChar * pref, const xmlChar * elem, + const xmlChar ** prefix, xmlChar ** value, + int *len, int *alloc) +{ const xmlChar *name; xmlChar *val, *internal_val = NULL; int normalize = 0; @@ -7754,9 +9219,9 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, GROW; name = xmlParseQName(ctxt, prefix); if (name == NULL) { - xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, - "error parsing attribute name\n"); - return(NULL); + xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, + "error parsing attribute name\n"); + return (NULL); } /* @@ -7766,8 +9231,9 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, int type; type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, - pref, elem, *prefix, name); - if (type != 0) normalize = 1; + pref, elem, *prefix, name); + if (type != 0) + normalize = 1; } /* @@ -7776,58 +9242,75 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, SKIP_BLANKS; if (RAW == '=') { NEXT; - SKIP_BLANKS; - val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); - ctxt->instate = XML_PARSER_CONTENT; + SKIP_BLANKS; + val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); + if (normalize) { + /* + * Sometimes a second normalisation pass for spaces is needed + * but that only happens if charrefs or entities refernces + * have been used in the attribute value, i.e. the attribute + * value have been extracted in an allocated string already. + */ + if (*alloc) { + const xmlChar *val2; + + val2 = xmlAttrNormalizeSpace2(ctxt, val, len); + if ((val2 != NULL) && (val2 != val)) { + xmlFree(val); + val = (xmlChar *) val2; + } + } + } + ctxt->instate = XML_PARSER_CONTENT; } else { - xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, - "Specification mandate value for attribute %s\n", name); - return(NULL); + xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, + "Specification mandate value for attribute %s\n", + name); + return (NULL); } - if (*prefix == ctxt->str_xml) { - /* - * Check that xml:lang conforms to the specification - * No more registered as an error, just generate a warning now - * since this was deprecated in XML second edition - */ - if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { - internal_val = xmlStrndup(val, *len); - if (!xmlCheckLanguageID(internal_val)) { - xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, - "Malformed value for xml:lang : %s\n", - internal_val, NULL); - } - } + if (*prefix == ctxt->str_xml) { + /* + * Check that xml:lang conforms to the specification + * No more registered as an error, just generate a warning now + * since this was deprecated in XML second edition + */ + if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { + internal_val = xmlStrndup(val, *len); + if (!xmlCheckLanguageID(internal_val)) { + xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, + "Malformed value for xml:lang : %s\n", + internal_val, NULL); + } + } - /* - * Check that xml:space conforms to the specification - */ - if (xmlStrEqual(name, BAD_CAST "space")) { - internal_val = xmlStrndup(val, *len); - if (xmlStrEqual(internal_val, BAD_CAST "default")) - *(ctxt->space) = 0; - else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) - *(ctxt->space) = 1; - else { - xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, -"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", - internal_val, NULL); - } - } - if (internal_val) { - xmlFree(internal_val); - } - } + /* + * Check that xml:space conforms to the specification + */ + if (xmlStrEqual(name, BAD_CAST "space")) { + internal_val = xmlStrndup(val, *len); + if (xmlStrEqual(internal_val, BAD_CAST "default")) + *(ctxt->space) = 0; + else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) + *(ctxt->space) = 1; + else { + xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, + "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", + internal_val, NULL); + } + } + if (internal_val) { + xmlFree(internal_val); + } + } *value = val; - return(name); + return (name); } - /** * xmlParseStartTag2: * @ctxt: an XML parser context - * + * * parse a start of tag either for rule element or * EmptyElement. In both case we don't parse the tag closing chars. * This routine is called when running SAX2 parsing @@ -7836,13 +9319,13 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, * * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same start-tag or - * empty-element tag. + * empty-element tag. * * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' * * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same start-tag or - * empty-element tag. + * empty-element tag. * * With namespace: * @@ -7865,7 +9348,7 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, const xmlChar **atts = ctxt->atts; int maxatts = ctxt->maxatts; int nratts, nbatts, nbdef; - int i, j, nbNs, attval, oldline, oldcol; + int i, j, nbNs, attval, oldline, oldcol, inputNr; const xmlChar *base; unsigned long cur; int nsNr = ctxt->nsNr; @@ -7884,6 +9367,7 @@ reparse: SHRINK; base = ctxt->input->base; cur = ctxt->input->cur - ctxt->input->base; + inputNr = ctxt->inputNr; oldline = ctxt->input->line; oldcol = ctxt->input->col; nbatts = 0; @@ -7909,18 +9393,19 @@ reparse: */ SKIP_BLANKS; GROW; - if (ctxt->input->base != base) goto base_changed; + if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) + goto base_changed; - while ((RAW != '>') && + while (((RAW != '>') && ((RAW != '/') || (NXT(1) != '>')) && - (IS_BYTE_CHAR(RAW))) { + (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *q = CUR_PTR; unsigned int cons = ctxt->input->consumed; int len = -1, alloc = 0; attname = xmlParseAttribute2(ctxt, prefix, localname, &aprefix, &attvalue, &len, &alloc); - if (ctxt->input->base != base) { + if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) { if ((attvalue != NULL) && (alloc != 0)) xmlFree(attvalue); attvalue = NULL; @@ -7932,20 +9417,42 @@ reparse: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); xmlURIPtr uri; + if (URL == NULL) { + xmlErrMemory(ctxt, "dictionary allocation failure"); + if ((attvalue != NULL) && (alloc != 0)) + xmlFree(attvalue); + return(NULL); + } if (*URL != 0) { uri = xmlParseURI((const char *) URL); if (uri == NULL) { - xmlWarningMsg(ctxt, XML_WAR_NS_URI, - "xmlns: %s not a valid URI\n", - URL, NULL); + xmlNsErr(ctxt, XML_WAR_NS_URI, + "xmlns: '%s' is not a valid URI\n", + URL, NULL, NULL); } else { if (uri->scheme == NULL) { - xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, - "xmlns: URI %s is not absolute\n", - URL, NULL); + xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, + "xmlns: URI %s is not absolute\n", + URL, NULL, NULL); } xmlFreeURI(uri); } + if (URL == ctxt->str_xml_ns) { + if (attname != ctxt->str_xml) { + xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, + "xml namespace URI cannot be the default namespace\n", + NULL, NULL, NULL); + } + goto skip_default_ns; + } + if ((len == 29) && + (xmlStrEqual(URL, + BAD_CAST "http://www.w3.org/2000/xmlns/"))) { + xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, + "reuse of the xmlns namespace name is forbidden\n", + NULL, NULL, NULL); + goto skip_default_ns; + } } /* * check that it's not a defined namespace @@ -7957,7 +9464,15 @@ reparse: xmlErrAttributeDup(ctxt, NULL, attname); else if (nsPush(ctxt, NULL, URL) > 0) nbNs++; +skip_default_ns: if (alloc != 0) xmlFree(attvalue); + if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) + break; + if (!IS_BLANK_CH(RAW)) { + xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, + "attributes construct error\n"); + break; + } SKIP_BLANKS; continue; } @@ -7974,22 +9489,49 @@ reparse: /* * Do not keep a namespace definition node */ - if (alloc != 0) xmlFree(attvalue); - SKIP_BLANKS; - continue; + goto skip_ns; + } + if (URL == ctxt->str_xml_ns) { + if (attname != ctxt->str_xml) { + xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, + "xml namespace URI mapped to wrong prefix\n", + NULL, NULL, NULL); + } + goto skip_ns; + } + if (attname == ctxt->str_xmlns) { + xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, + "redefinition of the xmlns prefix is forbidden\n", + NULL, NULL, NULL); + goto skip_ns; } - uri = xmlParseURI((const char *) URL); - if (uri == NULL) { - xmlWarningMsg(ctxt, XML_WAR_NS_URI, - "xmlns:%s: '%s' is not a valid URI\n", - attname, URL); + if ((len == 29) && + (xmlStrEqual(URL, + BAD_CAST "http://www.w3.org/2000/xmlns/"))) { + xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, + "reuse of the xmlns namespace name is forbidden\n", + NULL, NULL, NULL); + goto skip_ns; + } + if ((URL == NULL) || (URL[0] == 0)) { + xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, + "xmlns:%s: Empty XML namespace is not allowed\n", + attname, NULL, NULL); + goto skip_ns; } else { - if ((ctxt->pedantic) && (uri->scheme == NULL)) { - xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, + uri = xmlParseURI((const char *) URL); + if (uri == NULL) { + xmlNsErr(ctxt, XML_WAR_NS_URI, + "xmlns:%s: '%s' is not a valid URI\n", + attname, URL, NULL); + } else { + if ((ctxt->pedantic) && (uri->scheme == NULL)) { + xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, "xmlns:%s: URI %s is not absolute\n", - attname, URL); + attname, URL, NULL); + } + xmlFreeURI(uri); } - xmlFreeURI(uri); } /* @@ -8002,9 +9544,18 @@ reparse: xmlErrAttributeDup(ctxt, aprefix, attname); else if (nsPush(ctxt, attname, URL) > 0) nbNs++; +skip_ns: if (alloc != 0) xmlFree(attvalue); + if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) + break; + if (!IS_BLANK_CH(RAW)) { + xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, + "attributes construct error\n"); + break; + } SKIP_BLANKS; - if (ctxt->input->base != base) goto base_changed; + if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) + goto base_changed; continue; } @@ -8036,10 +9587,13 @@ reparse: xmlFree(attvalue); } -failed: +failed: GROW - if (ctxt->input->base != base) goto base_changed; + if (ctxt->instate == XML_PARSER_EOF) + break; + if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) + goto base_changed; if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) break; if (!IS_BLANK_CH(RAW)) { @@ -8055,7 +9609,8 @@ failed: break; } GROW; - if (ctxt->input->base != base) goto base_changed; + if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) + goto base_changed; } /* @@ -8067,8 +9622,8 @@ failed: defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); if (defaults != NULL) { for (i = 0;i < defaults->nbAttrs;i++) { - attname = defaults->values[4 * i]; - aprefix = defaults->values[4 * i + 1]; + attname = defaults->values[5 * i]; + aprefix = defaults->values[5 * i + 1]; /* * special work for namespaces defaulted defs @@ -8083,9 +9638,9 @@ failed: if (j <= nbNs) continue; nsname = xmlGetNamespace(ctxt, NULL); - if (nsname != defaults->values[4 * i + 2]) { + if (nsname != defaults->values[5 * i + 2]) { if (nsPush(ctxt, NULL, - defaults->values[4 * i + 2]) > 0) + defaults->values[5 * i + 2]) > 0) nbNs++; } } else if (aprefix == ctxt->str_xmlns) { @@ -8100,7 +9655,7 @@ failed: nsname = xmlGetNamespace(ctxt, attname); if (nsname != defaults->values[2]) { if (nsPush(ctxt, attname, - defaults->values[4 * i + 2]) > 0) + defaults->values[5 * i + 2]) > 0) nbNs++; } } else { @@ -8126,8 +9681,14 @@ failed: atts[nbatts++] = NULL; else atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); - atts[nbatts++] = defaults->values[4 * i + 2]; - atts[nbatts++] = defaults->values[4 * i + 3]; + atts[nbatts++] = defaults->values[5 * i + 2]; + atts[nbatts++] = defaults->values[5 * i + 3]; + if ((ctxt->standalone == 1) && + (defaults->values[5 * i + 4] != NULL)) { + xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, + "standalone: attribute %s on %s defaulted from external subset\n", + attname, localname); + } nbdef++; } } @@ -8154,7 +9715,7 @@ failed: /* * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same - * start-tag or empty-element tag. + * start-tag or empty-element tag. * As extended by the Namespace in XML REC. */ for (j = 0; j < i;j += 5) { @@ -8216,6 +9777,17 @@ base_changed: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) xmlFree((xmlChar *) atts[i]); } + + /* + * We can't switch from one entity to another in the middle + * of a start tag + */ + if (inputNr != ctxt->inputNr) { + xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, + "Start tag doesn't start and stop in the same entity\n"); + return(NULL); + } + ctxt->input->cur = ctxt->input->base + cur; ctxt->input->line = oldline; ctxt->input->col = oldcol; @@ -8255,9 +9827,11 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { if (ctxt->input->cur[tlen] == '>') { ctxt->input->cur += tlen + 1; + ctxt->input->col += tlen + 1; goto done; } ctxt->input->cur += tlen; + ctxt->input->col += tlen; name = (xmlChar*)1; } else { if (prefix == NULL) @@ -8270,6 +9844,8 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, * We should definitely be at the ending "S? '>'" part */ GROW; + if (ctxt->instate == XML_PARSER_EOF) + return; SKIP_BLANKS; if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); @@ -8279,11 +9855,13 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, /* * [ WFC: Element Type Match ] * The Name in an element's end-tag must match the element type in the - * start-tag. + * start-tag. * */ if (name != (xmlChar*)1) { if (name == NULL) name = BAD_CAST "unparseable"; + if ((line == 0) && (ctxt->node != NULL)) + line = ctxt->node->line; xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, "Opening and ending tag mismatch: %s line %d and %s\n", ctxt->name, line, name); @@ -8306,7 +9884,7 @@ done: /** * xmlParseCDSect: * @ctxt: an XML parser context - * + * * Parse escaped pure raw content. * * [18] CDSect ::= CDStart CData CDEnd @@ -8359,14 +9937,21 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) { if (len + 5 >= size) { xmlChar *tmp; - size *= 2; - tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if ((size > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, + "CData section too big found", NULL); + xmlFree (buf); + return; + } + tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); if (tmp == NULL) { xmlFree(buf); xmlErrMemory(ctxt, NULL); return; } buf = tmp; + size *= 2; } COPY_BUF(rl,buf,len,r); r = s; @@ -8376,6 +9961,10 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) { count++; if (count > 50) { GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } count = 0; } NEXTL(l); @@ -8455,7 +10044,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { /* * Fifth case : a reference. If if has not been resolved, - * parsing returns it's Name, create the node + * parsing returns it's Name, create the node */ else if (*cur == '&') { @@ -8480,7 +10069,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "detected an error in element content\n"); - ctxt->instate = XML_PARSER_EOF; + xmlHaltParser(ctxt); break; } } @@ -8496,25 +10085,26 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { * * [ WFC: Element Type Match ] * The Name in an element's end-tag must match the element type in the - * start-tag. + * start-tag. * */ void xmlParseElement(xmlParserCtxtPtr ctxt) { const xmlChar *name; - const xmlChar *prefix; - const xmlChar *URI; + const xmlChar *prefix = NULL; + const xmlChar *URI = NULL; xmlParserNodeInfo node_info; - int line, tlen; + int line, tlen = 0; xmlNodePtr ret; int nsNr = ctxt->nsNr; - if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) { - xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, - "Excessive depth in document: change xmlParserMaxDepth = %d\n", - xmlParserMaxDepth); - ctxt->instate = XML_PARSER_EOF; + if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, + "Excessive depth in document: %d use XML_PARSE_HUGE option\n", + xmlParserMaxDepth); + xmlHaltParser(ctxt); return; } @@ -8541,6 +10131,8 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { else name = xmlParseStartTag(ctxt); #endif /* LIBXML_SAX1_ENABLED */ + if (ctxt->instate == XML_PARSER_EOF) + return; if (name == NULL) { spacePop(ctxt); return; @@ -8552,7 +10144,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { /* * [ VC: Root Element Type ] * The Name in the document type declaration must match the element - * type of the root element. + * type of the root element. */ if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && ctxt->node && (ctxt->node == ctxt->myDoc->children)) @@ -8621,6 +10213,8 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { * Parse the content of the element: */ xmlParseContent(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + return; if (!IS_BYTE_CHAR(RAW)) { xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, "Premature end of data in tag %s line %d\n", @@ -8667,7 +10261,9 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { * * parse the XML version value. * - * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ + * [26] VersionNum ::= '1.' [0-9]+ + * + * In practice allow [0-9].[0-9]+ at that level * * Returns the string giving the XML version number, or NULL */ @@ -8684,17 +10280,28 @@ xmlParseVersionNum(xmlParserCtxtPtr ctxt) { return(NULL); } cur = CUR; - while (((cur >= 'a') && (cur <= 'z')) || - ((cur >= 'A') && (cur <= 'Z')) || - ((cur >= '0') && (cur <= '9')) || - (cur == '_') || (cur == '.') || - (cur == ':') || (cur == '-')) { + if (!((cur >= '0') && (cur <= '9'))) { + xmlFree(buf); + return(NULL); + } + buf[len++] = cur; + NEXT; + cur=CUR; + if (cur != '.') { + xmlFree(buf); + return(NULL); + } + buf[len++] = cur; + NEXT; + cur=CUR; + while ((cur >= '0') && (cur <= '9')) { if (len + 1 >= size) { xmlChar *tmp; size *= 2; tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (tmp == NULL) { + xmlFree(buf); xmlErrMemory(ctxt, NULL); return(NULL); } @@ -8711,11 +10318,11 @@ xmlParseVersionNum(xmlParserCtxtPtr ctxt) { /** * xmlParseVersionInfo: * @ctxt: an XML parser context - * + * * parse the XML version. * * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") - * + * * [25] Eq ::= S? '=' S? * * Returns the version string, e.g. "1.0" @@ -8780,7 +10387,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) { xmlErrMemory(ctxt, NULL); return(NULL); } - + buf[len++] = cur; NEXT; cur = CUR; @@ -8820,7 +10427,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) { /** * xmlParseEncodingDecl: * @ctxt: an XML parser context - * + * * parse the XML encoding declaration * * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") @@ -8849,6 +10456,8 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { encoding = xmlParseEncName(ctxt); if (RAW != '"') { xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); + xmlFree((xmlChar *) encoding); + return(NULL); } else NEXT; } else if (RAW == '\''){ @@ -8856,11 +10465,22 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { encoding = xmlParseEncName(ctxt); if (RAW != '\'') { xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); + xmlFree((xmlChar *) encoding); + return(NULL); } else NEXT; } else { xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); } + + /* + * Non standard parsing, allowing the user to ignore encoding + */ + if (ctxt->options & XML_PARSE_IGNORE_ENC) { + xmlFree((xmlChar *) encoding); + return(NULL); + } + /* * UTF-16 encoding stwich has already taken place at this stage, * more over the little-endian/big-endian selection is already done @@ -8868,6 +10488,18 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { if ((encoding != NULL) && ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { + /* + * If no encoding was passed to the parser, that we are + * using UTF-16 and no decoder is present i.e. the + * document is apparently UTF-8 compatible, then raise an + * encoding mismatch fatal error + */ + if ((ctxt->encoding == NULL) && + (ctxt->input->buf != NULL) && + (ctxt->input->buf->encoder == NULL)) { + xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, + "Document labelled UTF-16 but has UTF-8 content\n"); + } if (ctxt->encoding != NULL) xmlFree((xmlChar *) ctxt->encoding); ctxt->encoding = encoding; @@ -8891,7 +10523,11 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { handler = xmlFindCharEncodingHandler((const char *) encoding); if (handler != NULL) { - xmlSwitchToEncoding(ctxt, handler); + if (xmlSwitchToEncoding(ctxt, handler) < 0) { + /* failed to convert */ + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + return(NULL); + } } else { xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, "Unsupported encoding %s\n", encoding); @@ -8909,7 +10545,7 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { * parse the XML standalone declaration * * [32] SDDecl ::= S 'standalone' Eq - * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) + * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) * * [ VC: Standalone Document Declaration ] * TODO The standalone document declaration must have the value "no" @@ -8925,12 +10561,17 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { * - element types with element content, if white space occurs directly * within any instance of those types. * - * Returns 1 if standalone, 0 otherwise + * Returns: + * 1 if standalone="yes" + * 0 if standalone="no" + * -2 if standalone attribute is missing or invalid + * (A standalone value of -2 means that the XML declaration was found, + * but no value was specified for the standalone attribute). */ int xmlParseSDDecl(xmlParserCtxtPtr ctxt) { - int standalone = -1; + int standalone = -2; SKIP_BLANKS; if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { @@ -8984,7 +10625,7 @@ xmlParseSDDecl(xmlParserCtxtPtr ctxt) { /** * xmlParseXMLDecl: * @ctxt: an XML parser context - * + * * parse an XML declaration header * * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' @@ -9021,11 +10662,23 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { } else { if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { /* - * TODO: Blueberry should be detected here + * Changed here for XML-1.0 5th edition */ - xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, - "Unsupported version '%s'\n", - version, NULL); + if (ctxt->options & XML_PARSE_OLD10) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, + "Unsupported version '%s'\n", + version); + } else { + if ((version[0] == '1') && ((version[1] == '.'))) { + xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, + "Unsupported version '%s'\n", + version, NULL); + } else { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, + "Unsupported version '%s'\n", + version); + } + } } if (ctxt->version != NULL) xmlFree((void *) ctxt->version); @@ -9043,7 +10696,8 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); } xmlParseEncodingDecl(ctxt); - if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || + (ctxt->instate == XML_PARSER_EOF)) { /* * The XML REC instructs us to stop parsing right here */ @@ -9060,6 +10714,12 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { } xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); } + + /* + * We can grow the input buffer freely at that point + */ + GROW; + SKIP_BLANKS; ctxt->input->standalone = xmlParseSDDecl(ctxt); @@ -9080,7 +10740,7 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { /** * xmlParseMisc: * @ctxt: an XML parser context - * + * * parse an XML Misc* optional field. * * [27] Misc ::= Comment | PI | S @@ -9088,9 +10748,10 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { void xmlParseMisc(xmlParserCtxtPtr ctxt) { - while (((RAW == '<') && (NXT(1) == '?')) || - (CMP4(CUR_PTR, '<', '!', '-', '-')) || - IS_BLANK_CH(CUR)) { + while ((ctxt->instate != XML_PARSER_EOF) && + (((RAW == '<') && (NXT(1) == '?')) || + (CMP4(CUR_PTR, '<', '!', '-', '-')) || + IS_BLANK_CH(CUR))) { if ((RAW == '<') && (NXT(1) == '?')) { xmlParsePI(ctxt); } else if (IS_BLANK_CH(CUR)) { @@ -9103,7 +10764,7 @@ xmlParseMisc(xmlParserCtxtPtr ctxt) { /** * xmlParseDocument: * @ctxt: an XML parser context - * + * * parse an XML document (and build a tree if using the standard SAX * interface). * @@ -9137,10 +10798,12 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { */ if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); - if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && + if ((ctxt->encoding == NULL) && ((ctxt->input->end - ctxt->input->cur) >= 4)) { - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -9158,19 +10821,26 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { if (CUR == 0) { xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); + return(-1); } /* * Check for the XMLDecl in the Prolog. + * do not GROW here to avoid the detected encoder to decode more + * than just the first line, unless the amount of data is really + * too small to hold "<?xml version="1.0" encoding="foo" */ - GROW; + if ((ctxt->input->end - ctxt->input->cur) < 35) { + GROW; + } if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { /* * Note that we will switch encoding on the fly. */ xmlParseXMLDecl(ctxt); - if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || + (ctxt->instate == XML_PARSER_EOF)) { /* * The XML REC instructs us to stop parsing right here */ @@ -9183,6 +10853,12 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { } if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) ctxt->sax->startDocument(ctxt->userData); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); + if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && + (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { + ctxt->myDoc->compression = ctxt->input->buf->compressed; + } /* * The Misc part of the Prolog @@ -9202,6 +10878,8 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { if (RAW == '[') { ctxt->instate = XML_PARSER_DTD; xmlParseInternalSubset(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); } /* @@ -9212,8 +10890,11 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { (!ctxt->disableSAX)) ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, ctxt->extSubSystem, ctxt->extSubURI); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); ctxt->inSubset = 0; + xmlCleanSpecialAttr(ctxt); ctxt->instate = XML_PARSER_PROLOG; xmlParseMisc(ctxt); @@ -9258,6 +10939,15 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { ctxt->myDoc = NULL; } + if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { + ctxt->myDoc->properties |= XML_DOC_WELLFORMED; + if (ctxt->valid) + ctxt->myDoc->properties |= XML_DOC_DTDVALID; + if (ctxt->nsWellFormed) + ctxt->myDoc->properties |= XML_DOC_NSVALID; + if (ctxt->options & XML_PARSE_OLD10) + ctxt->myDoc->properties |= XML_DOC_OLD10; + } if (! ctxt->wellFormed) { ctxt->valid = 0; return(-1); @@ -9268,7 +10958,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { /** * xmlParseExtParsedEnt: * @ctxt: an XML parser context - * + * * parse a general parsed entity * An external general parsed entity is well-formed if it matches the * production labeled extParsedEnt. @@ -9299,7 +10989,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -9342,6 +11032,8 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { } if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) ctxt->sax->startDocument(ctxt->userData); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); /* * Doing validity checking on chunk doesn't make sense @@ -9352,7 +11044,9 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { ctxt->depth = 0; xmlParseContent(ctxt); - + if (ctxt->instate == XML_PARSER_EOF) + return(-1); + if ((RAW == '<') && (NXT(1) == '/')) { xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); } else if (RAW != 0) { @@ -9372,7 +11066,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { #ifdef LIBXML_PUSH_ENABLED /************************************************************************ * * - * Progressive parsing interfaces * + * Progressive parsing interfaces * * * ************************************************************************/ @@ -9409,8 +11103,8 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, buf = in->base; len = in->length; } else { - buf = in->buf->buffer->content; - len = in->buf->buffer->use; + buf = xmlBufContent(in->buf->buffer); + len = xmlBufUse(in->buf->buffer); } /* take into account the sequence length */ if (third) len -= 2; @@ -9433,7 +11127,7 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, xmlGenericError(xmlGenericErrorContext, "PP: lookup '%c%c' found at %d\n", first, next, base); - else + else xmlGenericError(xmlGenericErrorContext, "PP: lookup '%c%c%c' found at %d\n", first, next, third, base); @@ -9449,7 +11143,7 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, else if (third == 0) xmlGenericError(xmlGenericErrorContext, "PP: lookup '%c%c' failed\n", first, next); - else + else xmlGenericError(xmlGenericErrorContext, "PP: lookup '%c%c%c' failed\n", first, next, third); #endif @@ -9531,7 +11225,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) { if ((utf == NULL) || (len <= 0)) return(0); - + for (ix = 0; ix < len;) { /* string is 0-terminated */ c = utf[ix]; if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ @@ -9542,7 +11236,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) { else return(-ix); } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ - if (ix + 2 > len) return(ix); + if (ix + 2 > len) return(-ix); if ((utf[ix+1] & 0xc0 ) != 0x80) return(-ix); codepoint = (utf[ix] & 0x1f) << 6; @@ -9551,7 +11245,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) { return(-ix); ix += 2; } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ - if (ix + 3 > len) return(ix); + if (ix + 3 > len) return(-ix); if (((utf[ix+1] & 0xc0) != 0x80) || ((utf[ix+2] & 0xc0) != 0x80)) return(-ix); @@ -9562,7 +11256,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) { return(-ix); ix += 3; } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ - if (ix + 4 > len) return(ix); + if (ix + 4 > len) return(-ix); if (((utf[ix+1] & 0xc0) != 0x80) || ((utf[ix+2] & 0xc0) != 0x80) || ((utf[ix+3] & 0xc0) != 0x80)) @@ -9659,11 +11353,11 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { } xmlParseGetLasts(ctxt, &lastlt, &lastgt); - while (1) { + while (ctxt->instate != XML_PARSER_EOF) { if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) return(0); - + /* * Pop-up of finished entities. */ @@ -9678,22 +11372,22 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { /* * If we are operating on converted input, try to flush * remainng chars to avoid them stalling in the non-converted - * buffer. + * buffer. But do not do this in document start where + * encoding="..." may not have been read and we work on a + * guessed encoding. */ - if ((ctxt->input->buf->raw != NULL) && - (ctxt->input->buf->raw->use > 0)) { - int base = ctxt->input->base - - ctxt->input->buf->buffer->content; - int current = ctxt->input->cur - ctxt->input->base; + if ((ctxt->instate != XML_PARSER_START) && + (ctxt->input->buf->raw != NULL) && + (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, + ctxt->input); + size_t current = ctxt->input->cur - ctxt->input->base; xmlParserInputBufferPush(ctxt->input->buf, 0, ""); - ctxt->input->base = ctxt->input->buf->buffer->content + base; - ctxt->input->cur = ctxt->input->base + current; - ctxt->input->end = - &ctxt->input->buf->buffer->content[ - ctxt->input->buf->buffer->use]; + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, + base, current); } - avail = ctxt->input->buf->buffer->use - + avail = xmlBufUse(ctxt->input->buf->buffer) - (ctxt->input->cur - ctxt->input->base); } if (avail < 1) @@ -9715,7 +11409,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if (avail < 4) goto done; - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines, @@ -9740,7 +11434,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); - ctxt->instate = XML_PARSER_EOF; + xmlHaltParser(ctxt); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: entering EOF\n"); @@ -9773,7 +11467,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { * The XML REC instructs us to stop parsing right * here */ - ctxt->instate = XML_PARSER_EOF; + xmlHaltParser(ctxt); return(0); } ctxt->standalone = ctxt->input->standalone; @@ -9820,8 +11514,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { break; case XML_PARSER_START_TAG: { const xmlChar *name; - const xmlChar *prefix; - const xmlChar *URI; + const xmlChar *prefix = NULL; + const xmlChar *URI = NULL; int nsNr = ctxt->nsNr; if ((avail < 2) && (ctxt->inputNr == 1)) @@ -9829,7 +11523,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { cur = ctxt->input->cur[0]; if (cur != '<') { xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); - ctxt->instate = XML_PARSER_EOF; + xmlHaltParser(ctxt); if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); goto done; @@ -9857,9 +11551,11 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { else name = xmlParseStartTag(ctxt); #endif /* LIBXML_SAX1_ENABLED */ + if (ctxt->instate == XML_PARSER_EOF) + goto done; if (name == NULL) { spacePop(ctxt); - ctxt->instate = XML_PARSER_EOF; + xmlHaltParser(ctxt); if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); goto done; @@ -9868,7 +11564,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { /* * [ VC: Root Element Type ] * The Name in the document type declaration must match - * the element type of the root element. + * the element type of the root element. */ if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && ctxt->node && (ctxt->node == ctxt->myDoc->children)) @@ -9897,12 +11593,15 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { ctxt->sax->endElement(ctxt->userData, name); #endif /* LIBXML_SAX1_ENABLED */ } + if (ctxt->instate == XML_PARSER_EOF) + goto done; spacePop(ctxt); if (ctxt->nameNr == 0) { ctxt->instate = XML_PARSER_EPILOG; } else { ctxt->instate = XML_PARSER_CONTENT; } + ctxt->progressive = 1; break; } if (RAW == '>') { @@ -9922,6 +11621,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { #endif /* LIBXML_SAX1_ENABLED */ ctxt->instate = XML_PARSER_CONTENT; + ctxt->progressive = 1; break; } case XML_PARSER_CONTENT: { @@ -9939,9 +11639,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { break; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { + ctxt->progressive = XML_PARSER_PI; goto done; + } xmlParsePI(ctxt); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->progressive = 1; } else if ((cur == '<') && (next != '!')) { ctxt->instate = XML_PARSER_START_TAG; break; @@ -9955,10 +11659,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { ctxt->input->cur += 4; term = xmlParseLookupSequence(ctxt, '-', '-', '>'); ctxt->input->cur -= 4; - if ((!terminate) && (term < 0)) + if ((!terminate) && (term < 0)) { + ctxt->progressive = XML_PARSER_COMMENT; goto done; + } xmlParseComment(ctxt); ctxt->instate = XML_PARSER_CONTENT; + ctxt->progressive = 1; } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && (ctxt->input->cur[2] == '[') && (ctxt->input->cur[3] == 'C') && @@ -10015,7 +11722,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "detected an error in element content\n"); - ctxt->instate = XML_PARSER_EOF; + xmlHaltParser(ctxt); break; } break; @@ -10043,7 +11750,9 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { else xmlParseEndTag1(ctxt, 0); #endif /* LIBXML_SAX1_ENABLED */ - if (ctxt->nameNr == 0) { + if (ctxt->instate == XML_PARSER_EOF) { + /* Nothing */ + } else if (ctxt->nameNr == 0) { ctxt->instate = XML_PARSER_EPILOG; } else { ctxt->instate = XML_PARSER_CONTENT; @@ -10051,7 +11760,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { break; case XML_PARSER_CDATA_SECTION: { /* - * The Push mode need to have the SAX callback for + * The Push mode need to have the SAX callback for * cdataBlock merge back contiguous callbacks. */ int base; @@ -10061,7 +11770,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { int tmp; - tmp = xmlCheckCdataPush(ctxt->input->cur, + tmp = xmlCheckCdataPush(ctxt->input->cur, XML_PARSER_BIG_BUFFER_SIZE); if (tmp < 0) { tmp = -tmp; @@ -10076,6 +11785,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { ctxt->sax->characters(ctxt->userData, ctxt->input->cur, tmp); } + if (ctxt->instate == XML_PARSER_EOF) + goto done; SKIPL(tmp); ctxt->checkIndex = 0; } @@ -10089,7 +11800,20 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { ctxt->input->cur += tmp; goto encoding_error; } - if ((ctxt->sax != NULL) && (base > 0) && + if ((ctxt->sax != NULL) && (base == 0) && + (ctxt->sax->cdataBlock != NULL) && + (!ctxt->disableSAX)) { + /* + * Special case to provide identical behaviour + * between pull and push parsers on enpty CDATA + * sections + */ + if ((ctxt->input->cur - ctxt->input->base >= 9) && + (!strncmp((const char *)&ctxt->input->cur[-9], + "<![CDATA[", 9))) + ctxt->sax->cdataBlock(ctxt->userData, + BAD_CAST "", 0); + } else if ((ctxt->sax != NULL) && (base > 0) && (!ctxt->disableSAX)) { if (ctxt->sax->cdataBlock != NULL) ctxt->sax->cdataBlock(ctxt->userData, @@ -10098,6 +11822,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { ctxt->sax->characters(ctxt->userData, ctxt->input->cur, base); } + if (ctxt->instate == XML_PARSER_EOF) + goto done; SKIPL(base + 3); ctxt->checkIndex = 0; ctxt->instate = XML_PARSER_CONTENT; @@ -10114,7 +11840,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - + avail = xmlBufUse(ctxt->input->buf->buffer) - (ctxt->input->cur - ctxt->input->base); if (avail < 2) goto done; @@ -10122,25 +11848,38 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { next = ctxt->input->cur[1]; if ((cur == '<') && (next == '?')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { + ctxt->progressive = XML_PARSER_PI; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing PI\n"); #endif xmlParsePI(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; + ctxt->instate = XML_PARSER_MISC; + ctxt->progressive = 1; + ctxt->checkIndex = 0; } else if ((cur == '<') && (next == '!') && (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { + ctxt->progressive = XML_PARSER_COMMENT; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing Comment\n"); #endif xmlParseComment(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; ctxt->instate = XML_PARSER_MISC; + ctxt->progressive = 1; + ctxt->checkIndex = 0; } else if ((cur == '<') && (next == '!') && (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') && @@ -10150,14 +11889,20 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { (ctxt->input->cur[7] == 'P') && (ctxt->input->cur[8] == 'E')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { + ctxt->progressive = XML_PARSER_DTD; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing internal subset\n"); #endif ctxt->inSubset = 1; + ctxt->progressive = 0; + ctxt->checkIndex = 0; xmlParseDocTypeDecl(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; if (RAW == '[') { ctxt->instate = XML_PARSER_DTD; #ifdef DEBUG_PUSH @@ -10175,6 +11920,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { ctxt->intSubName, ctxt->extSubSystem, ctxt->extSubURI); ctxt->inSubset = 0; + xmlCleanSpecialAttr(ctxt); ctxt->instate = XML_PARSER_PROLOG; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -10186,7 +11932,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { goto done; } else { ctxt->instate = XML_PARSER_START_TAG; - ctxt->progressive = 1; + ctxt->progressive = XML_PARSER_START_TAG; xmlParseGetLasts(ctxt, &lastlt, &lastgt); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -10199,38 +11945,50 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if (ctxt->input->buf == NULL) avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); - if (avail < 2) + avail = xmlBufUse(ctxt->input->buf->buffer) - + (ctxt->input->cur - ctxt->input->base); + if (avail < 2) goto done; cur = ctxt->input->cur[0]; next = ctxt->input->cur[1]; if ((cur == '<') && (next == '?')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { + ctxt->progressive = XML_PARSER_PI; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing PI\n"); #endif xmlParsePI(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; + ctxt->instate = XML_PARSER_PROLOG; + ctxt->progressive = 1; } else if ((cur == '<') && (next == '!') && (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { + ctxt->progressive = XML_PARSER_COMMENT; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing Comment\n"); #endif xmlParseComment(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; ctxt->instate = XML_PARSER_PROLOG; + ctxt->progressive = 1; } else if ((cur == '<') && (next == '!') && (avail < 4)) { goto done; } else { ctxt->instate = XML_PARSER_START_TAG; if (ctxt->progressive == 0) - ctxt->progressive = 1; + ctxt->progressive = XML_PARSER_START_TAG; xmlParseGetLasts(ctxt, &lastlt, &lastgt); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -10243,38 +12001,49 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if (ctxt->input->buf == NULL) avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); + avail = xmlBufUse(ctxt->input->buf->buffer) - + (ctxt->input->cur - ctxt->input->base); if (avail < 2) goto done; cur = ctxt->input->cur[0]; next = ctxt->input->cur[1]; if ((cur == '<') && (next == '?')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { + ctxt->progressive = XML_PARSER_PI; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing PI\n"); #endif xmlParsePI(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; ctxt->instate = XML_PARSER_EPILOG; + ctxt->progressive = 1; } else if ((cur == '<') && (next == '!') && (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { + ctxt->progressive = XML_PARSER_COMMENT; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing Comment\n"); #endif xmlParseComment(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; ctxt->instate = XML_PARSER_EPILOG; + ctxt->progressive = 1; } else if ((cur == '<') && (next == '!') && (avail < 4)) { goto done; } else { xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); - ctxt->instate = XML_PARSER_EOF; + xmlHaltParser(ctxt); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: entering EOF\n"); @@ -10298,29 +12067,28 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { int base, i; xmlChar *buf; xmlChar quote = 0; + size_t use; base = ctxt->input->cur - ctxt->input->base; if (base < 0) return(0); if (ctxt->checkIndex > base) base = ctxt->checkIndex; - buf = ctxt->input->buf->buffer->content; - for (;(unsigned int) base < ctxt->input->buf->buffer->use; - base++) { + buf = xmlBufContent(ctxt->input->buf->buffer); + use = xmlBufUse(ctxt->input->buf->buffer); + for (;(unsigned int) base < use; base++) { if (quote != 0) { if (buf[base] == quote) quote = 0; - continue; + continue; } if ((quote == 0) && (buf[base] == '<')) { int found = 0; /* special handling of comments */ - if (((unsigned int) base + 4 < - ctxt->input->buf->buffer->use) && + if (((unsigned int) base + 4 < use) && (buf[base + 1] == '!') && (buf[base + 2] == '-') && (buf[base + 3] == '-')) { - for (;(unsigned int) base + 3 < - ctxt->input->buf->buffer->use; base++) { + for (;(unsigned int) base + 3 < use; base++) { if ((buf[base] == '-') && (buf[base + 1] == '-') && (buf[base + 2] == '>')) { @@ -10351,17 +12119,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { fprintf(stderr, "%c%c%c%c: ", buf[base], buf[base + 1], buf[base + 2], buf[base + 3]); #endif - if ((unsigned int) base +1 >= - ctxt->input->buf->buffer->use) + if ((unsigned int) base +1 >= use) break; if (buf[base + 1] == ']') { /* conditional crap, skip both ']' ! */ base++; continue; } - for (i = 1; - (unsigned int) base + i < ctxt->input->buf->buffer->use; - i++) { + for (i = 1; (unsigned int) base + i < use; i++) { if (buf[base + i] == '>') { #if 0 fprintf(stderr, "found\n"); @@ -10379,7 +12144,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { fprintf(stderr, "end of stream\n"); #endif break; - + } not_end_of_int_subset: continue; /* for */ @@ -10387,6 +12152,10 @@ not_end_of_int_subset: /* * We didn't found the end of the Internal subset */ + if (quote == 0) + ctxt->checkIndex = base; + else + ctxt->checkIndex = 0; #ifdef DEBUG_PUSH if (next == 0) xmlGenericError(xmlGenericErrorContext, @@ -10395,13 +12164,19 @@ not_end_of_int_subset: goto done; found_end_int_subset: + ctxt->checkIndex = 0; xmlParseInternalSubset(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; ctxt->inSubset = 2; if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && (ctxt->sax->externalSubset != NULL)) ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, ctxt->extSubSystem, ctxt->extSubURI); ctxt->inSubset = 0; + xmlCleanSpecialAttr(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; ctxt->instate = XML_PARSER_PROLOG; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH @@ -10484,7 +12259,7 @@ found_end_int_subset: break; } } -done: +done: #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); #endif @@ -10504,6 +12279,55 @@ encoding_error: } /** + * xmlParseCheckTransition: + * @ctxt: an XML parser context + * @chunk: a char array + * @size: the size in byte of the chunk + * + * Check depending on the current parser state if the chunk given must be + * processed immediately or one need more data to advance on parsing. + * + * Returns -1 in case of error, 0 if the push is not needed and 1 if needed + */ +static int +xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { + if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) + return(-1); + if (ctxt->instate == XML_PARSER_START_TAG) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if (ctxt->progressive == XML_PARSER_COMMENT) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if (ctxt->instate == XML_PARSER_CDATA_SECTION) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if (ctxt->progressive == XML_PARSER_PI) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if (ctxt->instate == XML_PARSER_END_TAG) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if ((ctxt->progressive == XML_PARSER_DTD) || + (ctxt->instate == XML_PARSER_DTD)) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + return(1); +} + +/** * xmlParseChunk: * @ctxt: an XML parser context * @chunk: an char array @@ -10518,11 +12342,16 @@ int xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, int terminate) { int end_in_lf = 0; + int remain = 0; + size_t old_avail = 0; + size_t avail = 0; if (ctxt == NULL) return(XML_ERR_INTERNAL_ERROR); if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) return(ctxt->errNo); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); if (ctxt->instate == XML_PARSER_START) xmlDetectSAX2(ctxt); if ((size > 0) && (chunk != NULL) && (!terminate) && @@ -10530,22 +12359,58 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, end_in_lf = 1; size--; } + +xmldecl_done: + if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { - int base = ctxt->input->base - ctxt->input->buf->buffer->content; - int cur = ctxt->input->cur - ctxt->input->base; + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); + size_t cur = ctxt->input->cur - ctxt->input->base; int res; - - res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + + old_avail = xmlBufUse(ctxt->input->buf->buffer); + /* + * Specific handling if we autodetected an encoding, we should not + * push more than the first line ... which depend on the encoding + * And only push the rest once the final encoding was detected + */ + if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && + (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { + unsigned int len = 45; + + if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, + BAD_CAST "UTF-16")) || + (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, + BAD_CAST "UTF16"))) + len = 90; + else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, + BAD_CAST "UCS-4")) || + (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, + BAD_CAST "UCS4"))) + len = 180; + + if (ctxt->input->buf->rawconsumed < len) + len -= ctxt->input->buf->rawconsumed; + + /* + * Change size for reading the initial declaration only + * if size is greater than len. Otherwise, memmove in xmlBufferAdd + * will blindly copy extra bytes from memory. + */ + if ((unsigned int) size > len) { + remain = size - len; + size = len; + } else { + remain = 0; + } + } + res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); if (res < 0) { ctxt->errNo = XML_PARSER_EOF; - ctxt->disableSAX = 1; + xmlHaltParser(ctxt); return (XML_PARSER_EOF); } - ctxt->input->base = ctxt->input->buf->buffer->content + base; - ctxt->input->cur = ctxt->input->base + cur; - ctxt->input->end = - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); #endif @@ -10556,44 +12421,88 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, if ((in->encoder != NULL) && (in->buffer != NULL) && (in->raw != NULL)) { int nbchars; - - nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); + size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); + size_t current = ctxt->input->cur - ctxt->input->base; + + nbchars = xmlCharEncInput(in, terminate); if (nbchars < 0) { /* TODO 2.6.0 */ xmlGenericError(xmlGenericErrorContext, "xmlParseChunk: encoder error\n"); return(XML_ERR_INVALID_ENCODING); } + xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); } } } - xmlParseTryOrFinish(ctxt, terminate); + if (remain != 0) { + xmlParseTryOrFinish(ctxt, 0); + } else { + if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) + avail = xmlBufUse(ctxt->input->buf->buffer); + /* + * Depending on the current state it may not be such + * a good idea to try parsing if there is nothing in the chunk + * which would be worth doing a parser state transition and we + * need to wait for more data + */ + if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || + (old_avail == 0) || (avail == 0) || + (xmlParseCheckTransition(ctxt, + (const char *)&ctxt->input->base[old_avail], + avail - old_avail))) + xmlParseTryOrFinish(ctxt, terminate); + } + if (ctxt->instate == XML_PARSER_EOF) + return(ctxt->errNo); + + if ((ctxt->input != NULL) && + (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || + ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); + xmlHaltParser(ctxt); + } + if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) + return(ctxt->errNo); + + if (remain != 0) { + chunk += size; + size = remain; + remain = 0; + goto xmldecl_done; + } if ((end_in_lf == 1) && (ctxt->input != NULL) && (ctxt->input->buf != NULL)) { + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, + ctxt->input); + size_t current = ctxt->input->cur - ctxt->input->base; + xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); + + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, + base, current); } - if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) - return(ctxt->errNo); if (terminate) { /* * Check for termination */ - int avail = 0; + int cur_avail = 0; if (ctxt->input != NULL) { if (ctxt->input->buf == NULL) - avail = ctxt->input->length - - (ctxt->input->cur - ctxt->input->base); + cur_avail = ctxt->input->length - + (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - - (ctxt->input->cur - ctxt->input->base); + cur_avail = xmlBufUse(ctxt->input->buf->buffer) - + (ctxt->input->cur - ctxt->input->base); } - + if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->instate != XML_PARSER_EPILOG)) { xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); - } - if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { + } + if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); } if (ctxt->instate != XML_PARSER_EOF) { @@ -10602,12 +12511,15 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, } ctxt->instate = XML_PARSER_EOF; } - return((xmlParserErrors) ctxt->errNo); + if (ctxt->wellFormed == 0) + return((xmlParserErrors) ctxt->errNo); + else + return(0); } /************************************************************************ * * - * I/O front end functions to the parser * + * I/O front end functions to the parser * * * ************************************************************************/ @@ -10631,7 +12543,7 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, */ xmlParserCtxtPtr -xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, +xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, const char *chunk, int size, const char *filename) { xmlParserCtxtPtr ctxt; xmlParserInputPtr inputStream; @@ -10680,7 +12592,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); if (user_data != NULL) ctxt->userData = user_data; - } + } if (filename == NULL) { ctxt->directory = NULL; } else { @@ -10706,11 +12618,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, } } inputStream->buf = buf; - inputStream->base = inputStream->buf->buffer->content; - inputStream->cur = inputStream->buf->buffer->content; - inputStream->end = - &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; - + xmlBufResetInput(inputStream->buf->buffer, inputStream); inputPush(ctxt, inputStream); /* @@ -10721,15 +12629,12 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, if ((size == 0) || (chunk == NULL)) { ctxt->charset = XML_CHAR_ENCODING_NONE; } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { - int base = ctxt->input->base - ctxt->input->buf->buffer->content; - int cur = ctxt->input->cur - ctxt->input->base; + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); + size_t cur = ctxt->input->cur - ctxt->input->base; - xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + xmlParserInputBufferPush(ctxt->input->buf, size, chunk); - ctxt->input->base = ctxt->input->buf->buffer->content + base; - ctxt->input->cur = ctxt->input->base + cur; - ctxt->input->end = - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); #endif @@ -10744,24 +12649,47 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, #endif /* LIBXML_PUSH_ENABLED */ /** - * xmlStopParser: + * xmlHaltParser: * @ctxt: an XML parser context * - * Blocks further parser processing + * Blocks further parser processing don't override error + * for internal use */ -void -xmlStopParser(xmlParserCtxtPtr ctxt) { +static void +xmlHaltParser(xmlParserCtxtPtr ctxt) { if (ctxt == NULL) return; ctxt->instate = XML_PARSER_EOF; ctxt->disableSAX = 1; if (ctxt->input != NULL) { + /* + * in case there was a specific allocation deallocate before + * overriding base + */ + if (ctxt->input->free != NULL) { + ctxt->input->free((xmlChar *) ctxt->input->base); + ctxt->input->free = NULL; + } ctxt->input->cur = BAD_CAST""; ctxt->input->base = ctxt->input->cur; } } /** + * xmlStopParser: + * @ctxt: an XML parser context + * + * Blocks further parser processing + */ +void +xmlStopParser(xmlParserCtxtPtr ctxt) { + if (ctxt == NULL) + return; + xmlHaltParser(ctxt); + ctxt->errNo = XML_ERR_USER_STOP; +} + +/** * xmlCreateIOParserCtxt: * @sax: a SAX handler * @user_data: The user data returned on SAX callbacks @@ -10782,11 +12710,15 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, xmlParserCtxtPtr ctxt; xmlParserInputPtr inputStream; xmlParserInputBufferPtr buf; - + if (ioread == NULL) return(NULL); buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); - if (buf == NULL) return(NULL); + if (buf == NULL) { + if (ioclose != NULL) + ioclose(ioctx); + return (NULL); + } ctxt = xmlNewParserCtxt(); if (ctxt == NULL) { @@ -10811,7 +12743,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); if (user_data != NULL) ctxt->userData = user_data; - } + } inputStream = xmlNewIOInputStream(ctxt, buf, enc); if (inputStream == NULL) { @@ -10826,7 +12758,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, #ifdef LIBXML_VALID_ENABLED /************************************************************************ * * - * Front ends when parsing a DTD * + * Front ends when parsing a DTD * * * ************************************************************************/ @@ -10837,7 +12769,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, * @enc: the charset encoding if known * * Load and parse a DTD - * + * * Returns the resulting xmlDtdPtr or NULL in case of error. * @input will be freed by the function in any case. */ @@ -10859,10 +12791,13 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, return(NULL); } + /* We are loading a DTD */ + ctxt->options |= XML_PARSE_DTDLOAD; + /* * Set-up the SAX context */ - if (sax != NULL) { + if (sax != NULL) { if (ctxt->sax != NULL) xmlFree(ctxt->sax); ctxt->sax = sax; @@ -10885,7 +12820,11 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, /* * plug some encoding conversion routines here. */ - xmlPushInput(ctxt, pinput); + if (xmlPushInput(ctxt, pinput) < 0) { + if (sax != NULL) ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + return(NULL); + } if (enc != XML_CHAR_ENCODING_NONE) { xmlSwitchEncoding(ctxt, enc); } @@ -10902,12 +12841,17 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, */ ctxt->inSubset = 2; ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); + if (ctxt->myDoc == NULL) { + xmlErrMemory(ctxt, "New Doc failed"); + return(NULL); + } + ctxt->myDoc->properties = XML_DOC_INTERNAL; ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", BAD_CAST "none", BAD_CAST "none"); if ((enc == XML_CHAR_ENCODING_NONE) && ((ctxt->input->end - ctxt->input->cur) >= 4)) { - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -10946,7 +12890,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, } if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -10957,7 +12901,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, * @SystemID: a NAME* containing the URL to the DTD * * Load and parse an external subset. - * + * * Returns the resulting xmlDtdPtr or NULL in case of error. */ @@ -10977,16 +12921,19 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, return(NULL); } + /* We are loading a DTD */ + ctxt->options |= XML_PARSE_DTDLOAD; + /* * Set-up the SAX context */ - if (sax != NULL) { + if (sax != NULL) { if (ctxt->sax != NULL) xmlFree(ctxt->sax); ctxt->sax = sax; ctxt->userData = ctxt; } - + /* * Canonicalise the system ID */ @@ -11014,7 +12961,13 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, /* * plug some encoding conversion routines here. */ - xmlPushInput(ctxt, input); + if (xmlPushInput(ctxt, input) < 0) { + if (sax != NULL) ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + if (systemIdCanonic != NULL) + xmlFree(systemIdCanonic); + return(NULL); + } if ((ctxt->input->end - ctxt->input->cur) >= 4) { enc = xmlDetectCharEncoding(ctxt->input->cur, 4); xmlSwitchEncoding(ctxt, enc); @@ -11035,6 +12988,13 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, */ ctxt->inSubset = 2; ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); + if (ctxt->myDoc == NULL) { + xmlErrMemory(ctxt, "New Doc failed"); + if (sax != NULL) ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + return(NULL); + } + ctxt->myDoc->properties = XML_DOC_INTERNAL; ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", ExternalID, SystemID); xmlParseExternalSubset(ctxt, ExternalID, SystemID); @@ -11061,7 +13021,7 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, } if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -11072,7 +13032,7 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, * @SystemID: a NAME* containing the URL to the DTD * * Load and parse an external subset. - * + * * Returns the resulting xmlDtdPtr or NULL in case of error. */ @@ -11084,7 +13044,7 @@ xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { /************************************************************************ * * - * Front ends when parsing an Entity * + * Front ends when parsing an Entity * * * ************************************************************************/ @@ -11115,12 +13075,11 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, int ret = 0; xmlChar start[4]; xmlCharEncoding enc; - xmlParserInputPtr inputStream; - char *directory = NULL; if (ctx == NULL) return(-1); - if (ctx->depth > 40) { + if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || + (ctx->depth > 1024)) { return(XML_ERR_ENTITY_LOOP); } @@ -11131,27 +13090,11 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ return(-1); - ctxt = xmlNewParserCtxt(); + ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); if (ctxt == NULL) { return(-1); } - - ctxt->userData = ctxt; - ctxt->_private = ctx->_private; - - inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); - if (inputStream == NULL) { - xmlFreeParserCtxt(ctxt); - return(-1); - } - - inputPush(ctxt, inputStream); - if ((ctxt->directory == NULL) && (directory == NULL)) - directory = xmlParserGetDirectory((char *)URL); - if ((ctxt->directory == NULL) && (directory != NULL)) - ctxt->directory = directory; - oldsax = ctxt->sax; ctxt->sax = ctx->sax; xmlDetectSAX2(ctxt); @@ -11160,6 +13103,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, xmlFreeParserCtxt(ctxt); return(-1); } + newDoc->properties = XML_DOC_INTERNAL; if (ctx->myDoc->dict) { newDoc->dict = ctx->myDoc->dict; xmlDictReference(newDoc->dict); @@ -11189,7 +13133,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, newDoc->children->doc = ctx->myDoc; } - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -11211,9 +13155,27 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, */ if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { xmlParseTextDecl(ctxt); + /* + * An XML-1.0 document can't reference an entity not XML-1.0 + */ + if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && + (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { + xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, + "Version mismatch between document and entity\n"); + } } /* + * If the user provided its own SAX callbacks then reuse the + * useData callback field, otherwise the expected setup in a + * DOM builder is to have userData == ctxt + */ + if (ctx->userData == ctx) + ctxt->userData = ctxt; + else + ctxt->userData = ctx->userData; + + /* * Doing validity checking on chunk doesn't make sense */ ctxt->instate = XML_PARSER_CONTENT; @@ -11244,7 +13206,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, ctxt->linenumbers = ctx->linenumbers; xmlParseContent(ctxt); - + ctx->validate = ctxt->validate; ctx->valid = ctxt->valid; if ((RAW == '<') && (NXT(1) == '/')) { @@ -11287,7 +13249,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, newDoc->intSubset = NULL; newDoc->extSubset = NULL; xmlFreeDoc(newDoc); - + return(ret); } @@ -11321,12 +13283,12 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, xmlChar start[4]; xmlCharEncoding enc; - if (depth > 40) { + if (((depth > 40) && + ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || + (depth > 1024)) { return(XML_ERR_ENTITY_LOOP); } - - if (list != NULL) *list = NULL; if ((URL == NULL) && (ID == NULL)) @@ -11335,7 +13297,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, return(XML_ERR_INTERNAL_ERROR); - ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); + ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); ctxt->userData = ctxt; if (oldctxt != NULL) { @@ -11372,6 +13334,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, xmlFreeParserCtxt(ctxt); return(XML_ERR_INTERNAL_ERROR); } + newDoc->properties = XML_DOC_INTERNAL; newDoc->intSubset = doc->intSubset; newDoc->extSubset = doc->extSubset; newDoc->dict = doc->dict; @@ -11398,7 +13361,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, ctxt->myDoc = doc; newRoot->doc = doc; - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -11426,7 +13389,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, ctxt->depth = depth; xmlParseContent(ctxt); - + if ((RAW == '<') && (NXT(1) == '/')) { xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); } else if (RAW != 0) { @@ -11459,11 +13422,34 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, } ret = XML_ERR_OK; } - if (sax != NULL) + + /* + * Record in the parent context the number of entities replacement + * done when parsing that reference. + */ + if (oldctxt != NULL) + oldctxt->nbentities += ctxt->nbentities; + + /* + * Also record the size of the entity parsed + */ + if (ctxt->input != NULL && oldctxt != NULL) { + oldctxt->sizeentities += ctxt->input->consumed; + oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); + } + /* + * And record the last error if any + */ + if (ctxt->lastError.code != XML_ERR_OK) + xmlCopyError(&ctxt->lastError, &oldctxt->lastError); + + if (sax != NULL) ctxt->sax = oldsax; - oldctxt->node_seq.maximum = ctxt->node_seq.maximum; - oldctxt->node_seq.length = ctxt->node_seq.length; - oldctxt->node_seq.buffer = ctxt->node_seq.buffer; + if (oldctxt != NULL) { + oldctxt->node_seq.maximum = ctxt->node_seq.maximum; + oldctxt->node_seq.length = ctxt->node_seq.length; + oldctxt->node_seq.buffer = ctxt->node_seq.buffer; + } ctxt->node_seq.maximum = 0; ctxt->node_seq.length = 0; ctxt->node_seq.buffer = NULL; @@ -11471,7 +13457,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, newDoc->intSubset = NULL; newDoc->extSubset = NULL; xmlFreeDoc(newDoc); - + return(ret); } @@ -11548,9 +13534,9 @@ xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, * * Returns XML_ERR_OK if the chunk is well balanced, and the parser * error code otherwise - * + * * In case recover is set to 1, the nodelist will not be empty even if - * the parsed chunk is not well balanced. + * the parsed chunk is not well balanced. */ static xmlParserErrors xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, @@ -11563,8 +13549,12 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, xmlNodePtr last = NULL; int size; xmlParserErrors ret = XML_ERR_OK; +#ifdef SAX2 + int i; +#endif - if (oldctxt->depth > 40) { + if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || + (oldctxt->depth > 1024)) { return(XML_ERR_ENTITY_LOOP); } @@ -11588,12 +13578,19 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); +#ifdef SAX2 + /* propagate namespaces down the entity */ + for (i = 0;i < oldctxt->nsNr;i += 2) { + nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); + } +#endif + oldsax = ctxt->sax; ctxt->sax = oldctxt->sax; xmlDetectSAX2(ctxt); ctxt->replaceEntities = oldctxt->replaceEntities; ctxt->options = oldctxt->options; - + ctxt->_private = oldctxt->_private; if (oldctxt->myDoc == NULL) { newDoc = xmlNewDoc(BAD_CAST "1.0"); @@ -11603,6 +13600,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, xmlFreeParserCtxt(ctxt); return(XML_ERR_INTERNAL_ERROR); } + newDoc->properties = XML_DOC_INTERNAL; newDoc->dict = ctxt->dict; xmlDictReference(newDoc->dict); ctxt->myDoc = newDoc; @@ -11658,7 +13656,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, } else { ret = XML_ERR_OK; } - + if ((lst != NULL) && (ret == XML_ERR_OK)) { xmlNodePtr cur; @@ -11687,7 +13685,20 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, ctxt->myDoc->children = content; ctxt->myDoc->last = last; } - + + /* + * Record in the parent context the number of entities replacement + * done when parsing that reference. + */ + if (oldctxt != NULL) + oldctxt->nbentities += ctxt->nbentities; + + /* + * Also record the last error if any + */ + if (ctxt->lastError.code != XML_ERR_OK) + xmlCopyError(&ctxt->lastError, &oldctxt->lastError); + ctxt->sax = oldsax; ctxt->dict = NULL; ctxt->attsDefault = NULL; @@ -11696,7 +13707,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, if (newDoc != NULL) { xmlFreeDoc(newDoc); } - + return(ret); } @@ -11770,22 +13781,22 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, if (doc->type == XML_DOCUMENT_NODE) ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); #ifdef LIBXML_HTML_ENABLED - else if (doc->type == XML_HTML_DOCUMENT_NODE) + else if (doc->type == XML_HTML_DOCUMENT_NODE) { ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); + /* + * When parsing in context, it makes no sense to add implied + * elements like html/body/etc... + */ + options |= HTML_PARSE_NOIMPLIED; + } #endif else return(XML_ERR_INTERNAL_ERROR); if (ctxt == NULL) return(XML_ERR_NO_MEMORY); - fake = xmlNewComment(NULL); - if (fake == NULL) { - xmlFreeParserCtxt(ctxt); - return(XML_ERR_NO_MEMORY); - } - xmlAddChild(node, fake); - /* + /* * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. * We need a dictionary for xmlDetectSAX2, so if there's no doc dict * we must wait until the last moment to free the original one. @@ -11797,9 +13808,33 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, } else options |= XML_PARSE_NODICT; - xmlCtxtUseOptions(ctxt, options); + if (doc->encoding != NULL) { + xmlCharEncodingHandlerPtr hdlr; + + if (ctxt->encoding != NULL) + xmlFree((xmlChar *) ctxt->encoding); + ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); + + hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); + if (hdlr != NULL) { + xmlSwitchToEncoding(ctxt, hdlr); + } else { + return(XML_ERR_UNSUPPORTED_ENCODING); + } + } + + xmlCtxtUseOptionsInternal(ctxt, options, NULL); xmlDetectSAX2(ctxt); ctxt->myDoc = doc; + /* parsing in context, i.e. as within existing content */ + ctxt->instate = XML_PARSER_CONTENT; + + fake = xmlNewComment(NULL); + if (fake == NULL) { + xmlFreeParserCtxt(ctxt); + return(XML_ERR_NO_MEMORY); + } + xmlAddChild(node, fake); if (node->type == XML_ELEMENT_NODE) { nodePush(ctxt, node); @@ -11828,8 +13863,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, } cur = cur->parent; } - ctxt->instate = XML_PARSER_CONTENT; - } + } if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { /* @@ -11864,12 +13898,12 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, } else { ret = XML_ERR_OK; } - + /* * Return the newly created nodeset after unlinking it from * the pseudo sibling. */ - + cur = fake->next; fake->next = NULL; node->last = fake; @@ -11897,7 +13931,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, if (doc->dict != NULL) ctxt->dict = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); #else /* !SAX2 */ return(XML_ERR_INTERNAL_ERROR); @@ -11925,13 +13959,14 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, * * Returns 0 if the chunk is well balanced, -1 in case of args problem and * the parser error code otherwise - * + * * In case recover is set to 1, the nodelist will not be empty even if - * the parsed chunk is not well balanced. + * the parsed chunk is not well balanced, assuming the parsing succeeded to + * some extent. */ int xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, - void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, + void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, int recover) { xmlParserCtxtPtr ctxt; xmlDocPtr newDoc; @@ -11966,6 +14001,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, xmlFreeParserCtxt(ctxt); return(-1); } + newDoc->properties = XML_DOC_INTERNAL; if ((doc != NULL) && (doc->dict != NULL)) { xmlDictFree(ctxt->dict); ctxt->dict = doc->dict; @@ -11975,7 +14011,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); ctxt->dictNames = 1; } else { - xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT); + xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); } if (doc != NULL) { newDoc->intSubset = doc->intSubset; @@ -12038,7 +14074,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, } else { ret = 0; } - + if ((lst != NULL) && ((ret == 0) || (recover == 1))) { xmlNodePtr cur; @@ -12055,15 +14091,15 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, } newDoc->children->children = NULL; } - - if (sax != NULL) + + if (sax != NULL) ctxt->sax = oldsax; xmlFreeParserCtxt(ctxt); newDoc->intSubset = NULL; newDoc->extSubset = NULL; newDoc->oldNs = NULL; xmlFreeDoc(newDoc); - + return(ret); } @@ -12111,7 +14147,7 @@ xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -12135,10 +14171,11 @@ xmlParseEntity(const char *filename) { #endif /* LIBXML_SAX1_ENABLED */ /** - * xmlCreateEntityParserCtxt: + * xmlCreateEntityParserCtxtInternal: * @URL: the entity URL * @ID: the entity PUBLIC ID * @base: a possible base for the target URI + * @pctx: parser context used to set options on new context * * Create a parser context for an external entity * Automatic support for ZLIB/Compress compressed document is provided @@ -12146,19 +14183,24 @@ xmlParseEntity(const char *filename) { * * Returns the new parser context or NULL */ -xmlParserCtxtPtr -xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, - const xmlChar *base) { +static xmlParserCtxtPtr +xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, + const xmlChar *base, xmlParserCtxtPtr pctx) { xmlParserCtxtPtr ctxt; xmlParserInputPtr inputStream; char *directory = NULL; xmlChar *uri; - + ctxt = xmlNewParserCtxt(); if (ctxt == NULL) { return(NULL); } + if (pctx != NULL) { + ctxt->options = pctx->options; + ctxt->_private = pctx->_private; + } + uri = xmlBuildURI(URL, base); if (uri == NULL) { @@ -12193,9 +14235,28 @@ xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, return(ctxt); } +/** + * xmlCreateEntityParserCtxt: + * @URL: the entity URL + * @ID: the entity PUBLIC ID + * @base: a possible base for the target URI + * + * Create a parser context for an external entity + * Automatic support for ZLIB/Compress compressed document is provided + * by default if found at compile-time. + * + * Returns the new parser context or NULL + */ +xmlParserCtxtPtr +xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, + const xmlChar *base) { + return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); + +} + /************************************************************************ * * - * Front ends when parsing from a file * + * Front ends when parsing from a file * * * ************************************************************************/ @@ -12204,7 +14265,7 @@ xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, * @filename: the filename or URL * @options: a combination of xmlParserOption * - * Create a parser context for a file or URL content. + * Create a parser context for a file or URL content. * Automatic support for ZLIB/Compress compressed document is provided * by default if found at compile-time and for file accesses * @@ -12224,9 +14285,9 @@ xmlCreateURLParserCtxt(const char *filename, int options) } if (options) - xmlCtxtUseOptions(ctxt, options); + xmlCtxtUseOptionsInternal(ctxt, options, NULL); ctxt->linenumbers = 1; - + inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); if (inputStream == NULL) { xmlFreeParserCtxt(ctxt); @@ -12246,7 +14307,7 @@ xmlCreateURLParserCtxt(const char *filename, int options) * xmlCreateFileParserCtxt: * @filename: the filename * - * Create a parser context for a file content. + * Create a parser context for a file content. * Automatic support for ZLIB/Compress compressed document is provided * by default if found at compile-time. * @@ -12283,7 +14344,6 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, int recovery, void *data) { xmlDocPtr ret; xmlParserCtxtPtr ctxt; - char *directory = NULL; xmlInitParser(); @@ -12301,10 +14361,8 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, ctxt->_private = data; } - if ((ctxt->directory == NULL) && (directory == NULL)) - directory = xmlParserGetDirectory(filename); - if ((ctxt->directory == NULL) && (directory != NULL)) - ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); + if (ctxt->directory == NULL) + ctxt->directory = xmlParserGetDirectory(filename); ctxt->recovery = recovery; @@ -12327,7 +14385,7 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -12357,13 +14415,14 @@ xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, * @cur: a pointer to an array of xmlChar * * parse an XML in-memory document and build a tree. - * In the case the document is not Well Formed, a tree is built anyway - * - * Returns the resulting document tree + * In the case the document is not Well Formed, a attempt to build a + * tree is tried anyway + * + * Returns the resulting document tree or NULL in case of failure */ xmlDocPtr -xmlRecoverDoc(xmlChar *cur) { +xmlRecoverDoc(const xmlChar *cur) { return(xmlSAXParseDoc(NULL, cur, 1)); } @@ -12389,9 +14448,10 @@ xmlParseFile(const char *filename) { * * parse an XML file and build a tree. Automatic support for ZLIB/Compress * compressed document is provided by default if found at compile-time. - * In the case the document is not Well Formed, a tree is built anyway + * In the case the document is not Well Formed, it attempts to build + * a tree anyway * - * Returns the resulting document tree + * Returns the resulting document tree or NULL in case of failure */ xmlDocPtr @@ -12425,7 +14485,7 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, xmlClearParserCtxt(ctxt); return; } - + xmlClearParserCtxt(ctxt); if (filename != NULL) input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); @@ -12443,7 +14503,7 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, * * parse an XML file and call the given SAX handler routines. * Automatic support for ZLIB/Compress compressed document is provided - * + * * Returns 0 in case of success or a error number otherwise */ int @@ -12451,21 +14511,19 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, const char *filename) { int ret = 0; xmlParserCtxtPtr ctxt; - + ctxt = xmlCreateFileParserCtxt(filename); if (ctxt == NULL) return -1; -#ifdef LIBXML_SAX1_ENABLED if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) -#endif /* LIBXML_SAX1_ENABLED */ xmlFree(ctxt->sax); ctxt->sax = sax; xmlDetectSAX2(ctxt); if (user_data != NULL) ctxt->userData = user_data; - + xmlParseDocument(ctxt); - + if (ctxt->wellFormed) ret = 0; else { @@ -12481,14 +14539,14 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, ctxt->myDoc = NULL; } xmlFreeParserCtxt(ctxt); - + return ret; } #endif /* LIBXML_SAX1_ENABLED */ /************************************************************************ * * - * Front ends when parsing from memory * + * Front ends when parsing from memory * * * ************************************************************************/ @@ -12532,9 +14590,7 @@ xmlCreateMemoryParserCtxt(const char *buffer, int size) { input->filename = NULL; input->buf = buf; - input->base = input->buf->buffer->content; - input->cur = input->buf->buffer->content; - input->end = &input->buf->buffer->content[input->buf->buffer->use]; + xmlBufResetInput(input->buf->buffer, input); inputPush(ctxt, input); return(ctxt); @@ -12566,6 +14622,8 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, xmlDocPtr ret; xmlParserCtxtPtr ctxt; + xmlInitParser(); + ctxt = xmlCreateMemoryParserCtxt(buffer, size); if (ctxt == NULL) return(NULL); if (sax != NULL) { @@ -12588,10 +14646,10 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, xmlFreeDoc(ctxt->myDoc); ctxt->myDoc = NULL; } - if (sax != NULL) + if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -12606,7 +14664,7 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, * parse an XML in-memory block and use the given SAX function block * to handle the parsing callback. If sax is NULL, fallback to the default * DOM tree building routines. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -12621,7 +14679,7 @@ xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, * @size: the size of the array * * parse an XML in-memory block and build a tree. - * + * * Returns the resulting document tree */ @@ -12635,9 +14693,10 @@ xmlDocPtr xmlParseMemory(const char *buffer, int size) { * @size: the size of the array * * parse an XML in-memory block and build a tree. - * In the case the document is not Well Formed, a tree is built anyway - * - * Returns the resulting document tree + * In the case the document is not Well Formed, an attempt to + * build a tree is tried anyway + * + * Returns the resulting document tree or NULL in case of error */ xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { @@ -12653,26 +14712,28 @@ xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { * * A better SAX parsing routine. * parse an XML in-memory buffer and call the given SAX handler routines. - * + * * Returns 0 in case of success or a error number otherwise */ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, const char *buffer, int size) { int ret = 0; xmlParserCtxtPtr ctxt; - xmlSAXHandlerPtr oldsax = NULL; - - if (sax == NULL) return -1; + + xmlInitParser(); + ctxt = xmlCreateMemoryParserCtxt(buffer, size); if (ctxt == NULL) return -1; - oldsax = ctxt->sax; + if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) + xmlFree(ctxt->sax); ctxt->sax = sax; xmlDetectSAX2(ctxt); + if (user_data != NULL) ctxt->userData = user_data; - + xmlParseDocument(ctxt); - + if (ctxt->wellFormed) ret = 0; else { @@ -12681,13 +14742,14 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, else ret = -1; } - ctxt->sax = oldsax; + if (sax != NULL) + ctxt->sax = NULL; if (ctxt->myDoc != NULL) { xmlFreeDoc(ctxt->myDoc); ctxt->myDoc = NULL; } xmlFreeParserCtxt(ctxt); - + return ret; } #endif /* LIBXML_SAX1_ENABLED */ @@ -12721,7 +14783,7 @@ xmlCreateDocParserCtxt(const xmlChar *cur) { * parse an XML in-memory document and build a tree. * It use the given SAX function block to handle the parsing callback. * If sax is NULL, fallback to the default DOM tree building routines. - * + * * Returns the resulting document tree */ @@ -12736,7 +14798,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { ctxt = xmlCreateDocParserCtxt(cur); if (ctxt == NULL) return(NULL); - if (sax != NULL) { + if (sax != NULL) { oldsax = ctxt->sax; ctxt->sax = sax; ctxt->userData = NULL; @@ -12753,7 +14815,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { if (sax != NULL) ctxt->sax = oldsax; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -12762,7 +14824,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { * @cur: a pointer to an array of xmlChar * * parse an XML in-memory document and build a tree. - * + * * Returns the resulting document tree */ @@ -12775,8 +14837,8 @@ xmlParseDoc(const xmlChar *cur) { #ifdef LIBXML_LEGACY_ENABLED /************************************************************************ * * - * Specific function to keep track of entities references * - * and used by the XSLT debugger * + * Specific function to keep track of entities references * + * and used by the XSLT debugger * * * ************************************************************************/ @@ -12786,7 +14848,7 @@ static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; * xmlAddEntityReference: * @ent : A valid entity * @firstNode : A valid first node for children of entity - * @lastNode : A valid last node of children entity + * @lastNode : A valid last node of children entity * * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY */ @@ -12815,7 +14877,7 @@ xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) /************************************************************************ * * - * Miscellaneous * + * Miscellaneous * * * ************************************************************************/ @@ -12839,37 +14901,57 @@ xmlInitParser(void) { if (xmlParserInitialized != 0) return; - if ((xmlGenericError == xmlGenericErrorDefaultFunc) || - (xmlGenericError == NULL)) - initGenericErrorDefaultFunc(NULL); - xmlInitGlobals(); - xmlInitThreads(); - xmlInitMemory(); - xmlInitCharEncodingHandlers(); - xmlDefaultSAXHandlerInit(); - xmlRegisterDefaultInputCallbacks(); +#ifdef LIBXML_THREAD_ENABLED + __xmlGlobalInitMutexLock(); + if (xmlParserInitialized == 0) { +#endif + xmlInitThreads(); + xmlInitGlobals(); + if ((xmlGenericError == xmlGenericErrorDefaultFunc) || + (xmlGenericError == NULL)) + initGenericErrorDefaultFunc(NULL); + xmlInitMemory(); + xmlInitializeDict(); + xmlInitCharEncodingHandlers(); + xmlDefaultSAXHandlerInit(); + xmlRegisterDefaultInputCallbacks(); #ifdef LIBXML_OUTPUT_ENABLED - xmlRegisterDefaultOutputCallbacks(); + xmlRegisterDefaultOutputCallbacks(); #endif /* LIBXML_OUTPUT_ENABLED */ #ifdef LIBXML_HTML_ENABLED - htmlInitAutoClose(); - htmlDefaultSAXHandlerInit(); + htmlInitAutoClose(); + htmlDefaultSAXHandlerInit(); #endif #ifdef LIBXML_XPATH_ENABLED - xmlXPathInit(); + xmlXPathInit(); +#endif + xmlParserInitialized = 1; +#ifdef LIBXML_THREAD_ENABLED + } + __xmlGlobalInitMutexUnlock(); #endif - xmlParserInitialized = 1; } /** * xmlCleanupParser: * - * Cleanup function for the XML library. It tries to reclaim all - * parsing related global memory allocated for the library processing. - * It doesn't deallocate any document related memory. Calling this - * function should not prevent reusing the library but one should - * call xmlCleanupParser() only when the process has - * finished using the library or XML document built with it. + * This function name is somewhat misleading. It does not clean up + * parser state, it cleans up memory allocated by the library itself. + * It is a cleanup function for the XML library. It tries to reclaim all + * related global memory allocated for the library processing. + * It doesn't deallocate any document related memory. One should + * call xmlCleanupParser() only when the process has finished using + * the library and all XML/HTML documents built with it. + * See also xmlInitParser() which has the opposite function of preparing + * the library for operations. + * + * WARNING: if your application is multithreaded or has plugin support + * calling this may crash the application if another thread or + * a plugin is still using libxml2. It's sometimes very hard to + * guess if libxml2 is in use in the application, some libraries + * or plugins may use it without notice. In case of doubt abstain + * from calling this function or do it just before calling exit() + * to avoid leak reports from valgrind ! */ void @@ -12890,8 +14972,8 @@ xmlCleanupParser(void) { xmlSchemaCleanupTypes(); xmlRelaxNGCleanupTypes(); #endif - xmlCleanupGlobals(); xmlResetLastError(); + xmlCleanupGlobals(); xmlCleanupThreads(); /* must be last if called not from the main thread */ xmlCleanupMemory(); xmlParserInitialized = 0; @@ -12911,7 +14993,7 @@ xmlCleanupParser(void) { * current scope */ #define DICT_FREE(str) \ - if ((str) && ((!dict) || \ + if ((str) && ((!dict) || \ (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ xmlFree((char *)(str)); @@ -12926,7 +15008,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) { xmlParserInputPtr input; xmlDictPtr dict; - + if (ctxt == NULL) return; @@ -12939,8 +15021,12 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) ctxt->input = NULL; ctxt->spaceNr = 0; - ctxt->spaceTab[0] = -1; - ctxt->space = &ctxt->spaceTab[0]; + if (ctxt->spaceTab != NULL) { + ctxt->spaceTab[0] = -1; + ctxt->space = &ctxt->spaceTab[0]; + } else { + ctxt->space = NULL; + } ctxt->nodeNr = 0; @@ -12988,6 +15074,9 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) ctxt->depth = 0; ctxt->charset = XML_CHAR_ENCODING_UTF8; ctxt->catalogs = NULL; + ctxt->nbentities = 0; + ctxt->sizeentities = 0; + ctxt->sizeentcopy = 0; xmlInitNodeInfoSeq(&ctxt->node_seq); if (ctxt->attsDefault != NULL) { @@ -13072,25 +15161,18 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) filename); inputStream->buf = buf; - inputStream->base = inputStream->buf->buffer->content; - inputStream->cur = inputStream->buf->buffer->content; - inputStream->end = - &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; + xmlBufResetInput(buf->buffer, inputStream); inputPush(ctxt, inputStream); if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && (ctxt->input->buf != NULL)) { - int base = ctxt->input->base - ctxt->input->buf->buffer->content; - int cur = ctxt->input->cur - ctxt->input->base; + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); + size_t cur = ctxt->input->cur - ctxt->input->base; xmlParserInputBufferPush(ctxt->input->buf, size, chunk); - ctxt->input->base = ctxt->input->buf->buffer->content + base; - ctxt->input->cur = ctxt->input->base + cur; - ctxt->input->end = - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> - use]; + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); #endif @@ -13099,6 +15181,10 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, if (encoding != NULL) { xmlCharEncodingHandlerPtr hdlr; + if (ctxt->encoding != NULL) + xmlFree((xmlChar *) ctxt->encoding); + ctxt->encoding = xmlStrdup((const xmlChar *) encoding); + hdlr = xmlFindCharEncodingHandler(encoding); if (hdlr != NULL) { xmlSwitchToEncoding(ctxt, hdlr); @@ -13113,50 +15199,63 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, return(0); } + /** - * xmlCtxtUseOptions: + * xmlCtxtUseOptionsInternal: * @ctxt: an XML parser context * @options: a combination of xmlParserOption + * @encoding: the user provided encoding to use * * Applies the options to the parser context * * Returns 0 in case of success, the set of unknown or unimplemented options * in case of error. */ -int -xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) +static int +xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) { if (ctxt == NULL) return(-1); + if (encoding != NULL) { + if (ctxt->encoding != NULL) + xmlFree((xmlChar *) ctxt->encoding); + ctxt->encoding = xmlStrdup((const xmlChar *) encoding); + } if (options & XML_PARSE_RECOVER) { ctxt->recovery = 1; options -= XML_PARSE_RECOVER; + ctxt->options |= XML_PARSE_RECOVER; } else ctxt->recovery = 0; if (options & XML_PARSE_DTDLOAD) { ctxt->loadsubset = XML_DETECT_IDS; options -= XML_PARSE_DTDLOAD; + ctxt->options |= XML_PARSE_DTDLOAD; } else ctxt->loadsubset = 0; if (options & XML_PARSE_DTDATTR) { ctxt->loadsubset |= XML_COMPLETE_ATTRS; options -= XML_PARSE_DTDATTR; + ctxt->options |= XML_PARSE_DTDATTR; } if (options & XML_PARSE_NOENT) { ctxt->replaceEntities = 1; /* ctxt->loadsubset |= XML_DETECT_IDS; */ options -= XML_PARSE_NOENT; + ctxt->options |= XML_PARSE_NOENT; } else ctxt->replaceEntities = 0; if (options & XML_PARSE_PEDANTIC) { ctxt->pedantic = 1; options -= XML_PARSE_PEDANTIC; + ctxt->options |= XML_PARSE_PEDANTIC; } else ctxt->pedantic = 0; if (options & XML_PARSE_NOBLANKS) { ctxt->keepBlanks = 0; ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; options -= XML_PARSE_NOBLANKS; + ctxt->options |= XML_PARSE_NOBLANKS; } else ctxt->keepBlanks = 1; if (options & XML_PARSE_DTDVALID) { @@ -13166,6 +15265,7 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) if (options & XML_PARSE_NOERROR) ctxt->vctxt.error = NULL; options -= XML_PARSE_DTDVALID; + ctxt->options |= XML_PARSE_DTDVALID; } else ctxt->validate = 0; if (options & XML_PARSE_NOWARNING) { @@ -13185,17 +15285,20 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) ctxt->sax->endElementNs = NULL; ctxt->sax->initialized = 1; options -= XML_PARSE_SAX1; + ctxt->options |= XML_PARSE_SAX1; } #endif /* LIBXML_SAX1_ENABLED */ if (options & XML_PARSE_NODICT) { ctxt->dictNames = 0; options -= XML_PARSE_NODICT; + ctxt->options |= XML_PARSE_NODICT; } else { ctxt->dictNames = 1; } if (options & XML_PARSE_NOCDATA) { ctxt->sax->cdataBlock = NULL; options -= XML_PARSE_NOCDATA; + ctxt->options |= XML_PARSE_NOCDATA; } if (options & XML_PARSE_NSCLEAN) { ctxt->options |= XML_PARSE_NSCLEAN; @@ -13209,11 +15312,53 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) ctxt->options |= XML_PARSE_COMPACT; options -= XML_PARSE_COMPACT; } + if (options & XML_PARSE_OLD10) { + ctxt->options |= XML_PARSE_OLD10; + options -= XML_PARSE_OLD10; + } + if (options & XML_PARSE_NOBASEFIX) { + ctxt->options |= XML_PARSE_NOBASEFIX; + options -= XML_PARSE_NOBASEFIX; + } + if (options & XML_PARSE_HUGE) { + ctxt->options |= XML_PARSE_HUGE; + options -= XML_PARSE_HUGE; + if (ctxt->dict != NULL) + xmlDictSetLimit(ctxt->dict, 0); + } + if (options & XML_PARSE_OLDSAX) { + ctxt->options |= XML_PARSE_OLDSAX; + options -= XML_PARSE_OLDSAX; + } + if (options & XML_PARSE_IGNORE_ENC) { + ctxt->options |= XML_PARSE_IGNORE_ENC; + options -= XML_PARSE_IGNORE_ENC; + } + if (options & XML_PARSE_BIG_LINES) { + ctxt->options |= XML_PARSE_BIG_LINES; + options -= XML_PARSE_BIG_LINES; + } ctxt->linenumbers = 1; return (options); } /** + * xmlCtxtUseOptions: + * @ctxt: an XML parser context + * @options: a combination of xmlParserOption + * + * Applies the options to the parser context + * + * Returns 0 in case of success, the set of unknown or unimplemented options + * in case of error. + */ +int +xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) +{ + return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); +} + +/** * xmlDoRead: * @ctxt: an XML parser context * @URL: the base URL to use for the document @@ -13222,7 +15367,7 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) * @reuse: keep the context for reuse * * Common front-end for the xmlRead functions - * + * * Returns the resulting document tree or NULL */ static xmlDocPtr @@ -13230,8 +15375,8 @@ xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, int options, int reuse) { xmlDocPtr ret; - - xmlCtxtUseOptions(ctxt, options); + + xmlCtxtUseOptionsInternal(ctxt, options, encoding); if (encoding != NULL) { xmlCharEncodingHandlerPtr hdlr; @@ -13267,7 +15412,7 @@ xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, * @options: a combination of xmlParserOption * * parse an XML in-memory document and build a tree. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -13277,6 +15422,7 @@ xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int optio if (cur == NULL) return (NULL); + xmlInitParser(); ctxt = xmlCreateDocParserCtxt(cur); if (ctxt == NULL) @@ -13291,7 +15437,7 @@ xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int optio * @options: a combination of xmlParserOption * * parse an XML file from the filesystem or the network. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -13299,6 +15445,7 @@ xmlReadFile(const char *filename, const char *encoding, int options) { xmlParserCtxtPtr ctxt; + xmlInitParser(); ctxt = xmlCreateURLParserCtxt(filename, options); if (ctxt == NULL) return (NULL); @@ -13314,7 +15461,7 @@ xmlReadFile(const char *filename, const char *encoding, int options) * @options: a combination of xmlParserOption * * parse an XML in-memory document and build a tree. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -13322,6 +15469,7 @@ xmlReadMemory(const char *buffer, int size, const char *URL, const char *encodin { xmlParserCtxtPtr ctxt; + xmlInitParser(); ctxt = xmlCreateMemoryParserCtxt(buffer, size); if (ctxt == NULL) return (NULL); @@ -13338,7 +15486,7 @@ xmlReadMemory(const char *buffer, int size, const char *URL, const char *encodin * parse an XML from a file descriptor and build a tree. * NOTE that the file descriptor will not be closed when the * reader is closed or reset. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -13350,6 +15498,7 @@ xmlReadFd(int fd, const char *URL, const char *encoding, int options) if (fd < 0) return (NULL); + xmlInitParser(); input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); if (input == NULL) @@ -13380,7 +15529,7 @@ xmlReadFd(int fd, const char *URL, const char *encoding, int options) * @options: a combination of xmlParserOption * * parse an XML document from I/O functions and source and build a tree. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -13393,11 +15542,15 @@ xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, if (ioread == NULL) return (NULL); + xmlInitParser(); input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, XML_CHAR_ENCODING_NONE); - if (input == NULL) + if (input == NULL) { + if (ioclose != NULL) + ioclose(ioctx); return (NULL); + } ctxt = xmlNewParserCtxt(); if (ctxt == NULL) { xmlFreeParserInputBuffer(input); @@ -13423,7 +15576,7 @@ xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, * * parse an XML in-memory document and build a tree. * This reuses the existing @ctxt parser context - * + * * Returns the resulting document tree */ xmlDocPtr @@ -13436,6 +15589,7 @@ xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, return (NULL); if (ctxt == NULL) return (NULL); + xmlInitParser(); xmlCtxtReset(ctxt); @@ -13456,7 +15610,7 @@ xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, * * parse an XML file from the filesystem or the network. * This reuses the existing @ctxt parser context - * + * * Returns the resulting document tree */ xmlDocPtr @@ -13469,6 +15623,7 @@ xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, return (NULL); if (ctxt == NULL) return (NULL); + xmlInitParser(); xmlCtxtReset(ctxt); @@ -13491,7 +15646,7 @@ xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, * * parse an XML in-memory document and build a tree. * This reuses the existing @ctxt parser context - * + * * Returns the resulting document tree */ xmlDocPtr @@ -13505,6 +15660,7 @@ xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, return (NULL); if (buffer == NULL) return (NULL); + xmlInitParser(); xmlCtxtReset(ctxt); @@ -13535,7 +15691,7 @@ xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, * This reuses the existing @ctxt parser context * NOTE that the file descriptor will not be closed when the * reader is closed or reset. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -13549,6 +15705,7 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, return (NULL); if (ctxt == NULL) return (NULL); + xmlInitParser(); xmlCtxtReset(ctxt); @@ -13578,7 +15735,7 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, * * parse an XML document from I/O functions and source and build a tree. * This reuses the existing @ctxt parser context - * + * * Returns the resulting document tree */ xmlDocPtr @@ -13594,13 +15751,17 @@ xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, return (NULL); if (ctxt == NULL) return (NULL); + xmlInitParser(); xmlCtxtReset(ctxt); input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, XML_CHAR_ENCODING_NONE); - if (input == NULL) + if (input == NULL) { + if (ioclose != NULL) + ioclose(ioctx); return (NULL); + } stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); if (stream == NULL) { xmlFreeParserInputBuffer(input); |