summaryrefslogtreecommitdiffstats
path: root/gnulib-local/lib/libxml/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'gnulib-local/lib/libxml/parser.c')
-rw-r--r--gnulib-local/lib/libxml/parser.c5533
1 files changed, 3847 insertions, 1686 deletions
diff --git a/gnulib-local/lib/libxml/parser.c b/gnulib-local/lib/libxml/parser.c
index bd44585..c5741e3 100644
--- a/gnulib-local/lib/libxml/parser.c
+++ b/gnulib-local/lib/libxml/parser.c
@@ -17,7 +17,7 @@
* parserInternals.c to reduce this file size.
* As much as possible the functions are associated with their relative
* production in the XML specification. A few productions defining the
- * different ranges of character are actually implanted either in
+ * different ranges of character are actually implanted either in
* parserInternals.h or parserInternals.c
* The DOM tree build is realized from the default SAX callbacks in
* the module SAX.c.
@@ -40,6 +40,7 @@
#endif
#include <stdlib.h>
+#include <limits.h>
#include <string.h>
#include <stdarg.h>
#include <libxml/xmlmemory.h>
@@ -79,36 +80,193 @@
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif
+#ifdef HAVE_LZMA_H
+#include <lzma.h>
+#endif
+
+#include "buf.h"
+#include "enc.h"
+
+static void
+xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
+
+static xmlParserCtxtPtr
+xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
+ const xmlChar *base, xmlParserCtxtPtr pctx);
+
+static void xmlHaltParser(xmlParserCtxtPtr ctxt);
+
+/************************************************************************
+ * *
+ * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
+ * *
+ ************************************************************************/
+
+#define XML_PARSER_BIG_ENTITY 1000
+#define XML_PARSER_LOT_ENTITY 5000
+
+/*
+ * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
+ * replacement over the size in byte of the input indicates that you have
+ * and eponential behaviour. A value of 10 correspond to at least 3 entity
+ * replacement per byte of input.
+ */
+#define XML_PARSER_NON_LINEAR 10
+
+/*
+ * xmlParserEntityCheck
+ *
+ * Function to check non-linear entity expansion behaviour
+ * This is here to detect and stop exponential linear entity expansion
+ * This is not a limitation of the parser but a safety
+ * boundary feature. It can be disabled with the XML_PARSE_HUGE
+ * parser option.
+ */
+static int
+xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
+ xmlEntityPtr ent, size_t replacement)
+{
+ size_t consumed = 0;
+
+ if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
+ return (0);
+ if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
+ return (1);
+
+ /*
+ * This may look absurd but is needed to detect
+ * entities problems
+ */
+ if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
+ (ent->content != NULL) && (ent->checked == 0)) {
+ unsigned long oldnbent = ctxt->nbentities;
+ xmlChar *rep;
+
+ ent->checked = 1;
+
+ rep = xmlStringDecodeEntities(ctxt, ent->content,
+ XML_SUBSTITUTE_REF, 0, 0, 0);
+
+ ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
+ if (rep != NULL) {
+ if (xmlStrchr(rep, '<'))
+ ent->checked |= 1;
+ xmlFree(rep);
+ rep = NULL;
+ }
+ }
+ if (replacement != 0) {
+ if (replacement < XML_MAX_TEXT_LENGTH)
+ return(0);
+
+ /*
+ * If the volume of entity copy reaches 10 times the
+ * amount of parsed data and over the large text threshold
+ * then that's very likely to be an abuse.
+ */
+ if (ctxt->input != NULL) {
+ consumed = ctxt->input->consumed +
+ (ctxt->input->cur - ctxt->input->base);
+ }
+ consumed += ctxt->sizeentities;
+
+ if (replacement < XML_PARSER_NON_LINEAR * consumed)
+ return(0);
+ } else if (size != 0) {
+ /*
+ * Do the check based on the replacement size of the entity
+ */
+ if (size < XML_PARSER_BIG_ENTITY)
+ return(0);
+
+ /*
+ * A limit on the amount of text data reasonably used
+ */
+ if (ctxt->input != NULL) {
+ consumed = ctxt->input->consumed +
+ (ctxt->input->cur - ctxt->input->base);
+ }
+ consumed += ctxt->sizeentities;
+
+ if ((size < XML_PARSER_NON_LINEAR * consumed) &&
+ (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
+ return (0);
+ } else if (ent != NULL) {
+ /*
+ * use the number of parsed entities in the replacement
+ */
+ size = ent->checked / 2;
+
+ /*
+ * The amount of data parsed counting entities size only once
+ */
+ if (ctxt->input != NULL) {
+ consumed = ctxt->input->consumed +
+ (ctxt->input->cur - ctxt->input->base);
+ }
+ consumed += ctxt->sizeentities;
+
+ /*
+ * Check the density of entities for the amount of data
+ * knowing an entity reference will take at least 3 bytes
+ */
+ if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
+ return (0);
+ } else {
+ /*
+ * strange we got no data for checking
+ */
+ if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
+ (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
+ (ctxt->nbentities <= 10000))
+ return (0);
+ }
+ xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
+ return (1);
+}
/**
* xmlParserMaxDepth:
*
- * arbitrary depth limit for the XML documents that we allow to
- * process. This is not a limitation of the parser but a safety
- * boundary feature.
+ * arbitrary depth limit for the XML documents that we allow to
+ * process. This is not a limitation of the parser but a safety
+ * boundary feature. It can be disabled with the XML_PARSE_HUGE
+ * parser option.
*/
-unsigned int xmlParserMaxDepth = 1024;
+unsigned int xmlParserMaxDepth = 256;
-#define SAX2 1
+
+#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
-
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
+/**
+ * XML_PARSER_CHUNK_SIZE
+ *
+ * When calling GROW that's the minimal amount of data
+ * the parser expected to have received. It is not a hard
+ * limit but an optimization when reading strings like Names
+ * It is not strictly needed as long as inputs available characters
+ * are followed by 0, which should be provided by the I/O level
+ */
+#define XML_PARSER_CHUNK_SIZE 100
+
/*
* List of XML prefixed PI allowed by W3C specs
*/
static const char *xmlW3CPIs[] = {
"xml-stylesheet",
+ "xml-model",
NULL
};
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
-xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
- const xmlChar **str);
+static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
+ const xmlChar **str);
static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
@@ -116,6 +274,9 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
void *user_data, int depth, const xmlChar *URL,
const xmlChar *ID, xmlNodePtr *list);
+static int
+xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
+ const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
@@ -126,9 +287,12 @@ static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
const xmlChar *string, void *user_data, xmlNodePtr *lst);
+static int
+xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
+
/************************************************************************
* *
- * Some factorized error routines *
+ * Some factorized error routines *
* *
************************************************************************/
@@ -149,14 +313,15 @@ xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
return;
if (ctxt != NULL)
ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
+
if (prefix == NULL)
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
- ctxt->errNo, XML_ERR_FATAL, NULL, 0,
+ XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
(const char *) localname, NULL, NULL, 0, 0,
"Attribute %s redefined\n", localname);
else
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
- ctxt->errNo, XML_ERR_FATAL, NULL, 0,
+ XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
(const char *) prefix, (const char *) localname,
NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
localname);
@@ -179,193 +344,201 @@ static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
{
const char *errmsg;
+ char errstr[129] = "";
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
switch (error) {
case XML_ERR_INVALID_HEX_CHARREF:
- errmsg = "CharRef: invalid hexadecimal value\n";
+ errmsg = "CharRef: invalid hexadecimal value";
break;
case XML_ERR_INVALID_DEC_CHARREF:
- errmsg = "CharRef: invalid decimal value\n";
+ errmsg = "CharRef: invalid decimal value";
break;
case XML_ERR_INVALID_CHARREF:
- errmsg = "CharRef: invalid value\n";
+ errmsg = "CharRef: invalid value";
break;
case XML_ERR_INTERNAL_ERROR:
errmsg = "internal error";
break;
case XML_ERR_PEREF_AT_EOF:
- errmsg = "PEReference at end of document\n";
+ errmsg = "PEReference at end of document";
break;
case XML_ERR_PEREF_IN_PROLOG:
- errmsg = "PEReference in prolog\n";
+ errmsg = "PEReference in prolog";
break;
case XML_ERR_PEREF_IN_EPILOG:
- errmsg = "PEReference in epilog\n";
+ errmsg = "PEReference in epilog";
break;
case XML_ERR_PEREF_NO_NAME:
- errmsg = "PEReference: no name\n";
+ errmsg = "PEReference: no name";
break;
case XML_ERR_PEREF_SEMICOL_MISSING:
- errmsg = "PEReference: expecting ';'\n";
+ errmsg = "PEReference: expecting ';'";
break;
case XML_ERR_ENTITY_LOOP:
- errmsg = "Detected an entity reference loop\n";
+ errmsg = "Detected an entity reference loop";
break;
case XML_ERR_ENTITY_NOT_STARTED:
- errmsg = "EntityValue: \" or ' expected\n";
+ errmsg = "EntityValue: \" or ' expected";
break;
case XML_ERR_ENTITY_PE_INTERNAL:
- errmsg = "PEReferences forbidden in internal subset\n";
+ errmsg = "PEReferences forbidden in internal subset";
break;
case XML_ERR_ENTITY_NOT_FINISHED:
- errmsg = "EntityValue: \" or ' expected\n";
+ errmsg = "EntityValue: \" or ' expected";
break;
case XML_ERR_ATTRIBUTE_NOT_STARTED:
- errmsg = "AttValue: \" or ' expected\n";
+ errmsg = "AttValue: \" or ' expected";
break;
case XML_ERR_LT_IN_ATTRIBUTE:
- errmsg = "Unescaped '<' not allowed in attributes values\n";
+ errmsg = "Unescaped '<' not allowed in attributes values";
break;
case XML_ERR_LITERAL_NOT_STARTED:
- errmsg = "SystemLiteral \" or ' expected\n";
+ errmsg = "SystemLiteral \" or ' expected";
break;
case XML_ERR_LITERAL_NOT_FINISHED:
- errmsg = "Unfinished System or Public ID \" or ' expected\n";
+ errmsg = "Unfinished System or Public ID \" or ' expected";
break;
case XML_ERR_MISPLACED_CDATA_END:
- errmsg = "Sequence ']]>' not allowed in content\n";
+ errmsg = "Sequence ']]>' not allowed in content";
break;
case XML_ERR_URI_REQUIRED:
- errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
+ errmsg = "SYSTEM or PUBLIC, the URI is missing";
break;
case XML_ERR_PUBID_REQUIRED:
- errmsg = "PUBLIC, the Public Identifier is missing\n";
+ errmsg = "PUBLIC, the Public Identifier is missing";
break;
case XML_ERR_HYPHEN_IN_COMMENT:
- errmsg = "Comment must not contain '--' (double-hyphen)\n";
+ errmsg = "Comment must not contain '--' (double-hyphen)";
break;
case XML_ERR_PI_NOT_STARTED:
- errmsg = "xmlParsePI : no target name\n";
+ errmsg = "xmlParsePI : no target name";
break;
case XML_ERR_RESERVED_XML_NAME:
- errmsg = "Invalid PI name\n";
+ errmsg = "Invalid PI name";
break;
case XML_ERR_NOTATION_NOT_STARTED:
- errmsg = "NOTATION: Name expected here\n";
+ errmsg = "NOTATION: Name expected here";
break;
case XML_ERR_NOTATION_NOT_FINISHED:
- errmsg = "'>' required to close NOTATION declaration\n";
+ errmsg = "'>' required to close NOTATION declaration";
break;
case XML_ERR_VALUE_REQUIRED:
- errmsg = "Entity value required\n";
+ errmsg = "Entity value required";
break;
case XML_ERR_URI_FRAGMENT:
errmsg = "Fragment not allowed";
break;
case XML_ERR_ATTLIST_NOT_STARTED:
- errmsg = "'(' required to start ATTLIST enumeration\n";
+ errmsg = "'(' required to start ATTLIST enumeration";
break;
case XML_ERR_NMTOKEN_REQUIRED:
- errmsg = "NmToken expected in ATTLIST enumeration\n";
+ errmsg = "NmToken expected in ATTLIST enumeration";
break;
case XML_ERR_ATTLIST_NOT_FINISHED:
- errmsg = "')' required to finish ATTLIST enumeration\n";
+ errmsg = "')' required to finish ATTLIST enumeration";
break;
case XML_ERR_MIXED_NOT_STARTED:
- errmsg = "MixedContentDecl : '|' or ')*' expected\n";
+ errmsg = "MixedContentDecl : '|' or ')*' expected";
break;
case XML_ERR_PCDATA_REQUIRED:
- errmsg = "MixedContentDecl : '#PCDATA' expected\n";
+ errmsg = "MixedContentDecl : '#PCDATA' expected";
break;
case XML_ERR_ELEMCONTENT_NOT_STARTED:
- errmsg = "ContentDecl : Name or '(' expected\n";
+ errmsg = "ContentDecl : Name or '(' expected";
break;
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
- errmsg = "ContentDecl : ',' '|' or ')' expected\n";
+ errmsg = "ContentDecl : ',' '|' or ')' expected";
break;
case XML_ERR_PEREF_IN_INT_SUBSET:
errmsg =
- "PEReference: forbidden within markup decl in internal subset\n";
+ "PEReference: forbidden within markup decl in internal subset";
break;
case XML_ERR_GT_REQUIRED:
- errmsg = "expected '>'\n";
+ errmsg = "expected '>'";
break;
case XML_ERR_CONDSEC_INVALID:
- errmsg = "XML conditional section '[' expected\n";
+ errmsg = "XML conditional section '[' expected";
break;
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
- errmsg = "Content error in the external subset\n";
+ errmsg = "Content error in the external subset";
break;
case XML_ERR_CONDSEC_INVALID_KEYWORD:
errmsg =
- "conditional section INCLUDE or IGNORE keyword expected\n";
+ "conditional section INCLUDE or IGNORE keyword expected";
break;
case XML_ERR_CONDSEC_NOT_FINISHED:
- errmsg = "XML conditional section not closed\n";
+ errmsg = "XML conditional section not closed";
break;
case XML_ERR_XMLDECL_NOT_STARTED:
- errmsg = "Text declaration '<?xml' required\n";
+ errmsg = "Text declaration '<?xml' required";
break;
case XML_ERR_XMLDECL_NOT_FINISHED:
- errmsg = "parsing XML declaration: '?>' expected\n";
+ errmsg = "parsing XML declaration: '?>' expected";
break;
case XML_ERR_EXT_ENTITY_STANDALONE:
- errmsg = "external parsed entities cannot be standalone\n";
+ errmsg = "external parsed entities cannot be standalone";
break;
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
- errmsg = "EntityRef: expecting ';'\n";
+ errmsg = "EntityRef: expecting ';'";
break;
case XML_ERR_DOCTYPE_NOT_FINISHED:
- errmsg = "DOCTYPE improperly terminated\n";
+ errmsg = "DOCTYPE improperly terminated";
break;
case XML_ERR_LTSLASH_REQUIRED:
- errmsg = "EndTag: '</' not found\n";
+ errmsg = "EndTag: '</' not found";
break;
case XML_ERR_EQUAL_REQUIRED:
- errmsg = "expected '='\n";
+ errmsg = "expected '='";
break;
case XML_ERR_STRING_NOT_CLOSED:
- errmsg = "String not closed expecting \" or '\n";
+ errmsg = "String not closed expecting \" or '";
break;
case XML_ERR_STRING_NOT_STARTED:
- errmsg = "String not started expecting ' or \"\n";
+ errmsg = "String not started expecting ' or \"";
break;
case XML_ERR_ENCODING_NAME:
- errmsg = "Invalid XML encoding name\n";
+ errmsg = "Invalid XML encoding name";
break;
case XML_ERR_STANDALONE_VALUE:
- errmsg = "standalone accepts only 'yes' or 'no'\n";
+ errmsg = "standalone accepts only 'yes' or 'no'";
break;
case XML_ERR_DOCUMENT_EMPTY:
- errmsg = "Document is empty\n";
+ errmsg = "Document is empty";
break;
case XML_ERR_DOCUMENT_END:
- errmsg = "Extra content at the end of the document\n";
+ errmsg = "Extra content at the end of the document";
break;
case XML_ERR_NOT_WELL_BALANCED:
- errmsg = "chunk is not well balanced\n";
+ errmsg = "chunk is not well balanced";
break;
case XML_ERR_EXTRA_CONTENT:
- errmsg = "extra content at the end of well balanced chunk\n";
+ errmsg = "extra content at the end of well balanced chunk";
break;
case XML_ERR_VERSION_MISSING:
- errmsg = "Malformed declaration expecting version\n";
+ errmsg = "Malformed declaration expecting version";
+ break;
+ case XML_ERR_NAME_TOO_LONG:
+ errmsg = "Name too long use XML_PARSE_HUGE option";
break;
#if 0
case:
- errmsg = "\n";
+ errmsg = "";
break;
#endif
default:
- errmsg = "Unregistered error message\n";
+ errmsg = "Unregistered error message";
}
+ if (info == NULL)
+ snprintf(errstr, 128, "%s\n", errmsg);
+ else
+ snprintf(errstr, 128, "%s: %%s\n", errmsg);
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
- XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
+ XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
info);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
@@ -392,7 +565,7 @@ xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
- XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
+ XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
if (ctxt->recovery == 0)
@@ -415,20 +588,28 @@ xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar *str1, const xmlChar *str2)
{
xmlStructuredErrorFunc schannel = NULL;
-
+
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if ((ctxt != NULL) && (ctxt->sax != NULL) &&
(ctxt->sax->initialized == XML_SAX2_MAGIC))
schannel = ctxt->sax->serror;
- __xmlRaiseError(schannel,
+ if (ctxt != NULL) {
+ __xmlRaiseError(schannel,
(ctxt->sax) ? ctxt->sax->warning : NULL,
ctxt->userData,
ctxt, NULL, XML_FROM_PARSER, error,
XML_ERR_WARNING, NULL, 0,
(const char *) str1, (const char *) str2, NULL, 0, 0,
msg, (const char *) str1, (const char *) str2);
+ } else {
+ __xmlRaiseError(schannel, NULL, NULL,
+ ctxt, NULL, XML_FROM_PARSER, error,
+ XML_ERR_WARNING, NULL, 0,
+ (const char *) str1, (const char *) str2, NULL, 0, 0,
+ msg, (const char *) str1, (const char *) str2);
+ }
}
/**
@@ -442,7 +623,7 @@ xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
*/
static void
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
- const char *msg, const xmlChar *str1)
+ const char *msg, const xmlChar *str1, const xmlChar *str2)
{
xmlStructuredErrorFunc schannel = NULL;
@@ -454,14 +635,20 @@ xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
schannel = ctxt->sax->serror;
}
- __xmlRaiseError(schannel,
+ if (ctxt != NULL) {
+ __xmlRaiseError(schannel,
ctxt->vctxt.error, ctxt->vctxt.userData,
ctxt, NULL, XML_FROM_DTD, error,
XML_ERR_ERROR, NULL, 0, (const char *) str1,
- NULL, NULL, 0, 0,
- msg, (const char *) str1);
- if (ctxt != NULL) {
+ (const char *) str2, NULL, 0, 0,
+ msg, (const char *) str1, (const char *) str2);
ctxt->valid = 0;
+ } else {
+ __xmlRaiseError(schannel, NULL, NULL,
+ ctxt, NULL, XML_FROM_DTD, error,
+ XML_ERR_ERROR, NULL, 0, (const char *) str1,
+ (const char *) str2, NULL, 0, 0,
+ msg, (const char *) str1, (const char *) str2);
}
}
@@ -506,7 +693,7 @@ xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
*/
static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
- const char *msg, const xmlChar *str1, int val,
+ const char *msg, const xmlChar *str1, int val,
const xmlChar *str2)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
@@ -607,9 +794,34 @@ xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
ctxt->nsWellFormed = 0;
}
+/**
+ * xmlNsWarn
+ * @ctxt: an XML parser context
+ * @error: the error number
+ * @msg: the message
+ * @info1: extra information string
+ * @info2: extra information string
+ *
+ * Handle a namespace warning error
+ */
+static void
+xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
+ const char *msg,
+ const xmlChar * info1, const xmlChar * info2,
+ const xmlChar * info3)
+{
+ if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
+ (ctxt->instate == XML_PARSER_EOF))
+ return;
+ __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
+ XML_ERR_WARNING, NULL, 0, (const char *) info1,
+ (const char *) info2, (const char *) info3, 0, 0, msg,
+ info1, info2, info3);
+}
+
/************************************************************************
* *
- * Library wide options *
+ * Library wide options *
* *
************************************************************************/
@@ -806,13 +1018,25 @@ xmlHasFeature(xmlFeature feature)
return(1);
#else
return(0);
-#endif
+#endif
case XML_WITH_ZLIB:
#ifdef LIBXML_ZLIB_ENABLED
return(1);
#else
return(0);
#endif
+ case XML_WITH_LZMA:
+#ifdef LIBXML_LZMA_ENABLED
+ return(1);
+#else
+ return(0);
+#endif
+ case XML_WITH_ICU:
+#ifdef LIBXML_ICU_ENABLED
+ return(1);
+#else
+ return(0);
+#endif
default:
break;
}
@@ -821,7 +1045,7 @@ xmlHasFeature(xmlFeature feature)
/************************************************************************
* *
- * SAX2 defaulted attributes handling *
+ * SAX2 defaulted attributes handling *
* *
************************************************************************/
@@ -845,8 +1069,8 @@ xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
- if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
- (ctxt->str_xml_ns == NULL)) {
+ if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
+ (ctxt->str_xml_ns == NULL)) {
xmlErrMemory(ctxt, NULL);
}
}
@@ -856,10 +1080,108 @@ typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
int nbAttrs; /* number of defaulted attributes on that element */
int maxAttrs; /* the size of the array */
- const xmlChar *values[4]; /* array of localname/prefix/values */
+ const xmlChar *values[5]; /* array of localname/prefix/values/external */
};
/**
+ * xmlAttrNormalizeSpace:
+ * @src: the source string
+ * @dst: the target string
+ *
+ * Normalize the space in non CDATA attribute values:
+ * If the attribute type is not CDATA, then the XML processor MUST further
+ * process the normalized attribute value by discarding any leading and
+ * trailing space (#x20) characters, and by replacing sequences of space
+ * (#x20) characters by a single space (#x20) character.
+ * Note that the size of dst need to be at least src, and if one doesn't need
+ * to preserve dst (and it doesn't come from a dictionary or read-only) then
+ * passing src as dst is just fine.
+ *
+ * Returns a pointer to the normalized value (dst) or NULL if no conversion
+ * is needed.
+ */
+static xmlChar *
+xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
+{
+ if ((src == NULL) || (dst == NULL))
+ return(NULL);
+
+ while (*src == 0x20) src++;
+ while (*src != 0) {
+ if (*src == 0x20) {
+ while (*src == 0x20) src++;
+ if (*src != 0)
+ *dst++ = 0x20;
+ } else {
+ *dst++ = *src++;
+ }
+ }
+ *dst = 0;
+ if (dst == src)
+ return(NULL);
+ return(dst);
+}
+
+/**
+ * xmlAttrNormalizeSpace2:
+ * @src: the source string
+ *
+ * Normalize the space in non CDATA attribute values, a slightly more complex
+ * front end to avoid allocation problems when running on attribute values
+ * coming from the input.
+ *
+ * Returns a pointer to the normalized value (dst) or NULL if no conversion
+ * is needed.
+ */
+static const xmlChar *
+xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
+{
+ int i;
+ int remove_head = 0;
+ int need_realloc = 0;
+ const xmlChar *cur;
+
+ if ((ctxt == NULL) || (src == NULL) || (len == NULL))
+ return(NULL);
+ i = *len;
+ if (i <= 0)
+ return(NULL);
+
+ cur = src;
+ while (*cur == 0x20) {
+ cur++;
+ remove_head++;
+ }
+ while (*cur != 0) {
+ if (*cur == 0x20) {
+ cur++;
+ if ((*cur == 0x20) || (*cur == 0)) {
+ need_realloc = 1;
+ break;
+ }
+ } else
+ cur++;
+ }
+ if (need_realloc) {
+ xmlChar *ret;
+
+ ret = xmlStrndup(src + remove_head, i - remove_head + 1);
+ if (ret == NULL) {
+ xmlErrMemory(ctxt, NULL);
+ return(NULL);
+ }
+ xmlAttrNormalizeSpace(ret, ret);
+ *len = (int) strlen((const char *)ret);
+ return(ret);
+ } else if (remove_head) {
+ *len -= remove_head;
+ memmove(src, src + remove_head, 1 + *len);
+ return(src);
+ }
+ return(NULL);
+}
+
+/**
* xmlAddDefAttrs:
* @ctxt: an XML parser context
* @fullname: the element fullname
@@ -878,6 +1200,14 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
const xmlChar *name;
const xmlChar *prefix;
+ /*
+ * Allows to detect attribute redefinitions
+ */
+ if (ctxt->attsSpecial != NULL) {
+ if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
+ return;
+ }
+
if (ctxt->attsDefault == NULL) {
ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
if (ctxt->attsDefault == NULL)
@@ -903,22 +1233,30 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
if (defaults == NULL) {
defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
- (4 * 4) * sizeof(const xmlChar *));
+ (4 * 5) * sizeof(const xmlChar *));
if (defaults == NULL)
goto mem_error;
defaults->nbAttrs = 0;
defaults->maxAttrs = 4;
- xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
+ if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
+ defaults, NULL) < 0) {
+ xmlFree(defaults);
+ goto mem_error;
+ }
} else if (defaults->nbAttrs >= defaults->maxAttrs) {
xmlDefAttrsPtr temp;
temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
- (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
+ (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
if (temp == NULL)
goto mem_error;
defaults = temp;
defaults->maxAttrs *= 2;
- xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
+ if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
+ defaults, NULL) < 0) {
+ xmlFree(defaults);
+ goto mem_error;
+ }
}
/*
@@ -934,13 +1272,17 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
prefix = xmlDictLookup(ctxt->dict, fullattr, len);
}
- defaults->values[4 * defaults->nbAttrs] = name;
- defaults->values[4 * defaults->nbAttrs + 1] = prefix;
+ defaults->values[5 * defaults->nbAttrs] = name;
+ defaults->values[5 * defaults->nbAttrs + 1] = prefix;
/* intern the string and precompute the end */
len = xmlStrlen(value);
value = xmlDictLookup(ctxt->dict, value, len);
- defaults->values[4 * defaults->nbAttrs + 2] = value;
- defaults->values[4 * defaults->nbAttrs + 3] = value + len;
+ defaults->values[5 * defaults->nbAttrs + 2] = value;
+ defaults->values[5 * defaults->nbAttrs + 3] = value + len;
+ if (ctxt->external)
+ defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
+ else
+ defaults->values[5 * defaults->nbAttrs + 4] = NULL;
defaults->nbAttrs++;
return;
@@ -957,7 +1299,7 @@ mem_error:
* @fullattr: the attribute fullname
* @type: the attribute type
*
- * Register that this attribute is not CDATA
+ * Register this attribute type
*/
static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
@@ -971,6 +1313,9 @@ xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
goto mem_error;
}
+ if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
+ return;
+
xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
(void *) (long) type);
return;
@@ -981,6 +1326,45 @@ mem_error:
}
/**
+ * xmlCleanSpecialAttrCallback:
+ *
+ * Removes CDATA attributes from the special attribute table
+ */
+static void
+xmlCleanSpecialAttrCallback(void *payload, void *data,
+ const xmlChar *fullname, const xmlChar *fullattr,
+ const xmlChar *unused ATTRIBUTE_UNUSED) {
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
+
+ if (((long) payload) == XML_ATTRIBUTE_CDATA) {
+ xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
+ }
+}
+
+/**
+ * xmlCleanSpecialAttr:
+ * @ctxt: an XML parser context
+ *
+ * Trim the list of attributes defined to remove all those of type
+ * CDATA as they are not special. This call should be done when finishing
+ * to parse the DTD and before starting to parse the document root.
+ */
+static void
+xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
+{
+ if (ctxt->attsSpecial == NULL)
+ return;
+
+ xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
+
+ if (xmlHashSize(ctxt->attsSpecial) == 0) {
+ xmlHashFree(ctxt->attsSpecial, NULL);
+ ctxt->attsSpecial = NULL;
+ }
+ return;
+}
+
+/**
* xmlCheckLanguageID:
* @lang: pointer to the string value
*
@@ -996,70 +1380,192 @@ mem_error:
* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
* [38] Subcode ::= ([a-z] | [A-Z])+
*
+ * The current REC reference the sucessors of RFC 1766, currently 5646
+ *
+ * http://www.rfc-editor.org/rfc/rfc5646.txt
+ * langtag = language
+ * ["-" script]
+ * ["-" region]
+ * *("-" variant)
+ * *("-" extension)
+ * ["-" privateuse]
+ * language = 2*3ALPHA ; shortest ISO 639 code
+ * ["-" extlang] ; sometimes followed by
+ * ; extended language subtags
+ * / 4ALPHA ; or reserved for future use
+ * / 5*8ALPHA ; or registered language subtag
+ *
+ * extlang = 3ALPHA ; selected ISO 639 codes
+ * *2("-" 3ALPHA) ; permanently reserved
+ *
+ * script = 4ALPHA ; ISO 15924 code
+ *
+ * region = 2ALPHA ; ISO 3166-1 code
+ * / 3DIGIT ; UN M.49 code
+ *
+ * variant = 5*8alphanum ; registered variants
+ * / (DIGIT 3alphanum)
+ *
+ * extension = singleton 1*("-" (2*8alphanum))
+ *
+ * ; Single alphanumerics
+ * ; "x" reserved for private use
+ * singleton = DIGIT ; 0 - 9
+ * / %x41-57 ; A - W
+ * / %x59-5A ; Y - Z
+ * / %x61-77 ; a - w
+ * / %x79-7A ; y - z
+ *
+ * it sounds right to still allow Irregular i-xxx IANA and user codes too
+ * The parser below doesn't try to cope with extension or privateuse
+ * that could be added but that's not interoperable anyway
+ *
* Returns 1 if correct 0 otherwise
**/
int
xmlCheckLanguageID(const xmlChar * lang)
{
- const xmlChar *cur = lang;
+ const xmlChar *cur = lang, *nxt;
if (cur == NULL)
return (0);
if (((cur[0] == 'i') && (cur[1] == '-')) ||
- ((cur[0] == 'I') && (cur[1] == '-'))) {
+ ((cur[0] == 'I') && (cur[1] == '-')) ||
+ ((cur[0] == 'x') && (cur[1] == '-')) ||
+ ((cur[0] == 'X') && (cur[1] == '-'))) {
/*
- * IANA code
+ * Still allow IANA code and user code which were coming
+ * from the previous version of the XML-1.0 specification
+ * it's deprecated but we should not fail
*/
cur += 2;
- while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
+ while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
((cur[0] >= 'a') && (cur[0] <= 'z')))
cur++;
- } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
- ((cur[0] == 'X') && (cur[1] == '-'))) {
- /*
- * User code
- */
- cur += 2;
- while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
- ((cur[0] >= 'a') && (cur[0] <= 'z')))
- cur++;
- } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
- ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
+ return(cur[0] == 0);
+ }
+ nxt = cur;
+ while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
+ ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
+ nxt++;
+ if (nxt - cur >= 4) {
/*
- * ISO639
+ * Reserved
*/
- cur++;
- if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
- ((cur[0] >= 'a') && (cur[0] <= 'z')))
- cur++;
- else
- return (0);
- } else
- return (0);
- while (cur[0] != 0) { /* non input consuming */
- if (cur[0] != '-')
- return (0);
- cur++;
- if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
- ((cur[0] >= 'a') && (cur[0] <= 'z')))
- cur++;
- else
- return (0);
- while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
- ((cur[0] >= 'a') && (cur[0] <= 'z')))
- cur++;
+ if ((nxt - cur > 8) || (nxt[0] != 0))
+ return(0);
+ return(1);
}
+ if (nxt - cur < 2)
+ return(0);
+ /* we got an ISO 639 code */
+ if (nxt[0] == 0)
+ return(1);
+ if (nxt[0] != '-')
+ return(0);
+
+ nxt++;
+ cur = nxt;
+ /* now we can have extlang or script or region or variant */
+ if ((nxt[0] >= '0') && (nxt[0] <= '9'))
+ goto region_m49;
+
+ while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
+ ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
+ nxt++;
+ if (nxt - cur == 4)
+ goto script;
+ if (nxt - cur == 2)
+ goto region;
+ if ((nxt - cur >= 5) && (nxt - cur <= 8))
+ goto variant;
+ if (nxt - cur != 3)
+ return(0);
+ /* we parsed an extlang */
+ if (nxt[0] == 0)
+ return(1);
+ if (nxt[0] != '-')
+ return(0);
+
+ nxt++;
+ cur = nxt;
+ /* now we can have script or region or variant */
+ if ((nxt[0] >= '0') && (nxt[0] <= '9'))
+ goto region_m49;
+
+ while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
+ ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
+ nxt++;
+ if (nxt - cur == 2)
+ goto region;
+ if ((nxt - cur >= 5) && (nxt - cur <= 8))
+ goto variant;
+ if (nxt - cur != 4)
+ return(0);
+ /* we parsed a script */
+script:
+ if (nxt[0] == 0)
+ return(1);
+ if (nxt[0] != '-')
+ return(0);
+
+ nxt++;
+ cur = nxt;
+ /* now we can have region or variant */
+ if ((nxt[0] >= '0') && (nxt[0] <= '9'))
+ goto region_m49;
+
+ while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
+ ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
+ nxt++;
+
+ if ((nxt - cur >= 5) && (nxt - cur <= 8))
+ goto variant;
+ if (nxt - cur != 2)
+ return(0);
+ /* we parsed a region */
+region:
+ if (nxt[0] == 0)
+ return(1);
+ if (nxt[0] != '-')
+ return(0);
+
+ nxt++;
+ cur = nxt;
+ /* now we can just have a variant */
+ while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
+ ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
+ nxt++;
+
+ if ((nxt - cur < 5) || (nxt - cur > 8))
+ return(0);
+
+ /* we parsed a variant */
+variant:
+ if (nxt[0] == 0)
+ return(1);
+ if (nxt[0] != '-')
+ return(0);
+ /* extensions and private use subtags not checked */
return (1);
+
+region_m49:
+ if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
+ ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
+ nxt += 3;
+ goto region;
+ }
+ return(0);
}
/************************************************************************
* *
- * Parser stacks related functions and macros *
+ * Parser stacks related functions and macros *
* *
************************************************************************/
-xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
- const xmlChar ** str);
+static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
+ const xmlChar ** str);
#ifdef SAX2
/**
@@ -1078,7 +1584,7 @@ nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
{
if (ctxt->options & XML_PARSE_NSCLEAN) {
int i;
- for (i = 0;i < ctxt->nsNr;i += 2) {
+ for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
if (ctxt->nsTab[i] == prefix) {
/* in scope */
if (ctxt->nsTab[i + 1] == URL)
@@ -1099,15 +1605,16 @@ nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
return (-1);
}
} else if (ctxt->nsNr >= ctxt->nsMax) {
+ const xmlChar ** tmp;
ctxt->nsMax *= 2;
- ctxt->nsTab = (const xmlChar **)
- xmlRealloc((char *) ctxt->nsTab,
- ctxt->nsMax * sizeof(ctxt->nsTab[0]));
- if (ctxt->nsTab == NULL) {
+ tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
+ ctxt->nsMax * sizeof(ctxt->nsTab[0]));
+ if (tmp == NULL) {
xmlErrMemory(ctxt, NULL);
ctxt->nsMax /= 2;
return (-1);
}
+ ctxt->nsTab = tmp;
}
ctxt->nsTab[ctxt->nsNr++] = prefix;
ctxt->nsTab[ctxt->nsNr++] = URL;
@@ -1134,7 +1641,7 @@ nsPop(xmlParserCtxtPtr ctxt, int nr)
}
if (ctxt->nsNr <= 0)
return (0);
-
+
for (i = 0;i < nr;i++) {
ctxt->nsNr--;
ctxt->nsTab[ctxt->nsNr] = NULL;
@@ -1184,13 +1691,13 @@ mem_error:
*
* Pushes a new parser input on top of the input stack
*
- * Returns 0 in case of error, the index in the stack otherwise
+ * Returns -1 in case of error, the index in the stack otherwise
*/
int
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
{
if ((ctxt == NULL) || (value == NULL))
- return(0);
+ return(-1);
if (ctxt->inputNr >= ctxt->inputMax) {
ctxt->inputMax *= 2;
ctxt->inputTab =
@@ -1199,7 +1706,10 @@ inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
sizeof(ctxt->inputTab[0]));
if (ctxt->inputTab == NULL) {
xmlErrMemory(ctxt, NULL);
- return (0);
+ xmlFreeInputStream(value);
+ ctxt->inputMax /= 2;
+ value = NULL;
+ return (-1);
}
}
ctxt->inputTab[ctxt->inputNr] = value;
@@ -1239,7 +1749,7 @@ inputPop(xmlParserCtxtPtr ctxt)
*
* Pushes a new element node on top of the node stack
*
- * Returns 0 in case of error, the index in the stack otherwise
+ * Returns -1 in case of error, the index in the stack otherwise
*/
int
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
@@ -1253,22 +1763,24 @@ nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
sizeof(ctxt->nodeTab[0]));
if (tmp == NULL) {
xmlErrMemory(ctxt, NULL);
- return (0);
+ return (-1);
}
ctxt->nodeTab = tmp;
ctxt->nodeMax *= 2;
}
- if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
+ if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
- "Excessive depth in document: change xmlParserMaxDepth = %d\n",
+ "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
xmlParserMaxDepth);
- ctxt->instate = XML_PARSER_EOF;
- return(0);
+ xmlHaltParser(ctxt);
+ return(-1);
}
ctxt->nodeTab[ctxt->nodeNr] = value;
ctxt->node = value;
return (ctxt->nodeNr++);
}
+
/**
* nodePop:
* @ctxt: an XML parser context
@@ -1384,15 +1896,14 @@ namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
if (ctxt->nameNr >= ctxt->nameMax) {
const xmlChar * *tmp;
- ctxt->nameMax *= 2;
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
- ctxt->nameMax *
+ ctxt->nameMax * 2 *
sizeof(ctxt->nameTab[0]));
if (tmp == NULL) {
- ctxt->nameMax /= 2;
goto mem_error;
}
ctxt->nameTab = tmp;
+ ctxt->nameMax *= 2;
}
ctxt->nameTab[ctxt->nameNr] = value;
ctxt->name = value;
@@ -1428,13 +1939,17 @@ namePop(xmlParserCtxtPtr ctxt)
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
if (ctxt->spaceNr >= ctxt->spaceMax) {
+ int *tmp;
+
ctxt->spaceMax *= 2;
- ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
- ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
- if (ctxt->spaceTab == NULL) {
+ tmp = (int *) xmlRealloc(ctxt->spaceTab,
+ ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
+ if (tmp == NULL) {
xmlErrMemory(ctxt, NULL);
- return(0);
+ ctxt->spaceMax /=2;
+ return(-1);
}
+ ctxt->spaceTab = tmp;
}
ctxt->spaceTab[ctxt->spaceNr] = val;
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
@@ -1474,7 +1989,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
* to compare on ASCII based substring.
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
* strings without newlines within the parser.
- * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
+ * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
* defined char within the parser.
* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
*
@@ -1523,10 +2038,10 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
#define SKIPL(val) do { \
int skipl; \
for(skipl=0; skipl<val; skipl++) { \
- if (*(ctxt->input->cur) == '\n') { \
+ if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
- } else ctxt->input->col++; \
- ctxt->nbChars++; \
+ } else ctxt->input->col++; \
+ ctxt->nbChars++; \
ctxt->input->cur++; \
} \
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
@@ -1552,8 +2067,25 @@ static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
xmlGROW (ctxt);
static void xmlGROW (xmlParserCtxtPtr ctxt) {
+ unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
+ unsigned long curBase = ctxt->input->cur - ctxt->input->base;
+
+ if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
+ (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
+ ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
+ xmlHaltParser(ctxt);
+ return;
+ }
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- if ((*ctxt->input->cur == 0) &&
+ if ((ctxt->input->cur > ctxt->input->end) ||
+ (ctxt->input->cur < ctxt->input->base)) {
+ xmlHaltParser(ctxt);
+ xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
+ return;
+ }
+ if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
xmlPopInput(ctxt);
}
@@ -1612,6 +2144,8 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
while (IS_BLANK_CH(*cur)) {
if (*cur == '\n') {
ctxt->input->line++; ctxt->input->col = 1;
+ } else {
+ ctxt->input->col++;
}
cur++;
res++;
@@ -1626,7 +2160,8 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
int cur;
do {
cur = CUR;
- while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
+ while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
+ (ctxt->instate != XML_PARSER_EOF))) {
NEXT;
cur = CUR;
res++;
@@ -1640,7 +2175,8 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
* Need to handle support of entities branching here
*/
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
- } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
+ } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
+ (ctxt->instate != XML_PARSER_EOF));
}
return(res);
}
@@ -1680,10 +2216,12 @@ xmlPopInput(xmlParserCtxtPtr ctxt) {
*
* xmlPushInput: switch to a new input stream which is stacked on top
* of the previous one(s).
+ * Returns -1 in case of error or the index in the input stack
*/
-void
+int
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
- if (input == NULL) return;
+ int ret;
+ if (input == NULL) return(-1);
if (xmlParserDebugEntities) {
if ((ctxt->input != NULL) && (ctxt->input->filename))
@@ -1693,8 +2231,11 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
xmlGenericError(xmlGenericErrorContext,
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
}
- inputPush(ctxt, input);
+ ret = inputPush(ctxt, input);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
GROW;
+ return(ret);
}
/**
@@ -1708,7 +2249,7 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
- * production for Char.
+ * production for Char.
*
* Returns the value parsed (as an int), 0 in case of error
*/
@@ -1729,8 +2270,10 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
if (count++ > 20) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(0);
}
- if ((RAW >= '0') && (RAW <= '9'))
+ if ((RAW >= '0') && (RAW <= '9'))
val = val * 16 + (CUR - '0');
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
val = val * 16 + (CUR - 'a') + 10;
@@ -1760,8 +2303,10 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
if (count++ > 20) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(0);
}
- if ((RAW >= '0') && (RAW <= '9'))
+ if ((RAW >= '0') && (RAW <= '9'))
val = val * 10 + (CUR - '0');
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
@@ -1787,7 +2332,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
/*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
- * production for Char.
+ * production for Char.
*/
if ((IS_CHAR(val) && (outofrange == 0))) {
return(val);
@@ -1812,7 +2357,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
- * production for Char.
+ * production for Char.
*
* Returns the value parsed (as an int), 0 in case of error, str will be
* updated to the current value of the index
@@ -1831,7 +2376,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
ptr += 3;
cur = *ptr;
while (cur != ';') { /* Non input consuming loop */
- if ((cur >= '0') && (cur <= '9'))
+ if ((cur >= '0') && (cur <= '9'))
val = val * 16 + (cur - '0');
else if ((cur >= 'a') && (cur <= 'f'))
val = val * 16 + (cur - 'a') + 10;
@@ -1854,7 +2399,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
ptr += 2;
cur = *ptr;
while (cur != ';') { /* Non input consuming loops */
- if ((cur >= '0') && (cur <= '9'))
+ if ((cur >= '0') && (cur <= '9'))
val = val * 10 + (cur - '0');
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
@@ -1878,7 +2423,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
/*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
- * production for Char.
+ * production for Char.
*/
if ((IS_CHAR(val) && (outofrange == 0))) {
return(val);
@@ -1900,9 +2445,9 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
*
* Returns the new input stream or NULL
*/
-
+
static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
-
+
static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
xmlParserInputPtr input;
@@ -1924,7 +2469,8 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
buffer = xmlMallocAtomic(length);
if (buffer == NULL) {
xmlErrMemory(ctxt, NULL);
- return(NULL);
+ xmlFree(input);
+ return(NULL);
}
buffer [0] = ' ';
buffer [1] = '%';
@@ -1943,12 +2489,12 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
/**
* xmlParserHandlePEReference:
* @ctxt: the parser context
- *
+ *
* [69] PEReference ::= '%' Name ';'
*
* [ WFC: No Recursion ]
* A parsed entity must not contain a recursive
- * reference to itself, either directly or indirectly.
+ * reference to itself, either directly or indirectly.
*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an internal DTD
@@ -1966,9 +2512,9 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
* NOTE: misleading but this is handled.
*
* A PEReference may have been detected in the current input stream
- * the handling is done accordingly to
+ * the handling is done accordingly to
* http://www.w3.org/TR/REC-xml#entproc
- * i.e.
+ * i.e.
* - Included in literal in entity values
* - Included as Parameter Entity reference within DTDs
*/
@@ -2045,8 +2591,10 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
NEXT;
if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
if (entity == NULL) {
-
+
/*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an
@@ -2071,16 +2619,18 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
- name);
- } else
+ name, NULL);
+ } else
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
name, NULL);
ctxt->valid = 0;
}
+ xmlParserEntityCheck(ctxt, 0, NULL, 0);
} else if (ctxt->input->free != deallocblankswrapper) {
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
- xmlPushInput(ctxt, input);
+ if (xmlPushInput(ctxt, input) < 0)
+ return;
} else {
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
@@ -2088,14 +2638,32 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
xmlCharEncoding enc;
/*
+ * Note: external parameter entities will not be loaded, it
+ * is not required for a non-validating parser, unless the
+ * option of validating, or substituting entities were
+ * given. Doing so is far more secure as the parser will
+ * only process data coming from the document entity by
+ * default.
+ */
+ if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
+ ((ctxt->options & XML_PARSE_NOENT) == 0) &&
+ ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
+ ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
+ ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
+ (ctxt->replaceEntities == 0) &&
+ (ctxt->validate == 0))
+ return;
+
+ /*
* handle the extra spaces added before and after
* c.f. http://www.w3.org/TR/REC-xml#as-PE
* this is done independently.
*/
input = xmlNewEntityInputStream(ctxt, entity);
- xmlPushInput(ctxt, input);
+ if (xmlPushInput(ctxt, input) < 0)
+ return;
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
@@ -2105,6 +2673,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
* the amount of data in the buffer.
*/
GROW
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
if ((ctxt->input->end - ctxt->input->cur)>=4) {
start[0] = RAW;
start[1] = NXT(1);
@@ -2135,14 +2705,17 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
/*
* Macro used to grow the current buffer.
+ * buffer##_size is expected to be a size_t
+ * mem_error: is expected to handle memory allocation failures
*/
-#define growBuffer(buffer) { \
+#define growBuffer(buffer, n) { \
xmlChar *tmp; \
- buffer##_size *= 2; \
- tmp = (xmlChar *) \
- xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
+ size_t new_size = buffer##_size * 2 + n; \
+ if (new_size < buffer##_size) goto mem_error; \
+ tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
if (tmp == NULL) goto mem_error; \
buffer = tmp; \
+ buffer##_size = new_size; \
}
/**
@@ -2154,7 +2727,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
* @end: an end marker xmlChar, 0 if none
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
- *
+ *
* Takes a entity string content and process to do the adequate substitutions.
*
* [67] Reference ::= EntityRef | CharRef
@@ -2168,19 +2741,22 @@ xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
int what, xmlChar end, xmlChar end2, xmlChar end3) {
xmlChar *buffer = NULL;
- int buffer_size = 0;
+ size_t buffer_size = 0;
+ size_t nbchars = 0;
xmlChar *current = NULL;
+ xmlChar *rep = NULL;
const xmlChar *last;
xmlEntityPtr ent;
int c,l;
- int nbchars = 0;
if ((ctxt == NULL) || (str == NULL) || (len < 0))
return(NULL);
last = str + len;
- if (ctxt->depth > 40) {
+ if (((ctxt->depth > 40) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
+ (ctxt->depth > 1024)) {
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
return(NULL);
}
@@ -2189,7 +2765,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
* allocate a translation buffer.
*/
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
- buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
+ buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
if (buffer == NULL) goto mem_error;
/*
@@ -2209,8 +2785,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
if (val != 0) {
COPY_BUF(0,buffer,nbchars,val);
}
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer);
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
if (xmlParserDebugEntities)
@@ -2218,42 +2794,53 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
"String decoding Entity Reference: %.30s\n",
str);
ent = xmlParseStringEntityRef(ctxt, &str);
+ if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
+ (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
+ goto int_error;
+ xmlParserEntityCheck(ctxt, 0, ent, 0);
+ if (ent != NULL)
+ ctxt->nbentities += ent->checked / 2;
if ((ent != NULL) &&
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
if (ent->content != NULL) {
COPY_BUF(0,buffer,nbchars,ent->content[0]);
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer);
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
} else {
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
"predefined entity has no content\n");
}
} else if ((ent != NULL) && (ent->content != NULL)) {
- xmlChar *rep;
-
ctxt->depth++;
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
0, 0, 0);
ctxt->depth--;
+
+ if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
+ (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
+ goto int_error;
+
if (rep != NULL) {
current = rep;
while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
- if (nbchars >
- buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer);
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
+ if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
+ goto int_error;
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
}
xmlFree(rep);
+ rep = NULL;
}
} else if (ent != NULL) {
int i = xmlStrlen(ent->name);
const xmlChar *cur = ent->name;
buffer[nbchars++] = '&';
- if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer);
+ if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
+ growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
}
for (;i > 0;i--)
buffer[nbchars++] = *cur++;
@@ -2264,9 +2851,15 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
xmlGenericError(xmlGenericErrorContext,
"String decoding PE Reference: %.30s\n", str);
ent = xmlParseStringPEReference(ctxt, &str);
+ if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
+ goto int_error;
+ xmlParserEntityCheck(ctxt, 0, ent, 0);
+ if (ent != NULL)
+ ctxt->nbentities += ent->checked / 2;
if (ent != NULL) {
- xmlChar *rep;
-
+ if (ent->content == NULL) {
+ xmlLoadEntityContent(ctxt, ent);
+ }
ctxt->depth++;
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
0, 0, 0);
@@ -2275,19 +2868,21 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
current = rep;
while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
- if (nbchars >
- buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer);
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
+ if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
+ goto int_error;
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
}
xmlFree(rep);
+ rep = NULL;
}
}
} else {
COPY_BUF(l,buffer,nbchars,c);
str += l;
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer);
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
}
if (str < last)
@@ -2295,11 +2890,16 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
else
c = 0;
}
- buffer[nbchars++] = 0;
+ buffer[nbchars] = 0;
return(buffer);
mem_error:
xmlErrMemory(ctxt, NULL);
+int_error:
+ if (rep != NULL)
+ xmlFree(rep);
+ if (buffer != NULL)
+ xmlFree(buffer);
return(NULL);
}
@@ -2311,7 +2911,7 @@ mem_error:
* @end: an end marker xmlChar, 0 if none
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
- *
+ *
* Takes a entity string content and process to do the adequate substitutions.
*
* [67] Reference ::= EntityRef | CharRef
@@ -2414,7 +3014,7 @@ static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
* xmlSplitQName:
* @ctxt: an XML parser context
* @name: an XML parser context
- * @prefix: a xmlChar **
+ * @prefix: a xmlChar **
*
* parse an UTF8 encoded XML qualified name string
*
@@ -2465,7 +3065,7 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
* for the processing speed.
*/
max = len * 2;
-
+
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
if (buffer == NULL) {
xmlErrMemory(ctxt, NULL);
@@ -2480,7 +3080,7 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
tmp = (xmlChar *) xmlRealloc(buffer,
max * sizeof(xmlChar));
if (tmp == NULL) {
- xmlFree(tmp);
+ xmlFree(buffer);
xmlErrMemory(ctxt, NULL);
return(NULL);
}
@@ -2491,7 +3091,7 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
}
buffer[len] = 0;
}
-
+
if ((c == ':') && (*cur == 0)) {
if (buffer != NULL)
xmlFree(buffer);
@@ -2544,7 +3144,7 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
* for the processing speed.
*/
max = len * 2;
-
+
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
if (buffer == NULL) {
xmlErrMemory(ctxt, NULL);
@@ -2570,7 +3170,7 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
}
buffer[len] = 0;
}
-
+
if (buffer == NULL)
ret = xmlStrndup(buf, len);
else {
@@ -2588,10 +3188,221 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
* *
************************************************************************/
-static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
+/************************************************************************
+ * *
+ * Routines to parse Name, NCName and NmToken *
+ * *
+ ************************************************************************/
+#ifdef DEBUG
+static unsigned long nbParseName = 0;
+static unsigned long nbParseNmToken = 0;
+static unsigned long nbParseNCName = 0;
+static unsigned long nbParseNCNameComplex = 0;
+static unsigned long nbParseNameComplex = 0;
+static unsigned long nbParseStringName = 0;
+#endif
+
+/*
+ * The two following functions are related to the change of accepted
+ * characters for Name and NmToken in the Revision 5 of XML-1.0
+ * They correspond to the modified production [4] and the new production [4a]
+ * changes in that revision. Also note that the macros used for the
+ * productions Letter, Digit, CombiningChar and Extender are not needed
+ * anymore.
+ * We still keep compatibility to pre-revision5 parsing semantic if the
+ * new XML_PARSE_OLD10 option is given to the parser.
+ */
+static int
+xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
+ if ((ctxt->options & XML_PARSE_OLD10) == 0) {
+ /*
+ * Use the new checks of production [4] [4a] amd [5] of the
+ * Update 5 of XML-1.0
+ */
+ if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
+ (((c >= 'a') && (c <= 'z')) ||
+ ((c >= 'A') && (c <= 'Z')) ||
+ (c == '_') || (c == ':') ||
+ ((c >= 0xC0) && (c <= 0xD6)) ||
+ ((c >= 0xD8) && (c <= 0xF6)) ||
+ ((c >= 0xF8) && (c <= 0x2FF)) ||
+ ((c >= 0x370) && (c <= 0x37D)) ||
+ ((c >= 0x37F) && (c <= 0x1FFF)) ||
+ ((c >= 0x200C) && (c <= 0x200D)) ||
+ ((c >= 0x2070) && (c <= 0x218F)) ||
+ ((c >= 0x2C00) && (c <= 0x2FEF)) ||
+ ((c >= 0x3001) && (c <= 0xD7FF)) ||
+ ((c >= 0xF900) && (c <= 0xFDCF)) ||
+ ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
+ ((c >= 0x10000) && (c <= 0xEFFFF))))
+ return(1);
+ } else {
+ if (IS_LETTER(c) || (c == '_') || (c == ':'))
+ return(1);
+ }
+ return(0);
+}
+
+static int
+xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
+ if ((ctxt->options & XML_PARSE_OLD10) == 0) {
+ /*
+ * Use the new checks of production [4] [4a] amd [5] of the
+ * Update 5 of XML-1.0
+ */
+ if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
+ (((c >= 'a') && (c <= 'z')) ||
+ ((c >= 'A') && (c <= 'Z')) ||
+ ((c >= '0') && (c <= '9')) || /* !start */
+ (c == '_') || (c == ':') ||
+ (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
+ ((c >= 0xC0) && (c <= 0xD6)) ||
+ ((c >= 0xD8) && (c <= 0xF6)) ||
+ ((c >= 0xF8) && (c <= 0x2FF)) ||
+ ((c >= 0x300) && (c <= 0x36F)) || /* !start */
+ ((c >= 0x370) && (c <= 0x37D)) ||
+ ((c >= 0x37F) && (c <= 0x1FFF)) ||
+ ((c >= 0x200C) && (c <= 0x200D)) ||
+ ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
+ ((c >= 0x2070) && (c <= 0x218F)) ||
+ ((c >= 0x2C00) && (c <= 0x2FEF)) ||
+ ((c >= 0x3001) && (c <= 0xD7FF)) ||
+ ((c >= 0xF900) && (c <= 0xFDCF)) ||
+ ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
+ ((c >= 0x10000) && (c <= 0xEFFFF))))
+ return(1);
+ } else {
+ if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
+ (c == '.') || (c == '-') ||
+ (c == '_') || (c == ':') ||
+ (IS_COMBINING(c)) ||
+ (IS_EXTENDER(c)))
+ return(1);
+ }
+ return(0);
+}
+
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
int *len, int *alloc, int normalize);
+static const xmlChar *
+xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
+ int len = 0, l;
+ int c;
+ int count = 0;
+
+#ifdef DEBUG
+ nbParseNameComplex++;
+#endif
+
+ /*
+ * Handler for more complex cases
+ */
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ c = CUR_CHAR(l);
+ if ((ctxt->options & XML_PARSE_OLD10) == 0) {
+ /*
+ * Use the new checks of production [4] [4a] amd [5] of the
+ * Update 5 of XML-1.0
+ */
+ if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
+ (!(((c >= 'a') && (c <= 'z')) ||
+ ((c >= 'A') && (c <= 'Z')) ||
+ (c == '_') || (c == ':') ||
+ ((c >= 0xC0) && (c <= 0xD6)) ||
+ ((c >= 0xD8) && (c <= 0xF6)) ||
+ ((c >= 0xF8) && (c <= 0x2FF)) ||
+ ((c >= 0x370) && (c <= 0x37D)) ||
+ ((c >= 0x37F) && (c <= 0x1FFF)) ||
+ ((c >= 0x200C) && (c <= 0x200D)) ||
+ ((c >= 0x2070) && (c <= 0x218F)) ||
+ ((c >= 0x2C00) && (c <= 0x2FEF)) ||
+ ((c >= 0x3001) && (c <= 0xD7FF)) ||
+ ((c >= 0xF900) && (c <= 0xFDCF)) ||
+ ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
+ ((c >= 0x10000) && (c <= 0xEFFFF))))) {
+ return(NULL);
+ }
+ len += l;
+ NEXTL(l);
+ c = CUR_CHAR(l);
+ while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
+ (((c >= 'a') && (c <= 'z')) ||
+ ((c >= 'A') && (c <= 'Z')) ||
+ ((c >= '0') && (c <= '9')) || /* !start */
+ (c == '_') || (c == ':') ||
+ (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
+ ((c >= 0xC0) && (c <= 0xD6)) ||
+ ((c >= 0xD8) && (c <= 0xF6)) ||
+ ((c >= 0xF8) && (c <= 0x2FF)) ||
+ ((c >= 0x300) && (c <= 0x36F)) || /* !start */
+ ((c >= 0x370) && (c <= 0x37D)) ||
+ ((c >= 0x37F) && (c <= 0x1FFF)) ||
+ ((c >= 0x200C) && (c <= 0x200D)) ||
+ ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
+ ((c >= 0x2070) && (c <= 0x218F)) ||
+ ((c >= 0x2C00) && (c <= 0x2FEF)) ||
+ ((c >= 0x3001) && (c <= 0xD7FF)) ||
+ ((c >= 0xF900) && (c <= 0xFDCF)) ||
+ ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
+ ((c >= 0x10000) && (c <= 0xEFFFF))
+ )) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ }
+ len += l;
+ NEXTL(l);
+ c = CUR_CHAR(l);
+ }
+ } else {
+ if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
+ (!IS_LETTER(c) && (c != '_') &&
+ (c != ':'))) {
+ return(NULL);
+ }
+ len += l;
+ NEXTL(l);
+ c = CUR_CHAR(l);
+
+ while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
+ ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
+ (c == '.') || (c == '-') ||
+ (c == '_') || (c == ':') ||
+ (IS_COMBINING(c)) ||
+ (IS_EXTENDER(c)))) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ }
+ len += l;
+ NEXTL(l);
+ c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ c = CUR_CHAR(l);
+ }
+ }
+ }
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
+ return(NULL);
+ }
+ if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
+ return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
+ return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
+}
+
/**
* xmlParseName:
* @ctxt: an XML parser context
@@ -2616,6 +3427,10 @@ xmlParseName(xmlParserCtxtPtr ctxt) {
GROW;
+#ifdef DEBUG
+ nbParseName++;
+#endif
+
/*
* Accelerator for simple ASCII names
*/
@@ -2632,6 +3447,11 @@ xmlParseName(xmlParserCtxtPtr ctxt) {
in++;
if ((*in > 0) && (*in < 0x80)) {
count = in - ctxt->input->cur;
+ if ((count > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
+ return(NULL);
+ }
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
ctxt->input->cur = in;
ctxt->nbChars += count;
@@ -2641,9 +3461,136 @@ xmlParseName(xmlParserCtxtPtr ctxt) {
return(ret);
}
}
+ /* accelerator for special cases */
return(xmlParseNameComplex(ctxt));
}
+static const xmlChar *
+xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
+ int len = 0, l;
+ int c;
+ int count = 0;
+ const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
+
+#ifdef DEBUG
+ nbParseNCNameComplex++;
+#endif
+
+ /*
+ * Handler for more complex cases
+ */
+ GROW;
+ end = ctxt->input->cur;
+ c = CUR_CHAR(l);
+ if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
+ (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
+ return(NULL);
+ }
+
+ while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
+ (xmlIsNameChar(ctxt, c) && (c != ':'))) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ return(NULL);
+ }
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ }
+ len += l;
+ NEXTL(l);
+ end = ctxt->input->cur;
+ c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ /*
+ * when shrinking to extend the buffer we really need to preserve
+ * the part of the name we already parsed. Hence rolling back
+ * by current lenght.
+ */
+ ctxt->input->cur -= l;
+ GROW;
+ ctxt->input->cur += l;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ end = ctxt->input->cur;
+ c = CUR_CHAR(l);
+ }
+ }
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ return(NULL);
+ }
+ return(xmlDictLookup(ctxt->dict, end - len, len));
+}
+
+/**
+ * xmlParseNCName:
+ * @ctxt: an XML parser context
+ * @len: length of the string parsed
+ *
+ * parse an XML name.
+ *
+ * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
+ * CombiningChar | Extender
+ *
+ * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
+ *
+ * Returns the Name parsed or NULL
+ */
+
+static const xmlChar *
+xmlParseNCName(xmlParserCtxtPtr ctxt) {
+ const xmlChar *in, *e;
+ const xmlChar *ret;
+ int count = 0;
+
+#ifdef DEBUG
+ nbParseNCName++;
+#endif
+
+ /*
+ * Accelerator for simple ASCII names
+ */
+ in = ctxt->input->cur;
+ e = ctxt->input->end;
+ if ((((*in >= 0x61) && (*in <= 0x7A)) ||
+ ((*in >= 0x41) && (*in <= 0x5A)) ||
+ (*in == '_')) && (in < e)) {
+ in++;
+ while ((((*in >= 0x61) && (*in <= 0x7A)) ||
+ ((*in >= 0x41) && (*in <= 0x5A)) ||
+ ((*in >= 0x30) && (*in <= 0x39)) ||
+ (*in == '_') || (*in == '-') ||
+ (*in == '.')) && (in < e))
+ in++;
+ if (in >= e)
+ goto complex;
+ if ((*in > 0) && (*in < 0x80)) {
+ count = in - ctxt->input->cur;
+ if ((count > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ return(NULL);
+ }
+ ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
+ ctxt->input->cur = in;
+ ctxt->nbChars += count;
+ ctxt->input->col += count;
+ if (ret == NULL) {
+ xmlErrMemory(ctxt, NULL);
+ }
+ return(ret);
+ }
+ }
+complex:
+ return(xmlParseNCNameComplex(ctxt));
+}
+
/**
* xmlParseNameAndCompare:
* @ctxt: an XML parser context
@@ -2662,15 +3609,17 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
const xmlChar *ret;
GROW;
-
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+
in = ctxt->input->cur;
while (*in != 0 && *in == *cmp) {
- ++in;
+ ++in;
++cmp;
ctxt->input->col++;
}
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
- /* success */
+ /* success */
ctxt->input->cur = in;
return (const xmlChar*) 1;
}
@@ -2683,42 +3632,6 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
return ret;
}
-static const xmlChar *
-xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
- int len = 0, l;
- int c;
- int count = 0;
-
- /*
- * Handler for more complex cases
- */
- GROW;
- c = CUR_CHAR(l);
- if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
- (!IS_LETTER(c) && (c != '_') &&
- (c != ':'))) {
- return(NULL);
- }
-
- while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
- ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
- (c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c)))) {
- if (count++ > 100) {
- count = 0;
- GROW;
- }
- len += l;
- NEXTL(l);
- c = CUR_CHAR(l);
- }
- if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
- return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
- return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
-}
-
/**
* xmlParseStringName:
* @ctxt: an XML parser context
@@ -2733,7 +3646,7 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
*
* [6] Names ::= Name (#x20 Name)*
*
- * Returns the Name parsed or NULL. The @str pointer
+ * Returns the Name parsed or NULL. The @str pointer
* is updated to the current location in the string.
*/
@@ -2744,17 +3657,19 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
int len = 0, l;
int c;
+#ifdef DEBUG
+ nbParseStringName++;
+#endif
+
c = CUR_SCHAR(cur, l);
- if (!IS_LETTER(c) && (c != '_') &&
- (c != ':')) {
+ if (!xmlIsNameStartChar(ctxt, c)) {
return(NULL);
}
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
- (c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c))) {
+ COPY_BUF(l,buf,len,c);
+ cur += l;
+ c = CUR_SCHAR(cur, l);
+ while (xmlIsNameChar(ctxt, c)) {
COPY_BUF(l,buf,len,c);
cur += l;
c = CUR_SCHAR(cur, l);
@@ -2765,21 +3680,23 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
*/
xmlChar *buffer;
int max = len * 2;
-
+
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
if (buffer == NULL) {
xmlErrMemory(ctxt, NULL);
return(NULL);
}
memcpy(buffer, buf, len);
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
- /* test bigentname.xml */
- (c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c))) {
+ while (xmlIsNameChar(ctxt, c)) {
if (len + 10 > max) {
xmlChar *tmp;
+
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ xmlFree(buffer);
+ return(NULL);
+ }
max *= 2;
tmp = (xmlChar *) xmlRealloc(buffer,
max * sizeof(xmlChar));
@@ -2799,6 +3716,11 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
return(buffer);
}
}
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ return(NULL);
+ }
*str = cur;
return(xmlStrndup(buf, len));
}
@@ -2806,7 +3728,7 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
/**
* xmlParseNmtoken:
* @ctxt: an XML parser context
- *
+ *
* parse an XML Nmtoken.
*
* [7] Nmtoken ::= (NameChar)+
@@ -2823,21 +3745,30 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
int c;
int count = 0;
+#ifdef DEBUG
+ nbParseNmToken++;
+#endif
+
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
c = CUR_CHAR(l);
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
- (c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c))) {
- if (count++ > 100) {
+ while (xmlIsNameChar(ctxt, c)) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
}
COPY_BUF(l,buf,len,c);
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ c = CUR_CHAR(l);
+ }
if (len >= XML_MAX_NAMELEN) {
/*
* Okay someone managed to make a huge token, so he's ready to pay
@@ -2845,25 +3776,31 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
*/
xmlChar *buffer;
int max = len * 2;
-
+
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
if (buffer == NULL) {
xmlErrMemory(ctxt, NULL);
return(NULL);
}
memcpy(buffer, buf, len);
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
- (c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c))) {
- if (count++ > 100) {
+ while (xmlIsNameChar(ctxt, c)) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buffer);
+ return(NULL);
+ }
}
if (len + 10 > max) {
xmlChar *tmp;
+ if ((max > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
+ xmlFree(buffer);
+ return(NULL);
+ }
max *= 2;
tmp = (xmlChar *) xmlRealloc(buffer,
max * sizeof(xmlChar));
@@ -2884,6 +3821,11 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
}
if (len == 0)
return(NULL);
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
+ return(NULL);
+ }
return(xmlStrndup(buf, len));
}
@@ -2930,6 +3872,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
ctxt->instate = XML_PARSER_ENTITY_VALUE;
input = ctxt->input;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
NEXT;
c = CUR_CHAR(l);
/*
@@ -2937,12 +3883,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
* When a parameter entity reference appears in a literal entity
* value, ... a single or double quote character in the replacement
* text is always treated as a normal data character and will not
- * terminate the literal.
+ * terminate the literal.
* In practice it means we stop the loop only when back at parsing
* the initial entity and the quote is found
*/
- while ((IS_CHAR(c)) && ((c != stop) || /* checked */
- (ctxt->input != input))) {
+ while (((IS_CHAR(c)) && ((c != stop) || /* checked */
+ (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
if (len + 5 >= size) {
xmlChar *tmp;
@@ -2971,6 +3917,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
}
}
buf[len] = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
/*
* Raise problem w.r.t. '&' and '%' being used in non-entities
@@ -3018,12 +3968,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
*/
ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
0, 0, 0);
- if (orig != NULL)
+ if (orig != NULL)
*orig = buf;
else
xmlFree(buf);
}
-
+
return(ret);
}
@@ -3043,8 +3993,9 @@ static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
xmlChar limit = 0;
xmlChar *buf = NULL;
- int len = 0;
- int buf_size = 0;
+ xmlChar *rep = NULL;
+ size_t len = 0;
+ size_t buf_size = 0;
int c, l, in_space = 0;
xmlChar *current = NULL;
xmlEntityPtr ent;
@@ -3061,20 +4012,31 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
return(NULL);
}
-
+
/*
* allocate a translation buffer.
*/
buf_size = XML_PARSER_BUFFER_SIZE;
- buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
+ buf = (xmlChar *) xmlMallocAtomic(buf_size);
if (buf == NULL) goto mem_error;
/*
* OK loop until we reach one of the ending char or a size limit.
*/
c = CUR_CHAR(l);
- while ((NXT(0) != limit) && /* checked */
- (c != '<')) {
+ while (((NXT(0) != limit) && /* checked */
+ (IS_CHAR(c)) && (c != '<')) &&
+ (ctxt->instate != XML_PARSER_EOF)) {
+ /*
+ * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
+ * special option is given
+ */
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ goto mem_error;
+ }
if (c == 0) break;
if (c == '&') {
in_space = 0;
@@ -3083,8 +4045,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
if (val == '&') {
if (ctxt->replaceEntities) {
- if (len > buf_size - 10) {
- growBuffer(buf);
+ if (len + 10 > buf_size) {
+ growBuffer(buf, 10);
}
buf[len++] = '&';
} else {
@@ -3092,8 +4054,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
* The reparsing will be done in xmlStringGetNodeList()
* called by the attribute() function in SAX.c
*/
- if (len > buf_size - 10) {
- growBuffer(buf);
+ if (len + 10 > buf_size) {
+ growBuffer(buf, 10);
}
buf[len++] = '&';
buf[len++] = '#';
@@ -3101,18 +4063,21 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
buf[len++] = '8';
buf[len++] = ';';
}
- } else {
- if (len > buf_size - 10) {
- growBuffer(buf);
+ } else if (val != 0) {
+ if (len + 10 > buf_size) {
+ growBuffer(buf, 10);
}
len += xmlCopyChar(0, &buf[len], val);
}
} else {
ent = xmlParseEntityRef(ctxt);
+ ctxt->nbentities++;
+ if (ent != NULL)
+ ctxt->nbentities += ent->owner;
if ((ent != NULL) &&
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
- if (len > buf_size - 10) {
- growBuffer(buf);
+ if (len + 10 > buf_size) {
+ growBuffer(buf, 10);
}
if ((ctxt->replaceEntities == 0) &&
(ent->content[0] == '&')) {
@@ -3124,10 +4089,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
} else {
buf[len++] = ent->content[0];
}
- } else if ((ent != NULL) &&
+ } else if ((ent != NULL) &&
(ctxt->replaceEntities != 0)) {
- xmlChar *rep;
-
if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
rep = xmlStringDecodeEntities(ctxt, ent->content,
XML_SUBSTITUTE_REF,
@@ -3135,16 +4098,22 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
if (rep != NULL) {
current = rep;
while (*current != 0) { /* non input consuming */
- buf[len++] = *current++;
- if (len > buf_size - 10) {
- growBuffer(buf);
+ if ((*current == 0xD) || (*current == 0xA) ||
+ (*current == 0x9)) {
+ buf[len++] = 0x20;
+ current++;
+ } else
+ buf[len++] = *current++;
+ if (len + 10 > buf_size) {
+ growBuffer(buf, 10);
}
}
xmlFree(rep);
+ rep = NULL;
}
} else {
- if (len > buf_size - 10) {
- growBuffer(buf);
+ if (len + 10 > buf_size) {
+ growBuffer(buf, 10);
}
if (ent->content != NULL)
buf[len++] = ent->content[0];
@@ -3158,20 +4127,27 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
* entities problems
*/
if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
- (ent->content != NULL)) {
- xmlChar *rep;
+ (ent->content != NULL) && (ent->checked == 0)) {
+ unsigned long oldnbent = ctxt->nbentities;
+
rep = xmlStringDecodeEntities(ctxt, ent->content,
- XML_SUBSTITUTE_REF, 0, 0, 0);
- if (rep != NULL)
+ XML_SUBSTITUTE_REF, 0, 0, 0);
+
+ ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
+ if (rep != NULL) {
+ if (xmlStrchr(rep, '<'))
+ ent->checked |= 1;
xmlFree(rep);
+ rep = NULL;
+ }
}
/*
* Just output the reference
*/
buf[len++] = '&';
- if (len > buf_size - i - 10) {
- growBuffer(buf);
+ while (len + i + 10 > buf_size) {
+ growBuffer(buf, i + 10);
}
for (;i > 0;i--)
buf[len++] = *cur++;
@@ -3183,8 +4159,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
if ((len != 0) || (!normalize)) {
if ((!normalize) || (!in_space)) {
COPY_BUF(l,buf,len,0x20);
- if (len > buf_size - 10) {
- growBuffer(buf);
+ while (len + 10 > buf_size) {
+ growBuffer(buf, 10);
}
}
in_space = 1;
@@ -3192,8 +4168,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
} else {
in_space = 0;
COPY_BUF(l,buf,len,c);
- if (len > buf_size - 10) {
- growBuffer(buf);
+ if (len + 10 > buf_size) {
+ growBuffer(buf, 10);
}
}
NEXTL(l);
@@ -3201,22 +4177,46 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
GROW;
c = CUR_CHAR(l);
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto error;
+
if ((in_space) && (normalize)) {
- while (buf[len - 1] == 0x20) len--;
+ while ((len > 0) && (buf[len - 1] == 0x20)) len--;
}
buf[len] = 0;
if (RAW == '<') {
xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
} else if (RAW != limit) {
- xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
- "AttValue: ' expected\n");
+ if ((c != 0) && (!IS_CHAR(c))) {
+ xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
+ "invalid character in attribute value\n");
+ } else {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue: ' expected\n");
+ }
} else
NEXT;
- if (attlen != NULL) *attlen = len;
+
+ /*
+ * There we potentially risk an overflow, don't allow attribute value of
+ * length more than INT_MAX it is a very reasonnable assumption !
+ */
+ if (len >= INT_MAX) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ goto mem_error;
+ }
+
+ if (attlen != NULL) *attlen = (int) len;
return(buf);
mem_error:
xmlErrMemory(ctxt, NULL);
+error:
+ if (buf != NULL)
+ xmlFree(buf);
+ if (rep != NULL)
+ xmlFree(rep);
return(NULL);
}
@@ -3233,20 +4233,20 @@ mem_error:
*
* 3.3.3 Attribute-Value Normalization:
* Before the value of an attribute is passed to the application or
- * checked for validity, the XML processor must normalize it as follows:
+ * checked for validity, the XML processor must normalize it as follows:
* - a character reference is processed by appending the referenced
* character to the attribute value
* - an entity reference is processed by recursively processing the
- * replacement text of the entity
+ * replacement text of the entity
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
* appending #x20 to the normalized value, except that only a single
* #x20 is appended for a "#xD#xA" sequence that is part of an external
- * parsed entity or the literal entity value of an internal parsed entity
- * - other characters are processed by appending them to the normalized value
+ * parsed entity or the literal entity value of an internal parsed entity
+ * - other characters are processed by appending them to the normalized value
* If the declared value is not CDATA, then the XML processor must further
* process the normalized attribute value by discarding any leading and
* trailing space (#x20) characters, and by replacing sequences of space
- * (#x20) characters by a single space (#x20) character.
+ * (#x20) characters by a single space (#x20) character.
* All attributes for which no declaration has been read should be treated
* by a non-validating parser as if declared CDATA.
*
@@ -3263,7 +4263,7 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) {
/**
* xmlParseSystemLiteral:
* @ctxt: an XML parser context
- *
+ *
* parse an XML Literal
*
* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
@@ -3292,7 +4292,7 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
return(NULL);
}
-
+
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
if (buf == NULL) {
xmlErrMemory(ctxt, NULL);
@@ -3304,6 +4304,13 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
if (len + 5 >= size) {
xmlChar *tmp;
+ if ((size > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
+ xmlFree(buf);
+ ctxt->instate = (xmlParserInputState) state;
+ return(NULL);
+ }
size *= 2;
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
if (tmp == NULL) {
@@ -3318,6 +4325,10 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
}
COPY_BUF(l,buf,len,cur);
NEXTL(l);
@@ -3381,6 +4392,12 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
if (len + 1 >= size) {
xmlChar *tmp;
+ if ((size > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
+ xmlFree(buf);
+ return(NULL);
+ }
size *= 2;
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
if (tmp == NULL) {
@@ -3395,6 +4412,10 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
}
NEXT;
cur = CUR;
@@ -3414,7 +4435,7 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
return(buf);
}
-void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
+static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
/*
* used for the test in the inner loop of the char data testing
@@ -3465,7 +4486,7 @@ static const unsigned char test_char_data[256] = {
* The right angle bracket (>) may be represented using the string "&gt;",
* and must, for compatibility, be escaped using "&gt;" or a character
* reference when it appears in the string "]]>" in content, when that
- * string is not marking the end of a CDATA section.
+ * string is not marking the end of a CDATA section.
*
* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
*/
@@ -3488,7 +4509,7 @@ xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
in = ctxt->input->cur;
do {
get_more_space:
- while (*in == 0x20) in++;
+ while (*in == 0x20) { in++; ctxt->input->col++; }
if (*in == 0xA) {
do {
ctxt->input->line++; ctxt->input->col = 1;
@@ -3578,6 +4599,9 @@ get_more:
line = ctxt->input->line;
col = ctxt->input->col;
}
+ /* something really bad happened in the SAX callback */
+ if (ctxt->instate != XML_PARSER_CONTENT)
+ return;
}
ctxt->input->cur = in;
if (*in == 0xD) {
@@ -3598,6 +4622,8 @@ get_more:
}
SHRINK;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
in = ctxt->input->cur;
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
nbchar = 0;
@@ -3616,7 +4642,7 @@ get_more:
* of xmlParseCharData() when the parsing requires handling
* of non-ASCII characters.
*/
-void
+static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
int nbchar = 0;
@@ -3627,7 +4653,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
GROW;
cur = CUR_CHAR(l);
while ((cur != '<') && /* checked */
- (cur != '&') &&
+ (cur != '&') &&
(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
if ((cur == ']') && (NXT(1) == ']') &&
(NXT(2) == '>')) {
@@ -3658,11 +4684,16 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
}
}
nbchar = 0;
+ /* something really bad happened in the SAX callback */
+ if (ctxt->instate != XML_PARSER_CONTENT)
+ return;
}
count++;
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
}
NEXTL(l);
cur = CUR_CHAR(l);
@@ -3755,7 +4786,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
}
} else {
/*
- * We handle [83] so we return immediately, if
+ * We handle [83] so we return immediately, if
* "S SystemLiteral" is not detected. From a purely parsing
* point of view that's a nice mess.
*/
@@ -3764,7 +4795,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
ptr = CUR_PTR;
if (!IS_BLANK_CH(*ptr)) return(NULL);
-
+
while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
}
@@ -3792,12 +4823,15 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
*/
static void
-xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
+xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
+ size_t len, size_t size) {
int q, ql;
int r, rl;
int cur, l;
- xmlParserInputPtr input = ctxt->input;
- int count = 0;
+ size_t count = 0;
+ int inputid;
+
+ inputid = ctxt->input->id;
if (buf == NULL) {
len = 0;
@@ -3808,13 +4842,28 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
return;
}
}
+ GROW; /* Assure there's enough input data */
q = CUR_CHAR(ql);
if (q == 0)
goto not_terminated;
+ if (!IS_CHAR(q)) {
+ xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
+ "xmlParseComment: invalid xmlChar value %d\n",
+ q);
+ xmlFree (buf);
+ return;
+ }
NEXTL(ql);
r = CUR_CHAR(rl);
if (r == 0)
goto not_terminated;
+ if (!IS_CHAR(r)) {
+ xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
+ "xmlParseComment: invalid xmlChar value %d\n",
+ q);
+ xmlFree (buf);
+ return;
+ }
NEXTL(rl);
cur = CUR_CHAR(l);
if (cur == 0)
@@ -3825,16 +4874,26 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
if ((r == '-') && (q == '-')) {
xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
}
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
+ "Comment too big found", NULL);
+ xmlFree (buf);
+ return;
+ }
if (len + 5 >= size) {
xmlChar *new_buf;
- size *= 2;
- new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+ size_t new_size;
+
+ new_size = size * 2;
+ new_buf = (xmlChar *) xmlRealloc(buf, new_size);
if (new_buf == NULL) {
xmlFree (buf);
xmlErrMemory(ctxt, NULL);
return;
}
buf = new_buf;
+ size = new_size;
}
COPY_BUF(ql,buf,len,q);
q = r;
@@ -3846,6 +4905,10 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
}
NEXTL(l);
cur = CUR_CHAR(l);
@@ -3856,12 +4919,15 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
}
}
buf[len] = 0;
- if (!IS_CHAR(cur)) {
+ if (cur == 0) {
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
"Comment not terminated \n<!--%.50s\n", buf);
- xmlFree(buf);
+ } else if (!IS_CHAR(cur)) {
+ xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
+ "xmlParseComment: invalid xmlChar value %d\n",
+ cur);
} else {
- if (input != ctxt->input) {
+ if (inputid != ctxt->input->id) {
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Comment doesn't start and stop in the same entity\n");
}
@@ -3869,14 +4935,16 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
(!ctxt->disableSAX))
ctxt->sax->comment(ctxt->userData, buf);
- xmlFree(buf);
}
+ xmlFree(buf);
return;
not_terminated:
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
"Comment not terminated\n", NULL);
xmlFree(buf);
+ return;
}
+
/**
* xmlParseComment:
* @ctxt: an XML parser context
@@ -3890,20 +4958,22 @@ not_terminated:
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
- int size = XML_PARSER_BUFFER_SIZE;
- int len = 0;
+ size_t size = XML_PARSER_BUFFER_SIZE;
+ size_t len = 0;
xmlParserInputState state;
const xmlChar *in;
- int nbchar = 0, ccol;
+ size_t nbchar = 0;
+ int ccol;
+ int inputid;
/*
* Check that there is a comment right here.
*/
if ((RAW != '<') || (NXT(1) != '!') ||
(NXT(2) != '-') || (NXT(3) != '-')) return;
-
state = ctxt->instate;
ctxt->instate = XML_PARSER_COMMENT;
+ inputid = ctxt->input->id;
SKIP(4);
SHRINK;
GROW;
@@ -3973,6 +5043,13 @@ get_more:
buf[len] = 0;
}
}
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
+ "Comment too big found", NULL);
+ xmlFree (buf);
+ return;
+ }
ctxt->input->cur = in;
if (*in == 0xA) {
in++;
@@ -3990,10 +5067,18 @@ get_more:
}
SHRINK;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
in = ctxt->input->cur;
if (*in == '-') {
if (in[1] == '-') {
if (in[2] == '>') {
+ if (ctxt->input->id != inputid) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
+ "comment doesn't start and stop in the same entity\n");
+ }
SKIP(3);
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
(!ctxt->disableSAX)) {
@@ -4004,16 +5089,18 @@ get_more:
}
if (buf != NULL)
xmlFree(buf);
- ctxt->instate = state;
+ if (ctxt->instate != XML_PARSER_EOF)
+ ctxt->instate = state;
return;
}
- if (buf != NULL)
- xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
- "Comment not terminated \n<!--%.50s\n",
+ if (buf != NULL) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
+ "Double hyphen within comment: "
+ "<!--%.50s\n",
buf);
- else
- xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
- "Comment not terminated \n", NULL);
+ } else
+ xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
+ "Double hyphen within comment\n", NULL);
in++;
ctxt->input->col++;
}
@@ -4031,7 +5118,7 @@ get_more:
/**
* xmlParsePITarget:
* @ctxt: an XML parser context
- *
+ *
* parse the name of a PI
*
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
@@ -4067,6 +5154,10 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) {
"xmlParsePITarget: invalid name prefix 'xml'\n",
NULL, NULL);
}
+ if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
+ xmlNsErr(ctxt, XML_NS_ERR_COLON,
+ "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
+ }
return(name);
}
@@ -4075,7 +5166,7 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) {
* xmlParseCatalogPI:
* @ctxt: an XML parser context
* @catalog: the PI value string
- *
+ *
* parse an XML Catalog Processing Instruction.
*
* <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
@@ -4135,7 +5226,7 @@ error:
/**
* xmlParsePI:
* @ctxt: an XML parser context
- *
+ *
* parse an XML Processing Instruction.
*
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
@@ -4146,8 +5237,8 @@ error:
void
xmlParsePI(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
- int len = 0;
- int size = XML_PARSER_BUFFER_SIZE;
+ size_t len = 0;
+ size_t size = XML_PARSER_BUFFER_SIZE;
int cur, l;
const xmlChar *target;
xmlParserInputState state;
@@ -4183,7 +5274,8 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
(ctxt->sax->processingInstruction != NULL))
ctxt->sax->processingInstruction(ctxt->userData,
target, NULL);
- ctxt->instate = state;
+ if (ctxt->instate != XML_PARSER_EOF)
+ ctxt->instate = state;
return;
}
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
@@ -4203,9 +5295,8 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
((cur != '?') || (NXT(1) != '>'))) {
if (len + 5 >= size) {
xmlChar *tmp;
-
- size *= 2;
- tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+ size_t new_size = size * 2;
+ tmp = (xmlChar *) xmlRealloc(buf, new_size);
if (tmp == NULL) {
xmlErrMemory(ctxt, NULL);
xmlFree(buf);
@@ -4213,11 +5304,24 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
return;
}
buf = tmp;
+ size = new_size;
}
count++;
if (count > 50) {
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
count = 0;
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
+ "PI %s too big found", target);
+ xmlFree(buf);
+ ctxt->instate = state;
+ return;
+ }
}
COPY_BUF(l,buf,len,cur);
NEXTL(l);
@@ -4228,6 +5332,14 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
cur = CUR_CHAR(l);
}
}
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
+ "PI %s too big found", target);
+ xmlFree(buf);
+ ctxt->instate = state;
+ return;
+ }
buf[len] = 0;
if (cur != '?') {
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
@@ -4263,7 +5375,8 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
} else {
xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
}
- ctxt->instate = state;
+ if (ctxt->instate != XML_PARSER_EOF)
+ ctxt->instate = state;
}
}
@@ -4288,7 +5401,7 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
const xmlChar *name;
xmlChar *Pubid;
xmlChar *Systemid;
-
+
if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
xmlParserInputPtr input = ctxt->input;
SHRINK;
@@ -4310,6 +5423,11 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
"Space required after the NOTATION name'\n");
return;
}
+ if (xmlStrchr(name, ':') != NULL) {
+ xmlNsErr(ctxt, XML_NS_ERR_COLON,
+ "colons are forbidden from notation names '%s'\n",
+ name, NULL, NULL);
+ }
SKIP_BLANKS;
/*
@@ -4366,7 +5484,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
int isParameter = 0;
xmlChar *orig = NULL;
int skipped;
-
+
/* GROW; done in the caller */
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
xmlParserInputPtr input = ctxt->input;
@@ -4394,6 +5512,11 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
"xmlParseEntityDecl: no name\n");
return;
}
+ if (xmlStrchr(name, ':') != NULL) {
+ xmlNsErr(ctxt, XML_NS_ERR_COLON,
+ "colons are forbidden from entities names '%s'\n",
+ name, NULL, NULL);
+ }
skipped = SKIP_BLANKS;
if (skipped == 0) {
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
@@ -4465,6 +5588,11 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
if (ctxt->myDoc == NULL) {
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
+ if (ctxt->myDoc == NULL) {
+ xmlErrMemory(ctxt, "New Doc failed");
+ return;
+ }
+ ctxt->myDoc->properties = XML_DOC_INTERNAL;
}
if (ctxt->myDoc->intSubset == NULL)
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
@@ -4533,6 +5661,11 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
if (ctxt->myDoc == NULL) {
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
+ if (ctxt->myDoc == NULL) {
+ xmlErrMemory(ctxt, "New Doc failed");
+ return;
+ }
+ ctxt->myDoc->properties = XML_DOC_INTERNAL;
}
if (ctxt->myDoc->intSubset == NULL)
@@ -4545,10 +5678,13 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
}
}
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
SKIP_BLANKS;
if (RAW != '>') {
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
"xmlParseEntityDecl: entity %s not terminated\n", name);
+ xmlHaltParser(ctxt);
} else {
if (input != ctxt->input) {
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
@@ -4608,13 +5744,13 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
*
* [ VC: Fixed Attribute Default ]
* if an attribute has a default value declared with the #FIXED
- * keyword, instances of that attribute must match the default value.
+ * keyword, instances of that attribute must match the default value.
*
* [ WFC: No < in Attribute Values ]
* handled in xmlParseAttValue()
*
* returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
- * or XML_ATTRIBUTE_FIXED.
+ * or XML_ATTRIBUTE_FIXED.
*/
int
@@ -4663,7 +5799,7 @@ xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
*
* [ VC: Notation Attributes ]
* Values of this type must match one of the notation names included
- * in the declaration; all notation names in the declaration must be declared.
+ * in the declaration; all notation names in the declaration must be declared.
*
* Returns: the notation attribute tree built while parsing
*/
@@ -4671,7 +5807,7 @@ xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
const xmlChar *name;
- xmlEnumerationPtr ret = NULL, last = NULL, cur;
+ xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
if (RAW != '(') {
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
@@ -4685,22 +5821,39 @@ xmlParseNotationType(xmlParserCtxtPtr ctxt) {
if (name == NULL) {
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
"Name expected in NOTATION declaration\n");
- return(ret);
+ xmlFreeEnumeration(ret);
+ return(NULL);
}
- cur = xmlCreateEnumeration(name);
- if (cur == NULL) return(ret);
- if (last == NULL) ret = last = cur;
- else {
- last->next = cur;
- last = cur;
+ tmp = ret;
+ while (tmp != NULL) {
+ if (xmlStrEqual(name, tmp->name)) {
+ xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
+ "standalone: attribute notation value token %s duplicated\n",
+ name, NULL);
+ if (!xmlDictOwns(ctxt->dict, name))
+ xmlFree((xmlChar *) name);
+ break;
+ }
+ tmp = tmp->next;
+ }
+ if (tmp == NULL) {
+ cur = xmlCreateEnumeration(name);
+ if (cur == NULL) {
+ xmlFreeEnumeration(ret);
+ return(NULL);
+ }
+ if (last == NULL) ret = last = cur;
+ else {
+ last->next = cur;
+ last = cur;
+ }
}
SKIP_BLANKS;
} while (RAW == '|');
if (RAW != ')') {
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
- if ((last != NULL) && (last != ret))
- xmlFreeEnumeration(last);
- return(ret);
+ xmlFreeEnumeration(ret);
+ return(NULL);
}
NEXT;
return(ret);
@@ -4724,7 +5877,7 @@ xmlParseNotationType(xmlParserCtxtPtr ctxt) {
xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
xmlChar *name;
- xmlEnumerationPtr ret = NULL, last = NULL, cur;
+ xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
if (RAW != '(') {
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
@@ -4739,13 +5892,31 @@ xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
return(ret);
}
- cur = xmlCreateEnumeration(name);
- xmlFree(name);
- if (cur == NULL) return(ret);
- if (last == NULL) ret = last = cur;
- else {
- last->next = cur;
- last = cur;
+ tmp = ret;
+ while (tmp != NULL) {
+ if (xmlStrEqual(name, tmp->name)) {
+ xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
+ "standalone: attribute enumeration value token %s duplicated\n",
+ name, NULL);
+ if (!xmlDictOwns(ctxt->dict, name))
+ xmlFree(name);
+ break;
+ }
+ tmp = tmp->next;
+ }
+ if (tmp == NULL) {
+ cur = xmlCreateEnumeration(name);
+ if (!xmlDictOwns(ctxt->dict, name))
+ xmlFree(name);
+ if (cur == NULL) {
+ xmlFreeEnumeration(ret);
+ return(NULL);
+ }
+ if (last == NULL) ret = last = cur;
+ else {
+ last->next = cur;
+ last = cur;
+ }
}
SKIP_BLANKS;
} while (RAW == '|');
@@ -4828,15 +5999,15 @@ xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
* [ VC: Entity Name ]
* Values of type ENTITY must match the Name production, values
* of type ENTITIES must match Names; each Entity Name must match the
- * name of an unparsed entity declared in the DTD.
+ * name of an unparsed entity declared in the DTD.
*
* [ VC: Name Token ]
* Values of type NMTOKEN must match the Nmtoken production; values
- * of type NMTOKENS must match Nmtokens.
+ * of type NMTOKENS must match Nmtokens.
*
* Returns the attribute type
*/
-int
+int
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
SHRINK;
if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
@@ -4901,7 +6072,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
}
SKIP_BLANKS;
GROW;
- while (RAW != '>') {
+ while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *check = CUR_PTR;
int type;
int def;
@@ -4946,6 +6117,8 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
xmlFreeEnumeration(tree);
break;
}
+ if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
+ xmlAttrNormalizeSpace(defaultValue, defaultValue);
GROW;
if (RAW != '>') {
@@ -4977,11 +6150,11 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
xmlFreeEnumeration(tree);
if ((ctxt->sax2) && (defaultValue != NULL) &&
- (def != XML_ATTRIBUTE_IMPLIED) &&
+ (def != XML_ATTRIBUTE_IMPLIED) &&
(def != XML_ATTRIBUTE_REQUIRED)) {
xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
}
- if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
+ if (ctxt->sax2) {
xmlAddSpecialAttr(ctxt, elemName, attrName, type);
}
if (defaultValue != NULL)
@@ -4990,8 +6163,9 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
}
if (RAW == '>') {
if (input != ctxt->input) {
- xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
- "Attribute list declaration doesn't start and stop in the same entity\n");
+ xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
+ "Attribute list declaration doesn't start and stop in the same entity\n",
+ NULL, NULL);
}
NEXT;
}
@@ -5005,7 +6179,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
*
* parse the declaration for a Mixed Element content
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
- *
+ *
* [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
* '(' S? '#PCDATA' S? ')'
*
@@ -5013,7 +6187,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
*
* [ VC: No Duplicate Types ]
* The same name must not appear more than once in a single
- * mixed-content declaration.
+ * mixed-content declaration.
*
* returns: the list of the xmlElementContentPtr describing the element choices
*/
@@ -5031,10 +6205,12 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
- NULL);
+ NULL, NULL);
}
NEXT;
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
+ if (ret == NULL)
+ return(NULL);
if (RAW == '*') {
ret->ocur = XML_ELEMENT_CONTENT_MULT;
NEXT;
@@ -5045,7 +6221,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
if (ret == NULL) return(NULL);
}
- while (RAW == '|') {
+ while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
NEXT;
if (elem == NULL) {
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
@@ -5083,11 +6259,12 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
if (cur->c2 != NULL)
cur->c2->parent = cur;
}
- ret->ocur = XML_ELEMENT_CONTENT_MULT;
+ if (ret != NULL)
+ ret->ocur = XML_ELEMENT_CONTENT_MULT;
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
- NULL);
+ NULL, NULL);
}
SKIP(2);
} else {
@@ -5103,13 +6280,14 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
}
/**
- * xmlParseElementChildrenContentDecl:
+ * xmlParseElementChildrenContentDeclPriv:
* @ctxt: an XML parser context
* @inputchk: the input used for the current entity, needed for boundary checks
+ * @depth: the level of recursion
*
* parse the declaration for a Mixed Element content
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
- *
+ *
*
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
*
@@ -5130,15 +6308,23 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
* be empty, and neither the first nor last non-blank character of
* the replacement text should be a connector (| or ,).
*
- * Returns the tree of xmlElementContentPtr describing the element
+ * Returns the tree of xmlElementContentPtr describing the element
* hierarchy.
*/
-xmlElementContentPtr
-xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
+static xmlElementContentPtr
+xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
+ int depth) {
xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
const xmlChar *elem;
xmlChar type = 0;
+ if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
+ (depth > 2048)) {
+ xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
+"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
+ depth);
+ return(NULL);
+ }
SKIP_BLANKS;
GROW;
if (RAW == '(') {
@@ -5147,7 +6333,8 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
/* Recurse on first child */
NEXT;
SKIP_BLANKS;
- cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
+ cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
+ depth + 1);
SKIP_BLANKS;
GROW;
} else {
@@ -5178,7 +6365,7 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
}
SKIP_BLANKS;
SHRINK;
- while (RAW != ')') {
+ while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
/*
* Each loop we parse one separator and one element.
*/
@@ -5265,6 +6452,8 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
}
} else {
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
+ if ((last != NULL) && (last != ret))
+ xmlFreeDocElementContent(ctxt->myDoc, last);
if (ret != NULL)
xmlFreeDocElementContent(ctxt->myDoc, ret);
return(NULL);
@@ -5277,7 +6466,8 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
/* Recurse on second child */
NEXT;
SKIP_BLANKS;
- last = xmlParseElementChildrenContentDecl(ctxt, inputid);
+ last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
+ depth + 1);
SKIP_BLANKS;
} else {
elem = xmlParseName(ctxt);
@@ -5288,6 +6478,11 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
return(NULL);
}
last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
+ if (last == NULL) {
+ if (ret != NULL)
+ xmlFreeDocElementContent(ctxt->myDoc, ret);
+ return(NULL);
+ }
if (RAW == '?') {
last->ocur = XML_ELEMENT_CONTENT_OPT;
NEXT;
@@ -5312,7 +6507,7 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
- NULL);
+ NULL, NULL);
}
NEXT;
if (RAW == '?') {
@@ -5383,6 +6578,42 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
}
/**
+ * xmlParseElementChildrenContentDecl:
+ * @ctxt: an XML parser context
+ * @inputchk: the input used for the current entity, needed for boundary checks
+ *
+ * parse the declaration for a Mixed Element content
+ * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
+ *
+ * [47] children ::= (choice | seq) ('?' | '*' | '+')?
+ *
+ * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
+ *
+ * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
+ *
+ * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
+ *
+ * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
+ * TODO Parameter-entity replacement text must be properly nested
+ * with parenthesized groups. That is to say, if either of the
+ * opening or closing parentheses in a choice, seq, or Mixed
+ * construct is contained in the replacement text for a parameter
+ * entity, both must be contained in the same replacement text. For
+ * interoperability, if a parameter-entity reference appears in a
+ * choice, seq, or Mixed construct, its replacement text should not
+ * be empty, and neither the first nor last non-blank character of
+ * the replacement text should be a connector (| or ,).
+ *
+ * Returns the tree of xmlElementContentPtr describing the element
+ * hierarchy.
+ */
+xmlElementContentPtr
+xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
+ /* stub left for API/ABI compat */
+ return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
+}
+
+/**
* xmlParseElementContentDecl:
* @ctxt: an XML parser context
* @name: the name of the element being defined.
@@ -5390,7 +6621,7 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
*
* parse the declaration for an Element content either Mixed or Children,
* the cases EMPTY and ANY are handled directly in xmlParseElementDecl
- *
+ *
* [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
*
* returns: the type of element content XML_ELEMENT_TYPE_xxx
@@ -5413,12 +6644,14 @@ xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
}
NEXT;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
SKIP_BLANKS;
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
tree = xmlParseElementMixedContentDecl(ctxt, inputid);
res = XML_ELEMENT_TYPE_MIXED;
} else {
- tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
+ tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
res = XML_ELEMENT_TYPE_ELEMENT;
}
SKIP_BLANKS;
@@ -5516,7 +6749,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element declaration doesn't start and stop in the same entity\n");
}
-
+
NEXT;
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
(ctxt->sax->elementDecl != NULL)) {
@@ -5528,7 +6761,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
/*
* this is a trick: if xmlAddElementDecl is called,
* instead of copying the full tree it is plugged directly
- * if called from the parser. Avoid duplicating the
+ * if called from the parser. Avoid duplicating the
* interfaces or change the API/ABI
*/
xmlFreeDocElementContent(ctxt->myDoc, content);
@@ -5545,8 +6778,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
* xmlParseConditionalSections
* @ctxt: an XML parser context
*
- * [61] conditionalSect ::= includeSect | ignoreSect
- * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
+ * [61] conditionalSect ::= includeSect | ignoreSect
+ * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
* [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
* [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
@@ -5554,6 +6787,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
+ int id = ctxt->input->id;
+
SKIP(3);
SKIP_BLANKS;
if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
@@ -5561,7 +6796,14 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
SKIP_BLANKS;
if (RAW != '[') {
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
+ xmlHaltParser(ctxt);
+ return;
} else {
+ if (ctxt->input->id != id) {
+ xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
+ "All markup of the conditional section is not in the same entity\n",
+ NULL, NULL);
+ }
NEXT;
}
if (xmlParserDebugEntities) {
@@ -5573,8 +6815,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
"Entering INCLUDE Conditional Section\n");
}
- while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
- (NXT(2) != '>'))) {
+ while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
+ (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *check = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
@@ -5616,7 +6858,14 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
SKIP_BLANKS;
if (RAW != '[') {
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
+ xmlHaltParser(ctxt);
+ return;
} else {
+ if (ctxt->input->id != id) {
+ xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
+ "All markup of the conditional section is not in the same entity\n",
+ NULL, NULL);
+ }
NEXT;
}
if (xmlParserDebugEntities) {
@@ -5637,7 +6886,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
ctxt->instate = XML_PARSER_IGNORE;
- while ((depth >= 0) && (RAW != 0)) {
+ while (((depth >= 0) && (RAW != 0)) &&
+ (ctxt->instate != XML_PARSER_EOF)) {
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
depth++;
SKIP(3);
@@ -5665,6 +6915,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
} else {
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
+ xmlHaltParser(ctxt);
+ return;
}
if (RAW == 0)
@@ -5673,14 +6925,21 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
if (RAW == 0) {
xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
} else {
- SKIP(3);
+ if (ctxt->input->id != id) {
+ xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
+ "All markup of the conditional section is not in the same entity\n",
+ NULL, NULL);
+ }
+ if ((ctxt-> instate != XML_PARSER_EOF) &&
+ ((ctxt->input->cur + 3) <= ctxt->input->end))
+ SKIP(3);
}
}
/**
* xmlParseMarkupDecl:
* @ctxt: an XML parser context
- *
+ *
* parse Markup declarations
*
* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
@@ -5697,7 +6956,7 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
* In the internal DTD subset, parameter-entity references can occur
* only where markup declarations can occur, not within markup declarations.
* (This does not apply to references that occur in external parameter
- * entities or to the external subset.)
+ * entities or to the external subset.)
*/
void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
@@ -5728,6 +6987,14 @@ xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
xmlParsePI(ctxt);
}
}
+
+ /*
+ * detect requirement to exit there and act accordingly
+ * and avoid having instate overriden later on
+ */
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
+
/*
* This is only for internal subset. On external entities,
* the replacement is done before parsing stage
@@ -5751,12 +7018,10 @@ xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
/**
* xmlParseTextDecl:
* @ctxt: an XML parser context
- *
+ *
* parse an XML declaration header for external entities
*
* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
- *
- * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
*/
void
@@ -5828,7 +7093,7 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
* @ctxt: an XML parser context
* @ExternalID: the external identifier
* @SystemID: the system identifier (or URL)
- *
+ *
* parse Markup declarations from an external subset
*
* [30] extSubset ::= textDecl? extSubsetDecl
@@ -5840,18 +7105,38 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
const xmlChar *SystemID) {
xmlDetectSAX2(ctxt);
GROW;
+
+ if ((ctxt->encoding == NULL) &&
+ (ctxt->input->end - ctxt->input->cur >= 4)) {
+ xmlChar start[4];
+ xmlCharEncoding enc;
+
+ start[0] = RAW;
+ start[1] = NXT(1);
+ start[2] = NXT(2);
+ start[3] = NXT(3);
+ enc = xmlDetectCharEncoding(start, 4);
+ if (enc != XML_CHAR_ENCODING_NONE)
+ xmlSwitchEncoding(ctxt, enc);
+ }
+
if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
xmlParseTextDecl(ctxt);
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
/*
* The XML REC instructs us to stop parsing right here
*/
- ctxt->instate = XML_PARSER_EOF;
+ xmlHaltParser(ctxt);
return;
}
}
if (ctxt->myDoc == NULL) {
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
+ if (ctxt->myDoc == NULL) {
+ xmlErrMemory(ctxt, "New Doc failed");
+ return;
+ }
+ ctxt->myDoc->properties = XML_DOC_INTERNAL;
}
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
@@ -5885,7 +7170,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
break;
}
}
-
+
if (RAW != 0) {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
}
@@ -5895,7 +7180,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
/**
* xmlParseReference:
* @ctxt: an XML parser context
- *
+ *
* parse and handle entity references in content, depending on the SAX
* interface, this may end-up in a call to character() if this is a
* CharRef, a predefined entity, if there is no reference() callback.
@@ -5907,14 +7192,25 @@ void
xmlParseReference(xmlParserCtxtPtr ctxt) {
xmlEntityPtr ent;
xmlChar *val;
- if (RAW != '&') return;
+ int was_checked;
+ xmlNodePtr list = NULL;
+ xmlParserErrors ret = XML_ERR_OK;
+
+
+ if (RAW != '&')
+ return;
+ /*
+ * Simple case of a CharRef
+ */
if (NXT(1) == '#') {
int i = 0;
xmlChar out[10];
int hex = NXT(2);
int value = xmlParseCharRef(ctxt);
-
+
+ if (value == 0)
+ return;
if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
/*
* So we are using non-UTF-8 buffers
@@ -5946,359 +7242,367 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
(!ctxt->disableSAX))
ctxt->sax->characters(ctxt->userData, out, i);
}
- } else {
- int was_checked;
+ return;
+ }
- ent = xmlParseEntityRef(ctxt);
- if (ent == NULL) return;
- if (!ctxt->wellFormed)
- return;
- was_checked = ent->checked;
- if ((ent->name != NULL) &&
- (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
- xmlNodePtr list = NULL;
- xmlParserErrors ret = XML_ERR_OK;
+ /*
+ * We are seeing an entity reference
+ */
+ ent = xmlParseEntityRef(ctxt);
+ if (ent == NULL) return;
+ if (!ctxt->wellFormed)
+ return;
+ was_checked = ent->checked;
+
+ /* special case of predefined entities */
+ if ((ent->name == NULL) ||
+ (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
+ val = ent->content;
+ if (val == NULL) return;
+ /*
+ * inline the entity.
+ */
+ if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
+ (!ctxt->disableSAX))
+ ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
+ return;
+ }
+ /*
+ * The first reference to the entity trigger a parsing phase
+ * where the ent->children is filled with the result from
+ * the parsing.
+ * Note: external parsed entities will not be loaded, it is not
+ * required for a non-validating parser, unless the parsing option
+ * of validating, or substituting entities were given. Doing so is
+ * far more secure as the parser will only process data coming from
+ * the document entity by default.
+ */
+ if (((ent->checked == 0) ||
+ ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
+ ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
+ (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
+ unsigned long oldnbent = ctxt->nbentities;
- /*
- * The first reference to the entity trigger a parsing phase
- * where the ent->children is filled with the result from
- * the parsing.
- */
- if (ent->checked == 0) {
- xmlChar *value;
+ /*
+ * This is a bit hackish but this seems the best
+ * way to make sure both SAX and DOM entity support
+ * behaves okay.
+ */
+ void *user_data;
+ if (ctxt->userData == ctxt)
+ user_data = NULL;
+ else
+ user_data = ctxt->userData;
- value = ent->content;
+ /*
+ * Check that this entity is well formed
+ * 4.3.2: An internal general parsed entity is well-formed
+ * if its replacement text matches the production labeled
+ * content.
+ */
+ if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
+ ctxt->depth++;
+ ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
+ user_data, &list);
+ ctxt->depth--;
+
+ } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
+ ctxt->depth++;
+ ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
+ user_data, ctxt->depth, ent->URI,
+ ent->ExternalID, &list);
+ ctxt->depth--;
+ } else {
+ ret = XML_ERR_ENTITY_PE_INTERNAL;
+ xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
+ "invalid entity type found\n", NULL);
+ }
- /*
- * Check that this entity is well formed
- */
- if ((value != NULL) && (value[0] != 0) &&
- (value[1] == 0) && (value[0] == '<') &&
- (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
- /*
- * DONE: get definite answer on this !!!
- * Lots of entity decls are used to declare a single
- * char
- * <!ENTITY lt "<">
- * Which seems to be valid since
- * 2.4: The ampersand character (&) and the left angle
- * bracket (<) may appear in their literal form only
- * when used ... They are also legal within the literal
- * entity value of an internal entity declaration;i
- * see "4.3.2 Well-Formed Parsed Entities".
- * IMHO 2.4 and 4.3.2 are directly in contradiction.
- * Looking at the OASIS test suite and James Clark
- * tests, this is broken. However the XML REC uses
- * it. Is the XML REC not well-formed ????
- * This is a hack to avoid this problem
- *
- * ANSWER: since lt gt amp .. are already defined,
- * this is a redefinition and hence the fact that the
- * content is not well balanced is not a Wf error, this
- * is lousy but acceptable.
- */
- list = xmlNewDocText(ctxt->myDoc, value);
- if (list != NULL) {
- if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
- (ent->children == NULL)) {
- ent->children = list;
- ent->last = list;
- ent->owner = 1;
- list->parent = (xmlNodePtr) ent;
- } else {
- xmlFreeNodeList(list);
- }
- } else if (list != NULL) {
- xmlFreeNodeList(list);
- }
- } else {
- /*
- * 4.3.2: An internal general parsed entity is well-formed
- * if its replacement text matches the production labeled
- * content.
- */
+ /*
+ * Store the number of entities needing parsing for this entity
+ * content and do checkings
+ */
+ ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
+ if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
+ ent->checked |= 1;
+ if (ret == XML_ERR_ENTITY_LOOP) {
+ xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
+ xmlFreeNodeList(list);
+ return;
+ }
+ if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
+ xmlFreeNodeList(list);
+ return;
+ }
- void *user_data;
+ if ((ret == XML_ERR_OK) && (list != NULL)) {
+ if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
+ (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
+ (ent->children == NULL)) {
+ ent->children = list;
+ if (ctxt->replaceEntities) {
/*
- * This is a bit hackish but this seems the best
- * way to make sure both SAX and DOM entity support
- * behaves okay.
+ * Prune it directly in the generated document
+ * except for single text nodes.
*/
- if (ctxt->userData == ctxt)
- user_data = NULL;
- else
- user_data = ctxt->userData;
-
- if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
- ctxt->depth++;
- ret = xmlParseBalancedChunkMemoryInternal(ctxt,
- value, user_data, &list);
- ctxt->depth--;
- } else if (ent->etype ==
- XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
- ctxt->depth++;
- ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
- ctxt->sax, user_data, ctxt->depth,
- ent->URI, ent->ExternalID, &list);
- ctxt->depth--;
+ if (((list->type == XML_TEXT_NODE) &&
+ (list->next == NULL)) ||
+ (ctxt->parseMode == XML_PARSE_READER)) {
+ list->parent = (xmlNodePtr) ent;
+ list = NULL;
+ ent->owner = 1;
} else {
- ret = XML_ERR_ENTITY_PE_INTERNAL;
- xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
- "invalid entity type found\n", NULL);
- }
- if (ret == XML_ERR_ENTITY_LOOP) {
- xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
- return;
- } else if ((ret == XML_ERR_OK) && (list != NULL)) {
- if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
- (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
- (ent->children == NULL)) {
- ent->children = list;
- if (ctxt->replaceEntities) {
- /*
- * Prune it directly in the generated document
- * except for single text nodes.
- */
- if (((list->type == XML_TEXT_NODE) &&
- (list->next == NULL)) ||
- (ctxt->parseMode == XML_PARSE_READER)) {
- list->parent = (xmlNodePtr) ent;
- list = NULL;
- ent->owner = 1;
- } else {
- ent->owner = 0;
- while (list != NULL) {
- list->parent = (xmlNodePtr) ctxt->node;
- list->doc = ctxt->myDoc;
- if (list->next == NULL)
- ent->last = list;
- list = list->next;
- }
- list = ent->children;
+ ent->owner = 0;
+ while (list != NULL) {
+ list->parent = (xmlNodePtr) ctxt->node;
+ list->doc = ctxt->myDoc;
+ if (list->next == NULL)
+ ent->last = list;
+ list = list->next;
+ }
+ list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
- if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
- xmlAddEntityReference(ent, list, NULL);
+ if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
+ xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
- }
- } else {
- ent->owner = 1;
- while (list != NULL) {
- list->parent = (xmlNodePtr) ent;
- if (list->next == NULL)
- ent->last = list;
- list = list->next;
- }
- }
- } else {
- xmlFreeNodeList(list);
- list = NULL;
- }
- } else if ((ret != XML_ERR_OK) &&
- (ret != XML_WAR_UNDECLARED_ENTITY)) {
- xmlFatalErr(ctxt, ret, NULL);
- } else if (list != NULL) {
- xmlFreeNodeList(list);
- list = NULL;
+ }
+ } else {
+ ent->owner = 1;
+ while (list != NULL) {
+ list->parent = (xmlNodePtr) ent;
+ xmlSetTreeDoc(list, ent->doc);
+ if (list->next == NULL)
+ ent->last = list;
+ list = list->next;
}
}
- ent->checked = 1;
+ } else {
+ xmlFreeNodeList(list);
+ list = NULL;
}
+ } else if ((ret != XML_ERR_OK) &&
+ (ret != XML_WAR_UNDECLARED_ENTITY)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
+ "Entity '%s' failed to parse\n", ent->name);
+ xmlParserEntityCheck(ctxt, 0, ent, 0);
+ } else if (list != NULL) {
+ xmlFreeNodeList(list);
+ list = NULL;
+ }
+ if (ent->checked == 0)
+ ent->checked = 2;
+ } else if (ent->checked != 1) {
+ ctxt->nbentities += ent->checked / 2;
+ }
- if (ent->children == NULL) {
- /*
- * Probably running in SAX mode and the callbacks don't
- * build the entity content. So unless we already went
- * though parsing for first checking go though the entity
- * content to generate callbacks associated to the entity
- */
- if (was_checked == 1) {
- void *user_data;
- /*
- * This is a bit hackish but this seems the best
- * way to make sure both SAX and DOM entity support
- * behaves okay.
- */
- if (ctxt->userData == ctxt)
- user_data = NULL;
- else
- user_data = ctxt->userData;
-
- if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
- ctxt->depth++;
- ret = xmlParseBalancedChunkMemoryInternal(ctxt,
- ent->content, user_data, NULL);
- ctxt->depth--;
- } else if (ent->etype ==
- XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
- ctxt->depth++;
- ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
- ctxt->sax, user_data, ctxt->depth,
- ent->URI, ent->ExternalID, NULL);
- ctxt->depth--;
- } else {
- ret = XML_ERR_ENTITY_PE_INTERNAL;
- xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
- "invalid entity type found\n", NULL);
- }
- if (ret == XML_ERR_ENTITY_LOOP) {
- xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
- return;
- }
- }
- if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
- (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
- /*
- * Entity reference callback comes second, it's somewhat
- * superfluous but a compatibility to historical behaviour
- */
- ctxt->sax->reference(ctxt->userData, ent->name);
- }
+ /*
+ * Now that the entity content has been gathered
+ * provide it to the application, this can take different forms based
+ * on the parsing modes.
+ */
+ if (ent->children == NULL) {
+ /*
+ * Probably running in SAX mode and the callbacks don't
+ * build the entity content. So unless we already went
+ * though parsing for first checking go though the entity
+ * content to generate callbacks associated to the entity
+ */
+ if (was_checked != 0) {
+ void *user_data;
+ /*
+ * This is a bit hackish but this seems the best
+ * way to make sure both SAX and DOM entity support
+ * behaves okay.
+ */
+ if (ctxt->userData == ctxt)
+ user_data = NULL;
+ else
+ user_data = ctxt->userData;
+
+ if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
+ ctxt->depth++;
+ ret = xmlParseBalancedChunkMemoryInternal(ctxt,
+ ent->content, user_data, NULL);
+ ctxt->depth--;
+ } else if (ent->etype ==
+ XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
+ ctxt->depth++;
+ ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
+ ctxt->sax, user_data, ctxt->depth,
+ ent->URI, ent->ExternalID, NULL);
+ ctxt->depth--;
+ } else {
+ ret = XML_ERR_ENTITY_PE_INTERNAL;
+ xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
+ "invalid entity type found\n", NULL);
+ }
+ if (ret == XML_ERR_ENTITY_LOOP) {
+ xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
return;
}
- if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
- (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
+ }
+ if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
+ (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
+ /*
+ * Entity reference callback comes second, it's somewhat
+ * superfluous but a compatibility to historical behaviour
+ */
+ ctxt->sax->reference(ctxt->userData, ent->name);
+ }
+ return;
+ }
+
+ /*
+ * If we didn't get any children for the entity being built
+ */
+ if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
+ (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
+ /*
+ * Create a node.
+ */
+ ctxt->sax->reference(ctxt->userData, ent->name);
+ return;
+ }
+
+ if ((ctxt->replaceEntities) || (ent->children == NULL)) {
+ /*
+ * There is a problem on the handling of _private for entities
+ * (bug 155816): Should we copy the content of the field from
+ * the entity (possibly overwriting some value set by the user
+ * when a copy is created), should we leave it alone, or should
+ * we try to take care of different situations? The problem
+ * is exacerbated by the usage of this field by the xmlReader.
+ * To fix this bug, we look at _private on the created node
+ * and, if it's NULL, we copy in whatever was in the entity.
+ * If it's not NULL we leave it alone. This is somewhat of a
+ * hack - maybe we should have further tests to determine
+ * what to do.
+ */
+ if ((ctxt->node != NULL) && (ent->children != NULL)) {
+ /*
+ * Seems we are generating the DOM content, do
+ * a simple tree copy for all references except the first
+ * In the first occurrence list contains the replacement.
+ */
+ if (((list == NULL) && (ent->owner == 0)) ||
+ (ctxt->parseMode == XML_PARSE_READER)) {
+ xmlNodePtr nw = NULL, cur, firstChild = NULL;
+
/*
- * Create a node.
+ * We are copying here, make sure there is no abuse
*/
- ctxt->sax->reference(ctxt->userData, ent->name);
- return;
- }
- if ((ctxt->replaceEntities) || (ent->children == NULL)) {
+ ctxt->sizeentcopy += ent->length + 5;
+ if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
+ return;
+
/*
- * There is a problem on the handling of _private for entities
- * (bug 155816): Should we copy the content of the field from
- * the entity (possibly overwriting some value set by the user
- * when a copy is created), should we leave it alone, or should
- * we try to take care of different situations? The problem
- * is exacerbated by the usage of this field by the xmlReader.
- * To fix this bug, we look at _private on the created node
- * and, if it's NULL, we copy in whatever was in the entity.
- * If it's not NULL we leave it alone. This is somewhat of a
- * hack - maybe we should have further tests to determine
- * what to do.
+ * when operating on a reader, the entities definitions
+ * are always owning the entities subtree.
+ if (ctxt->parseMode == XML_PARSE_READER)
+ ent->owner = 1;
*/
- if ((ctxt->node != NULL) && (ent->children != NULL)) {
- /*
- * Seems we are generating the DOM content, do
- * a simple tree copy for all references except the first
- * In the first occurrence list contains the replacement.
- * progressive == 2 means we are operating on the Reader
- * and since nodes are discarded we must copy all the time.
- */
- if (((list == NULL) && (ent->owner == 0)) ||
- (ctxt->parseMode == XML_PARSE_READER)) {
- xmlNodePtr nw = NULL, cur, firstChild = NULL;
+ cur = ent->children;
+ while (cur != NULL) {
+ nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
+ if (nw != NULL) {
+ if (nw->_private == NULL)
+ nw->_private = cur->_private;
+ if (firstChild == NULL){
+ firstChild = nw;
+ }
+ nw = xmlAddChild(ctxt->node, nw);
+ }
+ if (cur == ent->last) {
/*
- * when operating on a reader, the entities definitions
- * are always owning the entities subtree.
- if (ctxt->parseMode == XML_PARSE_READER)
- ent->owner = 1;
+ * needed to detect some strange empty
+ * node cases in the reader tests
*/
+ if ((ctxt->parseMode == XML_PARSE_READER) &&
+ (nw != NULL) &&
+ (nw->type == XML_ELEMENT_NODE) &&
+ (nw->children == NULL))
+ nw->extra = 1;
- cur = ent->children;
- while (cur != NULL) {
- nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
- if (nw != NULL) {
- if (nw->_private == NULL)
- nw->_private = cur->_private;
- if (firstChild == NULL){
- firstChild = nw;
- }
- nw = xmlAddChild(ctxt->node, nw);
- }
- if (cur == ent->last) {
- /*
- * needed to detect some strange empty
- * node cases in the reader tests
- */
- if ((ctxt->parseMode == XML_PARSE_READER) &&
- (nw != NULL) &&
- (nw->type == XML_ELEMENT_NODE) &&
- (nw->children == NULL))
- nw->extra = 1;
-
- break;
- }
- cur = cur->next;
- }
+ break;
+ }
+ cur = cur->next;
+ }
#ifdef LIBXML_LEGACY_ENABLED
- if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
- xmlAddEntityReference(ent, firstChild, nw);
+ if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
+ xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
- } else if (list == NULL) {
- xmlNodePtr nw = NULL, cur, next, last,
- firstChild = NULL;
- /*
- * Copy the entity child list and make it the new
- * entity child list. The goal is to make sure any
- * ID or REF referenced will be the one from the
- * document content and not the entity copy.
- */
- cur = ent->children;
- ent->children = NULL;
- last = ent->last;
- ent->last = NULL;
- while (cur != NULL) {
- next = cur->next;
- cur->next = NULL;
- cur->parent = NULL;
- nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
- if (nw != NULL) {
- if (nw->_private == NULL)
- nw->_private = cur->_private;
- if (firstChild == NULL){
- firstChild = cur;
- }
- xmlAddChild((xmlNodePtr) ent, nw);
- xmlAddChild(ctxt->node, cur);
- }
- if (cur == last)
- break;
- cur = next;
+ } else if ((list == NULL) || (ctxt->inputNr > 0)) {
+ xmlNodePtr nw = NULL, cur, next, last,
+ firstChild = NULL;
+
+ /*
+ * We are copying here, make sure there is no abuse
+ */
+ ctxt->sizeentcopy += ent->length + 5;
+ if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
+ return;
+
+ /*
+ * Copy the entity child list and make it the new
+ * entity child list. The goal is to make sure any
+ * ID or REF referenced will be the one from the
+ * document content and not the entity copy.
+ */
+ cur = ent->children;
+ ent->children = NULL;
+ last = ent->last;
+ ent->last = NULL;
+ while (cur != NULL) {
+ next = cur->next;
+ cur->next = NULL;
+ cur->parent = NULL;
+ nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
+ if (nw != NULL) {
+ if (nw->_private == NULL)
+ nw->_private = cur->_private;
+ if (firstChild == NULL){
+ firstChild = cur;
}
- ent->owner = 1;
+ xmlAddChild((xmlNodePtr) ent, nw);
+ xmlAddChild(ctxt->node, cur);
+ }
+ if (cur == last)
+ break;
+ cur = next;
+ }
+ if (ent->owner == 0)
+ ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
- if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
- xmlAddEntityReference(ent, firstChild, nw);
+ if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
+ xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
- } else {
- const xmlChar *nbktext;
-
- /*
- * the name change is to avoid coalescing of the
- * node with a possible previous text one which
- * would make ent->children a dangling pointer
- */
- nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
- -1);
- if (ent->children->type == XML_TEXT_NODE)
- ent->children->name = nbktext;
- if ((ent->last != ent->children) &&
- (ent->last->type == XML_TEXT_NODE))
- ent->last->name = nbktext;
- xmlAddChildList(ctxt->node, ent->children);
- }
+ } else {
+ const xmlChar *nbktext;
- /*
- * This is to avoid a nasty side effect, see
- * characters() in SAX.c
- */
- ctxt->nodemem = 0;
- ctxt->nodelen = 0;
- return;
- }
+ /*
+ * the name change is to avoid coalescing of the
+ * node with a possible previous text one which
+ * would make ent->children a dangling pointer
+ */
+ nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
+ -1);
+ if (ent->children->type == XML_TEXT_NODE)
+ ent->children->name = nbktext;
+ if ((ent->last != ent->children) &&
+ (ent->last->type == XML_TEXT_NODE))
+ ent->last->name = nbktext;
+ xmlAddChildList(ctxt->node, ent->children);
}
- } else {
- val = ent->content;
- if (val == NULL) return;
+
/*
- * inline the entity.
+ * This is to avoid a nasty side effect, see
+ * characters() in SAX.c
*/
- if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
+ ctxt->nodemem = 0;
+ ctxt->nodelen = 0;
+ return;
}
}
}
@@ -6337,132 +7641,153 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
xmlEntityPtr ent = NULL;
GROW;
-
- if (RAW == '&') {
- NEXT;
- name = xmlParseName(ctxt);
- if (name == NULL) {
- xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
- "xmlParseEntityRef: no name\n");
- } else {
- if (RAW == ';') {
- NEXT;
- /*
- * Ask first SAX for entity resolution, otherwise try the
- * predefined set.
- */
- if (ctxt->sax != NULL) {
- if (ctxt->sax->getEntity != NULL)
- ent = ctxt->sax->getEntity(ctxt->userData, name);
- if ((ctxt->wellFormed == 1 ) && (ent == NULL))
- ent = xmlGetPredefinedEntity(name);
- if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
- (ctxt->userData==ctxt)) {
- ent = xmlSAX2GetEntity(ctxt, name);
- }
- }
- /*
- * [ WFC: Entity Declared ]
- * In a document without any DTD, a document with only an
- * internal DTD subset which contains no parameter entity
- * references, or a document with "standalone='yes'", the
- * Name given in the entity reference must match that in an
- * entity declaration, except that well-formed documents
- * need not declare any of the following entities: amp, lt,
- * gt, apos, quot.
- * The declaration of a parameter entity must precede any
- * reference to it.
- * Similarly, the declaration of a general entity must
- * precede any reference to it which appears in a default
- * value in an attribute-list declaration. Note that if
- * entities are declared in the external subset or in
- * external parameter entities, a non-validating processor
- * is not obligated to read and process their declarations;
- * for such documents, the rule that an entity must be
- * declared is a well-formedness constraint only if
- * standalone='yes'.
- */
- if (ent == NULL) {
- if ((ctxt->standalone == 1) ||
- ((ctxt->hasExternalSubset == 0) &&
- (ctxt->hasPErefs == 0))) {
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
- "Entity '%s' not defined\n", name);
- } else {
- xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
- "Entity '%s' not defined\n", name);
- if ((ctxt->inSubset == 0) &&
- (ctxt->sax != NULL) &&
- (ctxt->sax->reference != NULL)) {
- ctxt->sax->reference(ctxt->userData, name);
- }
- }
- ctxt->valid = 0;
- }
-
- /*
- * [ WFC: Parsed Entity ]
- * An entity reference must not contain the name of an
- * unparsed entity
- */
- else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
- "Entity reference to unparsed entity %s\n", name);
- }
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
- /*
- * [ WFC: No External Entity References ]
- * Attribute values cannot contain direct or indirect
- * entity references to external entities.
- */
- else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
- (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
- xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
- "Attribute references external entity '%s'\n", name);
- }
- /*
- * [ WFC: No < in Attribute Values ]
- * The replacement text of any entity referred to directly or
- * indirectly in an attribute value (other than "&lt;") must
- * not contain a <.
- */
- else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
- (ent != NULL) &&
- (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
- (ent->content != NULL) &&
- (xmlStrchr(ent->content, '<'))) {
- xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
- "'<' in entity '%s' is not allowed in attributes values\n", name);
- }
+ if (RAW != '&')
+ return(NULL);
+ NEXT;
+ name = xmlParseName(ctxt);
+ if (name == NULL) {
+ xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
+ "xmlParseEntityRef: no name\n");
+ return(NULL);
+ }
+ if (RAW != ';') {
+ xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
+ return(NULL);
+ }
+ NEXT;
- /*
- * Internal check, no parameter entities here ...
- */
- else {
- switch (ent->etype) {
- case XML_INTERNAL_PARAMETER_ENTITY:
- case XML_EXTERNAL_PARAMETER_ENTITY:
- xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
- "Attempt to reference the parameter entity '%s'\n",
- name);
- break;
- default:
- break;
- }
- }
+ /*
+ * Predefined entities override any extra definition
+ */
+ if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
+ ent = xmlGetPredefinedEntity(name);
+ if (ent != NULL)
+ return(ent);
+ }
- /*
- * [ WFC: No Recursion ]
- * A parsed entity must not contain a recursive reference
- * to itself, either directly or indirectly.
- * Done somewhere else
- */
+ /*
+ * Increase the number of entity references parsed
+ */
+ ctxt->nbentities++;
- } else {
- xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
+ /*
+ * Ask first SAX for entity resolution, otherwise try the
+ * entities which may have stored in the parser context.
+ */
+ if (ctxt->sax != NULL) {
+ if (ctxt->sax->getEntity != NULL)
+ ent = ctxt->sax->getEntity(ctxt->userData, name);
+ if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
+ (ctxt->options & XML_PARSE_OLDSAX))
+ ent = xmlGetPredefinedEntity(name);
+ if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
+ (ctxt->userData==ctxt)) {
+ ent = xmlSAX2GetEntity(ctxt, name);
+ }
+ }
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ /*
+ * [ WFC: Entity Declared ]
+ * In a document without any DTD, a document with only an
+ * internal DTD subset which contains no parameter entity
+ * references, or a document with "standalone='yes'", the
+ * Name given in the entity reference must match that in an
+ * entity declaration, except that well-formed documents
+ * need not declare any of the following entities: amp, lt,
+ * gt, apos, quot.
+ * The declaration of a parameter entity must precede any
+ * reference to it.
+ * Similarly, the declaration of a general entity must
+ * precede any reference to it which appears in a default
+ * value in an attribute-list declaration. Note that if
+ * entities are declared in the external subset or in
+ * external parameter entities, a non-validating processor
+ * is not obligated to read and process their declarations;
+ * for such documents, the rule that an entity must be
+ * declared is a well-formedness constraint only if
+ * standalone='yes'.
+ */
+ if (ent == NULL) {
+ if ((ctxt->standalone == 1) ||
+ ((ctxt->hasExternalSubset == 0) &&
+ (ctxt->hasPErefs == 0))) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
+ "Entity '%s' not defined\n", name);
+ } else {
+ xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
+ "Entity '%s' not defined\n", name);
+ if ((ctxt->inSubset == 0) &&
+ (ctxt->sax != NULL) &&
+ (ctxt->sax->reference != NULL)) {
+ ctxt->sax->reference(ctxt->userData, name);
}
}
+ xmlParserEntityCheck(ctxt, 0, ent, 0);
+ ctxt->valid = 0;
+ }
+
+ /*
+ * [ WFC: Parsed Entity ]
+ * An entity reference must not contain the name of an
+ * unparsed entity
+ */
+ else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
+ "Entity reference to unparsed entity %s\n", name);
}
+
+ /*
+ * [ WFC: No External Entity References ]
+ * Attribute values cannot contain direct or indirect
+ * entity references to external entities.
+ */
+ else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
+ (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
+ "Attribute references external entity '%s'\n", name);
+ }
+ /*
+ * [ WFC: No < in Attribute Values ]
+ * The replacement text of any entity referred to directly or
+ * indirectly in an attribute value (other than "&lt;") must
+ * not contain a <.
+ */
+ else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
+ (ent != NULL) &&
+ (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
+ if (((ent->checked & 1) || (ent->checked == 0)) &&
+ (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
+ "'<' in entity '%s' is not allowed in attributes values\n", name);
+ }
+ }
+
+ /*
+ * Internal check, no parameter entities here ...
+ */
+ else {
+ switch (ent->etype) {
+ case XML_INTERNAL_PARAMETER_ENTITY:
+ case XML_EXTERNAL_PARAMETER_ENTITY:
+ xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
+ "Attempt to reference the parameter entity '%s'\n",
+ name);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /*
+ * [ WFC: No Recursion ]
+ * A parsed entity must not contain a recursive reference
+ * to itself, either directly or indirectly.
+ * Done somewhere else
+ */
return(ent);
}
@@ -6497,7 +7822,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
* Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
* is updated to the current location in the string.
*/
-xmlEntityPtr
+static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
xmlChar *name;
const xmlChar *ptr;
@@ -6508,129 +7833,156 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
return(NULL);
ptr = *str;
cur = *ptr;
- if (cur == '&') {
- ptr++;
- cur = *ptr;
- name = xmlParseStringName(ctxt, &ptr);
- if (name == NULL) {
- xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
- "xmlParseStringEntityRef: no name\n");
- } else {
- if (*ptr == ';') {
- ptr++;
- /*
- * Ask first SAX for entity resolution, otherwise try the
- * predefined set.
- */
- if (ctxt->sax != NULL) {
- if (ctxt->sax->getEntity != NULL)
- ent = ctxt->sax->getEntity(ctxt->userData, name);
- if (ent == NULL)
- ent = xmlGetPredefinedEntity(name);
- if ((ent == NULL) && (ctxt->userData==ctxt)) {
- ent = xmlSAX2GetEntity(ctxt, name);
- }
- }
- /*
- * [ WFC: Entity Declared ]
- * In a document without any DTD, a document with only an
- * internal DTD subset which contains no parameter entity
- * references, or a document with "standalone='yes'", the
- * Name given in the entity reference must match that in an
- * entity declaration, except that well-formed documents
- * need not declare any of the following entities: amp, lt,
- * gt, apos, quot.
- * The declaration of a parameter entity must precede any
- * reference to it.
- * Similarly, the declaration of a general entity must
- * precede any reference to it which appears in a default
- * value in an attribute-list declaration. Note that if
- * entities are declared in the external subset or in
- * external parameter entities, a non-validating processor
- * is not obligated to read and process their declarations;
- * for such documents, the rule that an entity must be
- * declared is a well-formedness constraint only if
- * standalone='yes'.
- */
- if (ent == NULL) {
- if ((ctxt->standalone == 1) ||
- ((ctxt->hasExternalSubset == 0) &&
- (ctxt->hasPErefs == 0))) {
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
- "Entity '%s' not defined\n", name);
- } else {
- xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
- "Entity '%s' not defined\n",
- name);
- }
- /* TODO ? check regressions ctxt->valid = 0; */
- }
+ if (cur != '&')
+ return(NULL);
- /*
- * [ WFC: Parsed Entity ]
- * An entity reference must not contain the name of an
- * unparsed entity
- */
- else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
- "Entity reference to unparsed entity %s\n", name);
- }
+ ptr++;
+ name = xmlParseStringName(ctxt, &ptr);
+ if (name == NULL) {
+ xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
+ "xmlParseStringEntityRef: no name\n");
+ *str = ptr;
+ return(NULL);
+ }
+ if (*ptr != ';') {
+ xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
+ xmlFree(name);
+ *str = ptr;
+ return(NULL);
+ }
+ ptr++;
- /*
- * [ WFC: No External Entity References ]
- * Attribute values cannot contain direct or indirect
- * entity references to external entities.
- */
- else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
- (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
- xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
- "Attribute references external entity '%s'\n", name);
- }
- /*
- * [ WFC: No < in Attribute Values ]
- * The replacement text of any entity referred to directly or
- * indirectly in an attribute value (other than "&lt;") must
- * not contain a <.
- */
- else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
- (ent != NULL) &&
- (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
- (ent->content != NULL) &&
- (xmlStrchr(ent->content, '<'))) {
- xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
- "'<' in entity '%s' is not allowed in attributes values\n",
- name);
- }
- /*
- * Internal check, no parameter entities here ...
- */
- else {
- switch (ent->etype) {
- case XML_INTERNAL_PARAMETER_ENTITY:
- case XML_EXTERNAL_PARAMETER_ENTITY:
- xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
- "Attempt to reference the parameter entity '%s'\n",
- name);
- break;
- default:
- break;
- }
- }
+ /*
+ * Predefined entities override any extra definition
+ */
+ if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
+ ent = xmlGetPredefinedEntity(name);
+ if (ent != NULL) {
+ xmlFree(name);
+ *str = ptr;
+ return(ent);
+ }
+ }
- /*
- * [ WFC: No Recursion ]
- * A parsed entity must not contain a recursive reference
- * to itself, either directly or indirectly.
- * Done somewhere else
- */
+ /*
+ * Increate the number of entity references parsed
+ */
+ ctxt->nbentities++;
- } else {
- xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
- }
- xmlFree(name);
+ /*
+ * Ask first SAX for entity resolution, otherwise try the
+ * entities which may have stored in the parser context.
+ */
+ if (ctxt->sax != NULL) {
+ if (ctxt->sax->getEntity != NULL)
+ ent = ctxt->sax->getEntity(ctxt->userData, name);
+ if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
+ ent = xmlGetPredefinedEntity(name);
+ if ((ent == NULL) && (ctxt->userData==ctxt)) {
+ ent = xmlSAX2GetEntity(ctxt, name);
+ }
+ }
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(name);
+ return(NULL);
+ }
+
+ /*
+ * [ WFC: Entity Declared ]
+ * In a document without any DTD, a document with only an
+ * internal DTD subset which contains no parameter entity
+ * references, or a document with "standalone='yes'", the
+ * Name given in the entity reference must match that in an
+ * entity declaration, except that well-formed documents
+ * need not declare any of the following entities: amp, lt,
+ * gt, apos, quot.
+ * The declaration of a parameter entity must precede any
+ * reference to it.
+ * Similarly, the declaration of a general entity must
+ * precede any reference to it which appears in a default
+ * value in an attribute-list declaration. Note that if
+ * entities are declared in the external subset or in
+ * external parameter entities, a non-validating processor
+ * is not obligated to read and process their declarations;
+ * for such documents, the rule that an entity must be
+ * declared is a well-formedness constraint only if
+ * standalone='yes'.
+ */
+ if (ent == NULL) {
+ if ((ctxt->standalone == 1) ||
+ ((ctxt->hasExternalSubset == 0) &&
+ (ctxt->hasPErefs == 0))) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
+ "Entity '%s' not defined\n", name);
+ } else {
+ xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
+ "Entity '%s' not defined\n",
+ name);
+ }
+ xmlParserEntityCheck(ctxt, 0, ent, 0);
+ /* TODO ? check regressions ctxt->valid = 0; */
+ }
+
+ /*
+ * [ WFC: Parsed Entity ]
+ * An entity reference must not contain the name of an
+ * unparsed entity
+ */
+ else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
+ "Entity reference to unparsed entity %s\n", name);
+ }
+
+ /*
+ * [ WFC: No External Entity References ]
+ * Attribute values cannot contain direct or indirect
+ * entity references to external entities.
+ */
+ else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
+ (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
+ "Attribute references external entity '%s'\n", name);
+ }
+ /*
+ * [ WFC: No < in Attribute Values ]
+ * The replacement text of any entity referred to directly or
+ * indirectly in an attribute value (other than "&lt;") must
+ * not contain a <.
+ */
+ else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
+ (ent != NULL) && (ent->content != NULL) &&
+ (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
+ (xmlStrchr(ent->content, '<'))) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
+ "'<' in entity '%s' is not allowed in attributes values\n",
+ name);
+ }
+
+ /*
+ * Internal check, no parameter entities here ...
+ */
+ else {
+ switch (ent->etype) {
+ case XML_INTERNAL_PARAMETER_ENTITY:
+ case XML_EXTERNAL_PARAMETER_ENTITY:
+ xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
+ "Attempt to reference the parameter entity '%s'\n",
+ name);
+ break;
+ default:
+ break;
}
}
+
+ /*
+ * [ WFC: No Recursion ]
+ * A parsed entity must not contain a recursive reference
+ * to itself, either directly or indirectly.
+ * Done somewhere else
+ */
+
+ xmlFree(name);
*str = ptr;
return(ent);
}
@@ -6647,7 +7999,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
*
* [ WFC: No Recursion ]
* A parsed entity must not contain a recursive
- * reference to itself, either directly or indirectly.
+ * reference to itself, either directly or indirectly.
*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an internal DTD
@@ -6671,90 +8023,200 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
xmlEntityPtr entity = NULL;
xmlParserInputPtr input;
- if (RAW == '%') {
- NEXT;
- name = xmlParseName(ctxt);
- if (name == NULL) {
- xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
- "xmlParsePEReference: no name\n");
- } else {
- if (RAW == ';') {
- NEXT;
- if ((ctxt->sax != NULL) &&
- (ctxt->sax->getParameterEntity != NULL))
- entity = ctxt->sax->getParameterEntity(ctxt->userData,
- name);
- if (entity == NULL) {
- /*
- * [ WFC: Entity Declared ]
- * In a document without any DTD, a document with only an
- * internal DTD subset which contains no parameter entity
- * references, or a document with "standalone='yes'", ...
- * ... The declaration of a parameter entity must precede
- * any reference to it...
- */
- if ((ctxt->standalone == 1) ||
- ((ctxt->hasExternalSubset == 0) &&
- (ctxt->hasPErefs == 0))) {
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
- "PEReference: %%%s; not found\n",
- name);
- } else {
- /*
- * [ VC: Entity Declared ]
- * In a document with an external subset or external
- * parameter entities with "standalone='no'", ...
- * ... The declaration of a parameter entity must
- * precede any reference to it...
- */
- xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
- "PEReference: %%%s; not found\n",
- name, NULL);
- ctxt->valid = 0;
- }
- } else {
- /*
- * Internal checking in case the entity quest barfed
- */
- if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
- (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
- xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
- "Internal: %%%s; is not a parameter entity\n",
- name, NULL);
- } else if (ctxt->input->free != deallocblankswrapper) {
- input =
- xmlNewBlanksWrapperInputStream(ctxt, entity);
- xmlPushInput(ctxt, input);
- } else {
- /*
- * TODO !!!
- * handle the extra spaces added before and after
- * c.f. http://www.w3.org/TR/REC-xml#as-PE
- */
- input = xmlNewEntityInputStream(ctxt, entity);
- xmlPushInput(ctxt, input);
- if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
- (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
- (IS_BLANK_CH(NXT(5)))) {
- xmlParseTextDecl(ctxt);
- if (ctxt->errNo ==
- XML_ERR_UNSUPPORTED_ENCODING) {
- /*
- * The XML REC instructs us to stop parsing
- * right here
- */
- ctxt->instate = XML_PARSER_EOF;
- return;
- }
- }
- }
- }
- ctxt->hasPErefs = 1;
- } else {
- xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
+ if (RAW != '%')
+ return;
+ NEXT;
+ name = xmlParseName(ctxt);
+ if (name == NULL) {
+ xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
+ "xmlParsePEReference: no name\n");
+ return;
+ }
+ if (RAW != ';') {
+ xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
+ return;
+ }
+
+ NEXT;
+
+ /*
+ * Increate the number of entity references parsed
+ */
+ ctxt->nbentities++;
+
+ /*
+ * Request the entity from SAX
+ */
+ if ((ctxt->sax != NULL) &&
+ (ctxt->sax->getParameterEntity != NULL))
+ entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
+ if (entity == NULL) {
+ /*
+ * [ WFC: Entity Declared ]
+ * In a document without any DTD, a document with only an
+ * internal DTD subset which contains no parameter entity
+ * references, or a document with "standalone='yes'", ...
+ * ... The declaration of a parameter entity must precede
+ * any reference to it...
+ */
+ if ((ctxt->standalone == 1) ||
+ ((ctxt->hasExternalSubset == 0) &&
+ (ctxt->hasPErefs == 0))) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
+ "PEReference: %%%s; not found\n",
+ name);
+ } else {
+ /*
+ * [ VC: Entity Declared ]
+ * In a document with an external subset or external
+ * parameter entities with "standalone='no'", ...
+ * ... The declaration of a parameter entity must
+ * precede any reference to it...
+ */
+ xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
+ "PEReference: %%%s; not found\n",
+ name, NULL);
+ ctxt->valid = 0;
+ }
+ xmlParserEntityCheck(ctxt, 0, NULL, 0);
+ } else {
+ /*
+ * Internal checking in case the entity quest barfed
+ */
+ if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
+ (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
+ xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
+ "Internal: %%%s; is not a parameter entity\n",
+ name, NULL);
+ } else if (ctxt->input->free != deallocblankswrapper) {
+ input = xmlNewBlanksWrapperInputStream(ctxt, entity);
+ if (xmlPushInput(ctxt, input) < 0)
+ return;
+ } else {
+ /*
+ * TODO !!!
+ * handle the extra spaces added before and after
+ * c.f. http://www.w3.org/TR/REC-xml#as-PE
+ */
+ input = xmlNewEntityInputStream(ctxt, entity);
+ if (xmlPushInput(ctxt, input) < 0)
+ return;
+ if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
+ (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
+ (IS_BLANK_CH(NXT(5)))) {
+ xmlParseTextDecl(ctxt);
+ if (ctxt->errNo ==
+ XML_ERR_UNSUPPORTED_ENCODING) {
+ /*
+ * The XML REC instructs us to stop parsing
+ * right here
+ */
+ xmlHaltParser(ctxt);
+ return;
+ }
+ }
+ }
+ }
+ ctxt->hasPErefs = 1;
+}
+
+/**
+ * xmlLoadEntityContent:
+ * @ctxt: an XML parser context
+ * @entity: an unloaded system entity
+ *
+ * Load the original content of the given system entity from the
+ * ExternalID/SystemID given. This is to be used for Included in Literal
+ * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
+ *
+ * Returns 0 in case of success and -1 in case of failure
+ */
+static int
+xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
+ xmlParserInputPtr input;
+ xmlBufferPtr buf;
+ int l, c;
+ int count = 0;
+
+ if ((ctxt == NULL) || (entity == NULL) ||
+ ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
+ (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
+ (entity->content != NULL)) {
+ xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
+ "xmlLoadEntityContent parameter error");
+ return(-1);
+ }
+
+ if (xmlParserDebugEntities)
+ xmlGenericError(xmlGenericErrorContext,
+ "Reading %s entity content input\n", entity->name);
+
+ buf = xmlBufferCreate();
+ if (buf == NULL) {
+ xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
+ "xmlLoadEntityContent parameter error");
+ return(-1);
+ }
+
+ input = xmlNewEntityInputStream(ctxt, entity);
+ if (input == NULL) {
+ xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
+ "xmlLoadEntityContent input error");
+ xmlBufferFree(buf);
+ return(-1);
+ }
+
+ /*
+ * Push the entity as the current input, read char by char
+ * saving to the buffer until the end of the entity or an error
+ */
+ if (xmlPushInput(ctxt, input) < 0) {
+ xmlBufferFree(buf);
+ return(-1);
+ }
+
+ GROW;
+ c = CUR_CHAR(l);
+ while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
+ (IS_CHAR(c))) {
+ xmlBufferAdd(buf, ctxt->input->cur, l);
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlBufferFree(buf);
+ return(-1);
}
- }
+ }
+ NEXTL(l);
+ c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlBufferFree(buf);
+ return(-1);
+ }
+ c = CUR_CHAR(l);
+ }
}
+
+ if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
+ xmlPopInput(ctxt);
+ } else if (!IS_CHAR(c)) {
+ xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
+ "xmlLoadEntityContent: invalid char value %d\n",
+ c);
+ xmlBufferFree(buf);
+ return(-1);
+ }
+ entity->content = buf->content;
+ buf->content = NULL;
+ xmlBufferFree(buf);
+
+ return(0);
}
/**
@@ -6768,7 +8230,7 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
*
* [ WFC: No Recursion ]
* A parsed entity must not contain a recursive
- * reference to itself, either directly or indirectly.
+ * reference to itself, either directly or indirectly.
*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an internal DTD
@@ -6788,7 +8250,7 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
* Returns the string of the entity content.
* str is updated to the current value of the index
*/
-xmlEntityPtr
+static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
const xmlChar *ptr;
xmlChar cur;
@@ -6798,67 +8260,80 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
if ((str == NULL) || (*str == NULL)) return(NULL);
ptr = *str;
cur = *ptr;
- if (cur == '%') {
- ptr++;
- cur = *ptr;
- name = xmlParseStringName(ctxt, &ptr);
- if (name == NULL) {
- xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
- "xmlParseStringPEReference: no name\n");
+ if (cur != '%')
+ return(NULL);
+ ptr++;
+ name = xmlParseStringName(ctxt, &ptr);
+ if (name == NULL) {
+ xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
+ "xmlParseStringPEReference: no name\n");
+ *str = ptr;
+ return(NULL);
+ }
+ cur = *ptr;
+ if (cur != ';') {
+ xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
+ xmlFree(name);
+ *str = ptr;
+ return(NULL);
+ }
+ ptr++;
+
+ /*
+ * Increate the number of entity references parsed
+ */
+ ctxt->nbentities++;
+
+ /*
+ * Request the entity from SAX
+ */
+ if ((ctxt->sax != NULL) &&
+ (ctxt->sax->getParameterEntity != NULL))
+ entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(name);
+ return(NULL);
+ }
+ if (entity == NULL) {
+ /*
+ * [ WFC: Entity Declared ]
+ * In a document without any DTD, a document with only an
+ * internal DTD subset which contains no parameter entity
+ * references, or a document with "standalone='yes'", ...
+ * ... The declaration of a parameter entity must precede
+ * any reference to it...
+ */
+ if ((ctxt->standalone == 1) ||
+ ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
+ "PEReference: %%%s; not found\n", name);
} else {
- cur = *ptr;
- if (cur == ';') {
- ptr++;
- cur = *ptr;
- if ((ctxt->sax != NULL) &&
- (ctxt->sax->getParameterEntity != NULL))
- entity = ctxt->sax->getParameterEntity(ctxt->userData,
- name);
- if (entity == NULL) {
- /*
- * [ WFC: Entity Declared ]
- * In a document without any DTD, a document with only an
- * internal DTD subset which contains no parameter entity
- * references, or a document with "standalone='yes'", ...
- * ... The declaration of a parameter entity must precede
- * any reference to it...
- */
- if ((ctxt->standalone == 1) ||
- ((ctxt->hasExternalSubset == 0) &&
- (ctxt->hasPErefs == 0))) {
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
- "PEReference: %%%s; not found\n", name);
- } else {
- /*
- * [ VC: Entity Declared ]
- * In a document with an external subset or external
- * parameter entities with "standalone='no'", ...
- * ... The declaration of a parameter entity must
- * precede any reference to it...
- */
- xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
- "PEReference: %%%s; not found\n",
- name, NULL);
- ctxt->valid = 0;
- }
- } else {
- /*
- * Internal checking in case the entity quest barfed
- */
- if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
- (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
- xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
- "%%%s; is not a parameter entity\n",
- name, NULL);
- }
- }
- ctxt->hasPErefs = 1;
- } else {
- xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
- }
- xmlFree(name);
+ /*
+ * [ VC: Entity Declared ]
+ * In a document with an external subset or external
+ * parameter entities with "standalone='no'", ...
+ * ... The declaration of a parameter entity must
+ * precede any reference to it...
+ */
+ xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
+ "PEReference: %%%s; not found\n",
+ name, NULL);
+ ctxt->valid = 0;
+ }
+ xmlParserEntityCheck(ctxt, 0, NULL, 0);
+ } else {
+ /*
+ * Internal checking in case the entity quest barfed
+ */
+ if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
+ (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
+ xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
+ "%%%s; is not a parameter entity\n",
+ name, NULL);
}
}
+ ctxt->hasPErefs = 1;
+ xmlFree(name);
*str = ptr;
return(entity);
}
@@ -6869,12 +8344,12 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
*
* parse a DOCTYPE declaration
*
- * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
+ * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
* ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
*
* [ VC: Root Element Type ]
* The Name in the document type declaration must match the element
- * type of the root element.
+ * type of the root element.
*/
void
@@ -6921,6 +8396,8 @@ xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
(!ctxt->disableSAX))
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
/*
* Is there any internal subset declarations ?
@@ -6956,11 +8433,11 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
ctxt->instate = XML_PARSER_DTD;
NEXT;
/*
- * Parse the succession of Markup declarations and
+ * Parse the succession of Markup declarations and
* PEReferences.
* Subsequence (markupdecl | PEReference | S)*
*/
- while (RAW != ']') {
+ while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *check = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
@@ -6980,7 +8457,7 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
break;
}
}
- if (RAW == ']') {
+ if (RAW == ']') {
NEXT;
SKIP_BLANKS;
}
@@ -7011,8 +8488,8 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
*
* [ WFC: No < in Attribute Values ]
* The replacement text of any entity referred to directly or indirectly in
- * an attribute value (other than "&lt;") must not contain a <.
- *
+ * an attribute value (other than "&lt;") must not contain a <.
+ *
* [ VC: Attribute Value Type ]
* The attribute must have been declared; the value must be of the type
* declared for it.
@@ -7093,7 +8570,7 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
/**
* xmlParseStartTag:
* @ctxt: an XML parser context
- *
+ *
* parse a start of tag either for rule element or
* EmptyElement. In both case we don't parse the tag closing chars.
*
@@ -7101,13 +8578,13 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
+ * empty-element tag.
*
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
+ * empty-element tag.
*
* With namespace:
*
@@ -7146,9 +8623,9 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) {
SKIP_BLANKS;
GROW;
- while ((RAW != '>') &&
+ while (((RAW != '>') &&
((RAW != '/') || (NXT(1) != '>')) &&
- (IS_BYTE_CHAR(RAW))) {
+ (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *q = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
@@ -7157,7 +8634,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) {
/*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same
- * start-tag or empty-element tag.
+ * start-tag or empty-element tag.
*/
for (i = 0; i < nbatts;i += 2) {
if (xmlStrEqual(atts[i], attname)) {
@@ -7206,7 +8683,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) {
xmlFree(attvalue);
}
-failed:
+failed:
GROW
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
@@ -7288,7 +8765,7 @@ xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
/*
* [ WFC: Element Type Match ]
* The Name in an element's end-tag must match the element type in the
- * start-tag.
+ * start-tag.
*
*/
if (name != (xmlChar*)1) {
@@ -7335,38 +8812,6 @@ xmlParseEndTag(xmlParserCtxtPtr ctxt) {
* *
************************************************************************/
-static const xmlChar *
-xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
- int len = 0, l;
- int c;
- int count = 0;
-
- /*
- * Handler for more complex cases
- */
- GROW;
- c = CUR_CHAR(l);
- if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
- (!IS_LETTER(c) && (c != '_'))) {
- return(NULL);
- }
-
- while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
- ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
- (c == '.') || (c == '-') || (c == '_') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c)))) {
- if (count++ > 100) {
- count = 0;
- GROW;
- }
- len += l;
- NEXTL(l);
- c = CUR_CHAR(l);
- }
- return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
-}
-
/*
* xmlGetNamespace:
* @ctxt: an XML parser context
@@ -7392,56 +8837,6 @@ xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
}
/**
- * xmlParseNCName:
- * @ctxt: an XML parser context
- * @len: lenght of the string parsed
- *
- * parse an XML name.
- *
- * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
- * CombiningChar | Extender
- *
- * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
- *
- * Returns the Name parsed or NULL
- */
-
-static const xmlChar *
-xmlParseNCName(xmlParserCtxtPtr ctxt) {
- const xmlChar *in;
- const xmlChar *ret;
- int count = 0;
-
- /*
- * Accelerator for simple ASCII names
- */
- in = ctxt->input->cur;
- if (((*in >= 0x61) && (*in <= 0x7A)) ||
- ((*in >= 0x41) && (*in <= 0x5A)) ||
- (*in == '_')) {
- in++;
- while (((*in >= 0x61) && (*in <= 0x7A)) ||
- ((*in >= 0x41) && (*in <= 0x5A)) ||
- ((*in >= 0x30) && (*in <= 0x39)) ||
- (*in == '_') || (*in == '-') ||
- (*in == '.'))
- in++;
- if ((*in > 0) && (*in < 0x80)) {
- count = in - ctxt->input->cur;
- ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
- ctxt->input->cur = in;
- ctxt->nbChars += count;
- ctxt->input->col += count;
- if (ret == NULL) {
- xmlErrMemory(ctxt, NULL);
- }
- return(ret);
- }
- }
- return(xmlParseNCNameComplex(ctxt));
-}
-
-/**
* xmlParseQName:
* @ctxt: an XML parser context
* @prefix: pointer to store the prefix part
@@ -7466,7 +8861,7 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
if (CUR == ':') {
l = xmlParseName(ctxt);
if (l != NULL) {
- xmlNsErr(ctxt, XML_NS_ERR_QNAME,
+ xmlNsErr(ctxt, XML_NS_ERR_QNAME,
"Failed to parse QName '%s'\n", l, NULL, NULL);
*prefix = NULL;
return(l);
@@ -7483,7 +8878,13 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
"Failed to parse QName '%s:'\n", p, NULL, NULL);
- tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
+ l = xmlParseNmtoken(ctxt);
+ if (l == NULL)
+ tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
+ else {
+ tmp = xmlBuildQName(l, p, NULL, 0);
+ xmlFree((char *)l);
+ }
p = xmlDictLookup(ctxt->dict, tmp, -1);
if (tmp != NULL) xmlFree(tmp);
*prefix = NULL;
@@ -7531,7 +8932,7 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
xmlChar const *prefix) {
- const xmlChar *cmp = name;
+ const xmlChar *cmp;
const xmlChar *in;
const xmlChar *ret;
const xmlChar *prefix2;
@@ -7540,10 +8941,10 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
GROW;
in = ctxt->input->cur;
-
+
cmp = prefix;
while (*in != 0 && *in == *cmp) {
- ++in;
+ ++in;
++cmp;
}
if ((*cmp == 0) && (*in == ':')) {
@@ -7581,20 +8982,20 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
*
* 3.3.3 Attribute-Value Normalization:
* Before the value of an attribute is passed to the application or
- * checked for validity, the XML processor must normalize it as follows:
+ * checked for validity, the XML processor must normalize it as follows:
* - a character reference is processed by appending the referenced
* character to the attribute value
* - an entity reference is processed by recursively processing the
- * replacement text of the entity
+ * replacement text of the entity
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
* appending #x20 to the normalized value, except that only a single
* #x20 is appended for a "#xD#xA" sequence that is part of an external
- * parsed entity or the literal entity value of an internal parsed entity
- * - other characters are processed by appending them to the normalized value
+ * parsed entity or the literal entity value of an internal parsed entity
+ * - other characters are processed by appending them to the normalized value
* If the declared value is not CDATA, then the XML processor must further
* process the normalized attribute value by discarding any leading and
* trailing space (#x20) characters, and by replacing sequences of space
- * (#x20) characters by a single space (#x20) character.
+ * (#x20) characters by a single space (#x20) character.
* All attributes for which no declaration has been read should be treated
* by a non-validating parser as if declared CDATA.
*
@@ -7609,9 +9010,12 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
xmlChar limit = 0;
const xmlChar *in = NULL, *start, *end, *last;
xmlChar *ret = NULL;
+ int line, col;
GROW;
in = (xmlChar *) CUR_PTR;
+ line = ctxt->input->line;
+ col = ctxt->input->col;
if (*in != '"' && *in != '\'') {
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
return (NULL);
@@ -7624,6 +9028,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
* pure ASCII.
*/
limit = *in++;
+ col++;
end = ctxt->input->end;
start = in;
if (in >= end) {
@@ -7640,34 +9045,56 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
/*
* Skip any leading spaces
*/
- while ((in < end) && (*in != limit) &&
+ while ((in < end) && (*in != limit) &&
((*in == 0x20) || (*in == 0x9) ||
(*in == 0xA) || (*in == 0xD))) {
+ if (*in == 0xA) {
+ line++; col = 1;
+ } else {
+ col++;
+ }
in++;
start = in;
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
in = in + delta;
}
end = ctxt->input->end;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ return(NULL);
+ }
}
}
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
+ col++;
if ((*in++ == 0x20) && (*in == 0x20)) break;
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
in = in + delta;
}
end = ctxt->input->end;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ return(NULL);
+ }
}
}
last = in;
@@ -7675,13 +9102,20 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
* skip the trailing blanks
*/
while ((last[-1] == 0x20) && (last > start)) last--;
- while ((in < end) && (*in != limit) &&
+ while ((in < end) && (*in != limit) &&
((*in == 0x20) || (*in == 0x9) ||
(*in == 0xA) || (*in == 0xD))) {
+ if (*in == 0xA) {
+ line++, col = 1;
+ } else {
+ col++;
+ }
in++;
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
@@ -7689,28 +9123,56 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
last = last + delta;
}
end = ctxt->input->end;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ return(NULL);
+ }
}
}
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ return(NULL);
+ }
if (*in != limit) goto need_complex;
} else {
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
in++;
+ col++;
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
in = in + delta;
}
end = ctxt->input->end;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ return(NULL);
+ }
}
}
last = in;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ return(NULL);
+ }
if (*in != limit) goto need_complex;
}
in++;
+ col++;
if (len != NULL) {
*len = last - start;
ret = (xmlChar *) start;
@@ -7719,6 +9181,8 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
ret = xmlStrndup(start, last - start);
}
CUR_PTR = in;
+ ctxt->input->line = line;
+ ctxt->input->col = col;
if (alloc) *alloc = 0;
return ret;
need_complex:
@@ -7743,9 +9207,10 @@ need_complex:
static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
- const xmlChar *pref, const xmlChar *elem,
- const xmlChar **prefix, xmlChar **value,
- int *len, int *alloc) {
+ const xmlChar * pref, const xmlChar * elem,
+ const xmlChar ** prefix, xmlChar ** value,
+ int *len, int *alloc)
+{
const xmlChar *name;
xmlChar *val, *internal_val = NULL;
int normalize = 0;
@@ -7754,9 +9219,9 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
GROW;
name = xmlParseQName(ctxt, prefix);
if (name == NULL) {
- xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
- "error parsing attribute name\n");
- return(NULL);
+ xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
+ "error parsing attribute name\n");
+ return (NULL);
}
/*
@@ -7766,8 +9231,9 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
int type;
type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
- pref, elem, *prefix, name);
- if (type != 0) normalize = 1;
+ pref, elem, *prefix, name);
+ if (type != 0)
+ normalize = 1;
}
/*
@@ -7776,58 +9242,75 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
SKIP_BLANKS;
if (RAW == '=') {
NEXT;
- SKIP_BLANKS;
- val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
- ctxt->instate = XML_PARSER_CONTENT;
+ SKIP_BLANKS;
+ val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
+ if (normalize) {
+ /*
+ * Sometimes a second normalisation pass for spaces is needed
+ * but that only happens if charrefs or entities refernces
+ * have been used in the attribute value, i.e. the attribute
+ * value have been extracted in an allocated string already.
+ */
+ if (*alloc) {
+ const xmlChar *val2;
+
+ val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
+ if ((val2 != NULL) && (val2 != val)) {
+ xmlFree(val);
+ val = (xmlChar *) val2;
+ }
+ }
+ }
+ ctxt->instate = XML_PARSER_CONTENT;
} else {
- xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
- "Specification mandate value for attribute %s\n", name);
- return(NULL);
+ xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
+ "Specification mandate value for attribute %s\n",
+ name);
+ return (NULL);
}
- if (*prefix == ctxt->str_xml) {
- /*
- * Check that xml:lang conforms to the specification
- * No more registered as an error, just generate a warning now
- * since this was deprecated in XML second edition
- */
- if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
- internal_val = xmlStrndup(val, *len);
- if (!xmlCheckLanguageID(internal_val)) {
- xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
- "Malformed value for xml:lang : %s\n",
- internal_val, NULL);
- }
- }
+ if (*prefix == ctxt->str_xml) {
+ /*
+ * Check that xml:lang conforms to the specification
+ * No more registered as an error, just generate a warning now
+ * since this was deprecated in XML second edition
+ */
+ if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
+ internal_val = xmlStrndup(val, *len);
+ if (!xmlCheckLanguageID(internal_val)) {
+ xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
+ "Malformed value for xml:lang : %s\n",
+ internal_val, NULL);
+ }
+ }
- /*
- * Check that xml:space conforms to the specification
- */
- if (xmlStrEqual(name, BAD_CAST "space")) {
- internal_val = xmlStrndup(val, *len);
- if (xmlStrEqual(internal_val, BAD_CAST "default"))
- *(ctxt->space) = 0;
- else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
- *(ctxt->space) = 1;
- else {
- xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
-"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
- internal_val, NULL);
- }
- }
- if (internal_val) {
- xmlFree(internal_val);
- }
- }
+ /*
+ * Check that xml:space conforms to the specification
+ */
+ if (xmlStrEqual(name, BAD_CAST "space")) {
+ internal_val = xmlStrndup(val, *len);
+ if (xmlStrEqual(internal_val, BAD_CAST "default"))
+ *(ctxt->space) = 0;
+ else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
+ *(ctxt->space) = 1;
+ else {
+ xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
+ "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
+ internal_val, NULL);
+ }
+ }
+ if (internal_val) {
+ xmlFree(internal_val);
+ }
+ }
*value = val;
- return(name);
+ return (name);
}
-
/**
* xmlParseStartTag2:
* @ctxt: an XML parser context
- *
+ *
* parse a start of tag either for rule element or
* EmptyElement. In both case we don't parse the tag closing chars.
* This routine is called when running SAX2 parsing
@@ -7836,13 +9319,13 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
+ * empty-element tag.
*
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
+ * empty-element tag.
*
* With namespace:
*
@@ -7865,7 +9348,7 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
const xmlChar **atts = ctxt->atts;
int maxatts = ctxt->maxatts;
int nratts, nbatts, nbdef;
- int i, j, nbNs, attval, oldline, oldcol;
+ int i, j, nbNs, attval, oldline, oldcol, inputNr;
const xmlChar *base;
unsigned long cur;
int nsNr = ctxt->nsNr;
@@ -7884,6 +9367,7 @@ reparse:
SHRINK;
base = ctxt->input->base;
cur = ctxt->input->cur - ctxt->input->base;
+ inputNr = ctxt->inputNr;
oldline = ctxt->input->line;
oldcol = ctxt->input->col;
nbatts = 0;
@@ -7909,18 +9393,19 @@ reparse:
*/
SKIP_BLANKS;
GROW;
- if (ctxt->input->base != base) goto base_changed;
+ if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
+ goto base_changed;
- while ((RAW != '>') &&
+ while (((RAW != '>') &&
((RAW != '/') || (NXT(1) != '>')) &&
- (IS_BYTE_CHAR(RAW))) {
+ (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *q = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
int len = -1, alloc = 0;
attname = xmlParseAttribute2(ctxt, prefix, localname,
&aprefix, &attvalue, &len, &alloc);
- if (ctxt->input->base != base) {
+ if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) {
if ((attvalue != NULL) && (alloc != 0))
xmlFree(attvalue);
attvalue = NULL;
@@ -7932,20 +9417,42 @@ reparse:
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
xmlURIPtr uri;
+ if (URL == NULL) {
+ xmlErrMemory(ctxt, "dictionary allocation failure");
+ if ((attvalue != NULL) && (alloc != 0))
+ xmlFree(attvalue);
+ return(NULL);
+ }
if (*URL != 0) {
uri = xmlParseURI((const char *) URL);
if (uri == NULL) {
- xmlWarningMsg(ctxt, XML_WAR_NS_URI,
- "xmlns: %s not a valid URI\n",
- URL, NULL);
+ xmlNsErr(ctxt, XML_WAR_NS_URI,
+ "xmlns: '%s' is not a valid URI\n",
+ URL, NULL, NULL);
} else {
if (uri->scheme == NULL) {
- xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
- "xmlns: URI %s is not absolute\n",
- URL, NULL);
+ xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
+ "xmlns: URI %s is not absolute\n",
+ URL, NULL, NULL);
}
xmlFreeURI(uri);
}
+ if (URL == ctxt->str_xml_ns) {
+ if (attname != ctxt->str_xml) {
+ xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
+ "xml namespace URI cannot be the default namespace\n",
+ NULL, NULL, NULL);
+ }
+ goto skip_default_ns;
+ }
+ if ((len == 29) &&
+ (xmlStrEqual(URL,
+ BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
+ xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
+ "reuse of the xmlns namespace name is forbidden\n",
+ NULL, NULL, NULL);
+ goto skip_default_ns;
+ }
}
/*
* check that it's not a defined namespace
@@ -7957,7 +9464,15 @@ reparse:
xmlErrAttributeDup(ctxt, NULL, attname);
else
if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
+skip_default_ns:
if (alloc != 0) xmlFree(attvalue);
+ if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
+ break;
+ if (!IS_BLANK_CH(RAW)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
+ "attributes construct error\n");
+ break;
+ }
SKIP_BLANKS;
continue;
}
@@ -7974,22 +9489,49 @@ reparse:
/*
* Do not keep a namespace definition node
*/
- if (alloc != 0) xmlFree(attvalue);
- SKIP_BLANKS;
- continue;
+ goto skip_ns;
+ }
+ if (URL == ctxt->str_xml_ns) {
+ if (attname != ctxt->str_xml) {
+ xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
+ "xml namespace URI mapped to wrong prefix\n",
+ NULL, NULL, NULL);
+ }
+ goto skip_ns;
+ }
+ if (attname == ctxt->str_xmlns) {
+ xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
+ "redefinition of the xmlns prefix is forbidden\n",
+ NULL, NULL, NULL);
+ goto skip_ns;
}
- uri = xmlParseURI((const char *) URL);
- if (uri == NULL) {
- xmlWarningMsg(ctxt, XML_WAR_NS_URI,
- "xmlns:%s: '%s' is not a valid URI\n",
- attname, URL);
+ if ((len == 29) &&
+ (xmlStrEqual(URL,
+ BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
+ xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
+ "reuse of the xmlns namespace name is forbidden\n",
+ NULL, NULL, NULL);
+ goto skip_ns;
+ }
+ if ((URL == NULL) || (URL[0] == 0)) {
+ xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
+ "xmlns:%s: Empty XML namespace is not allowed\n",
+ attname, NULL, NULL);
+ goto skip_ns;
} else {
- if ((ctxt->pedantic) && (uri->scheme == NULL)) {
- xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
+ uri = xmlParseURI((const char *) URL);
+ if (uri == NULL) {
+ xmlNsErr(ctxt, XML_WAR_NS_URI,
+ "xmlns:%s: '%s' is not a valid URI\n",
+ attname, URL, NULL);
+ } else {
+ if ((ctxt->pedantic) && (uri->scheme == NULL)) {
+ xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
"xmlns:%s: URI %s is not absolute\n",
- attname, URL);
+ attname, URL, NULL);
+ }
+ xmlFreeURI(uri);
}
- xmlFreeURI(uri);
}
/*
@@ -8002,9 +9544,18 @@ reparse:
xmlErrAttributeDup(ctxt, aprefix, attname);
else
if (nsPush(ctxt, attname, URL) > 0) nbNs++;
+skip_ns:
if (alloc != 0) xmlFree(attvalue);
+ if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
+ break;
+ if (!IS_BLANK_CH(RAW)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
+ "attributes construct error\n");
+ break;
+ }
SKIP_BLANKS;
- if (ctxt->input->base != base) goto base_changed;
+ if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
+ goto base_changed;
continue;
}
@@ -8036,10 +9587,13 @@ reparse:
xmlFree(attvalue);
}
-failed:
+failed:
GROW
- if (ctxt->input->base != base) goto base_changed;
+ if (ctxt->instate == XML_PARSER_EOF)
+ break;
+ if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
+ goto base_changed;
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
break;
if (!IS_BLANK_CH(RAW)) {
@@ -8055,7 +9609,8 @@ failed:
break;
}
GROW;
- if (ctxt->input->base != base) goto base_changed;
+ if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
+ goto base_changed;
}
/*
@@ -8067,8 +9622,8 @@ failed:
defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
if (defaults != NULL) {
for (i = 0;i < defaults->nbAttrs;i++) {
- attname = defaults->values[4 * i];
- aprefix = defaults->values[4 * i + 1];
+ attname = defaults->values[5 * i];
+ aprefix = defaults->values[5 * i + 1];
/*
* special work for namespaces defaulted defs
@@ -8083,9 +9638,9 @@ failed:
if (j <= nbNs) continue;
nsname = xmlGetNamespace(ctxt, NULL);
- if (nsname != defaults->values[4 * i + 2]) {
+ if (nsname != defaults->values[5 * i + 2]) {
if (nsPush(ctxt, NULL,
- defaults->values[4 * i + 2]) > 0)
+ defaults->values[5 * i + 2]) > 0)
nbNs++;
}
} else if (aprefix == ctxt->str_xmlns) {
@@ -8100,7 +9655,7 @@ failed:
nsname = xmlGetNamespace(ctxt, attname);
if (nsname != defaults->values[2]) {
if (nsPush(ctxt, attname,
- defaults->values[4 * i + 2]) > 0)
+ defaults->values[5 * i + 2]) > 0)
nbNs++;
}
} else {
@@ -8126,8 +9681,14 @@ failed:
atts[nbatts++] = NULL;
else
atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
- atts[nbatts++] = defaults->values[4 * i + 2];
- atts[nbatts++] = defaults->values[4 * i + 3];
+ atts[nbatts++] = defaults->values[5 * i + 2];
+ atts[nbatts++] = defaults->values[5 * i + 3];
+ if ((ctxt->standalone == 1) &&
+ (defaults->values[5 * i + 4] != NULL)) {
+ xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
+ "standalone: attribute %s on %s defaulted from external subset\n",
+ attname, localname);
+ }
nbdef++;
}
}
@@ -8154,7 +9715,7 @@ failed:
/*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same
- * start-tag or empty-element tag.
+ * start-tag or empty-element tag.
* As extended by the Namespace in XML REC.
*/
for (j = 0; j < i;j += 5) {
@@ -8216,6 +9777,17 @@ base_changed:
if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
xmlFree((xmlChar *) atts[i]);
}
+
+ /*
+ * We can't switch from one entity to another in the middle
+ * of a start tag
+ */
+ if (inputNr != ctxt->inputNr) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
+ "Start tag doesn't start and stop in the same entity\n");
+ return(NULL);
+ }
+
ctxt->input->cur = ctxt->input->base + cur;
ctxt->input->line = oldline;
ctxt->input->col = oldcol;
@@ -8255,9 +9827,11 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
if (ctxt->input->cur[tlen] == '>') {
ctxt->input->cur += tlen + 1;
+ ctxt->input->col += tlen + 1;
goto done;
}
ctxt->input->cur += tlen;
+ ctxt->input->col += tlen;
name = (xmlChar*)1;
} else {
if (prefix == NULL)
@@ -8270,6 +9844,8 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
* We should definitely be at the ending "S? '>'" part
*/
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
SKIP_BLANKS;
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
@@ -8279,11 +9855,13 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
/*
* [ WFC: Element Type Match ]
* The Name in an element's end-tag must match the element type in the
- * start-tag.
+ * start-tag.
*
*/
if (name != (xmlChar*)1) {
if (name == NULL) name = BAD_CAST "unparseable";
+ if ((line == 0) && (ctxt->node != NULL))
+ line = ctxt->node->line;
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
"Opening and ending tag mismatch: %s line %d and %s\n",
ctxt->name, line, name);
@@ -8306,7 +9884,7 @@ done:
/**
* xmlParseCDSect:
* @ctxt: an XML parser context
- *
+ *
* Parse escaped pure raw content.
*
* [18] CDSect ::= CDStart CData CDEnd
@@ -8359,14 +9937,21 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) {
if (len + 5 >= size) {
xmlChar *tmp;
- size *= 2;
- tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+ if ((size > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
+ "CData section too big found", NULL);
+ xmlFree (buf);
+ return;
+ }
+ tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
if (tmp == NULL) {
xmlFree(buf);
xmlErrMemory(ctxt, NULL);
return;
}
buf = tmp;
+ size *= 2;
}
COPY_BUF(rl,buf,len,r);
r = s;
@@ -8376,6 +9961,10 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) {
count++;
if (count > 50) {
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
count = 0;
}
NEXTL(l);
@@ -8455,7 +10044,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
/*
* Fifth case : a reference. If if has not been resolved,
- * parsing returns it's Name, create the node
+ * parsing returns it's Name, create the node
*/
else if (*cur == '&') {
@@ -8480,7 +10069,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
"detected an error in element content\n");
- ctxt->instate = XML_PARSER_EOF;
+ xmlHaltParser(ctxt);
break;
}
}
@@ -8496,25 +10085,26 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
*
* [ WFC: Element Type Match ]
* The Name in an element's end-tag must match the element type in the
- * start-tag.
+ * start-tag.
*
*/
void
xmlParseElement(xmlParserCtxtPtr ctxt) {
const xmlChar *name;
- const xmlChar *prefix;
- const xmlChar *URI;
+ const xmlChar *prefix = NULL;
+ const xmlChar *URI = NULL;
xmlParserNodeInfo node_info;
- int line, tlen;
+ int line, tlen = 0;
xmlNodePtr ret;
int nsNr = ctxt->nsNr;
- if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
- xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
- "Excessive depth in document: change xmlParserMaxDepth = %d\n",
- xmlParserMaxDepth);
- ctxt->instate = XML_PARSER_EOF;
+ if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
+ "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
+ xmlParserMaxDepth);
+ xmlHaltParser(ctxt);
return;
}
@@ -8541,6 +10131,8 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
else
name = xmlParseStartTag(ctxt);
#endif /* LIBXML_SAX1_ENABLED */
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
if (name == NULL) {
spacePop(ctxt);
return;
@@ -8552,7 +10144,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
/*
* [ VC: Root Element Type ]
* The Name in the document type declaration must match the element
- * type of the root element.
+ * type of the root element.
*/
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
ctxt->node && (ctxt->node == ctxt->myDoc->children))
@@ -8621,6 +10213,8 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
* Parse the content of the element:
*/
xmlParseContent(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
if (!IS_BYTE_CHAR(RAW)) {
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
"Premature end of data in tag %s line %d\n",
@@ -8667,7 +10261,9 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
*
* parse the XML version value.
*
- * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
+ * [26] VersionNum ::= '1.' [0-9]+
+ *
+ * In practice allow [0-9].[0-9]+ at that level
*
* Returns the string giving the XML version number, or NULL
*/
@@ -8684,17 +10280,28 @@ xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
return(NULL);
}
cur = CUR;
- while (((cur >= 'a') && (cur <= 'z')) ||
- ((cur >= 'A') && (cur <= 'Z')) ||
- ((cur >= '0') && (cur <= '9')) ||
- (cur == '_') || (cur == '.') ||
- (cur == ':') || (cur == '-')) {
+ if (!((cur >= '0') && (cur <= '9'))) {
+ xmlFree(buf);
+ return(NULL);
+ }
+ buf[len++] = cur;
+ NEXT;
+ cur=CUR;
+ if (cur != '.') {
+ xmlFree(buf);
+ return(NULL);
+ }
+ buf[len++] = cur;
+ NEXT;
+ cur=CUR;
+ while ((cur >= '0') && (cur <= '9')) {
if (len + 1 >= size) {
xmlChar *tmp;
size *= 2;
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
if (tmp == NULL) {
+ xmlFree(buf);
xmlErrMemory(ctxt, NULL);
return(NULL);
}
@@ -8711,11 +10318,11 @@ xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
/**
* xmlParseVersionInfo:
* @ctxt: an XML parser context
- *
+ *
* parse the XML version.
*
* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
- *
+ *
* [25] Eq ::= S? '=' S?
*
* Returns the version string, e.g. "1.0"
@@ -8780,7 +10387,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) {
xmlErrMemory(ctxt, NULL);
return(NULL);
}
-
+
buf[len++] = cur;
NEXT;
cur = CUR;
@@ -8820,7 +10427,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) {
/**
* xmlParseEncodingDecl:
* @ctxt: an XML parser context
- *
+ *
* parse the XML encoding declaration
*
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
@@ -8849,6 +10456,8 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
encoding = xmlParseEncName(ctxt);
if (RAW != '"') {
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
+ xmlFree((xmlChar *) encoding);
+ return(NULL);
} else
NEXT;
} else if (RAW == '\''){
@@ -8856,11 +10465,22 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
encoding = xmlParseEncName(ctxt);
if (RAW != '\'') {
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
+ xmlFree((xmlChar *) encoding);
+ return(NULL);
} else
NEXT;
} else {
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
}
+
+ /*
+ * Non standard parsing, allowing the user to ignore encoding
+ */
+ if (ctxt->options & XML_PARSE_IGNORE_ENC) {
+ xmlFree((xmlChar *) encoding);
+ return(NULL);
+ }
+
/*
* UTF-16 encoding stwich has already taken place at this stage,
* more over the little-endian/big-endian selection is already done
@@ -8868,6 +10488,18 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
if ((encoding != NULL) &&
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
+ /*
+ * If no encoding was passed to the parser, that we are
+ * using UTF-16 and no decoder is present i.e. the
+ * document is apparently UTF-8 compatible, then raise an
+ * encoding mismatch fatal error
+ */
+ if ((ctxt->encoding == NULL) &&
+ (ctxt->input->buf != NULL) &&
+ (ctxt->input->buf->encoder == NULL)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
+ "Document labelled UTF-16 but has UTF-8 content\n");
+ }
if (ctxt->encoding != NULL)
xmlFree((xmlChar *) ctxt->encoding);
ctxt->encoding = encoding;
@@ -8891,7 +10523,11 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
handler = xmlFindCharEncodingHandler((const char *) encoding);
if (handler != NULL) {
- xmlSwitchToEncoding(ctxt, handler);
+ if (xmlSwitchToEncoding(ctxt, handler) < 0) {
+ /* failed to convert */
+ ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+ return(NULL);
+ }
} else {
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"Unsupported encoding %s\n", encoding);
@@ -8909,7 +10545,7 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
* parse the XML standalone declaration
*
* [32] SDDecl ::= S 'standalone' Eq
- * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
+ * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
*
* [ VC: Standalone Document Declaration ]
* TODO The standalone document declaration must have the value "no"
@@ -8925,12 +10561,17 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
* - element types with element content, if white space occurs directly
* within any instance of those types.
*
- * Returns 1 if standalone, 0 otherwise
+ * Returns:
+ * 1 if standalone="yes"
+ * 0 if standalone="no"
+ * -2 if standalone attribute is missing or invalid
+ * (A standalone value of -2 means that the XML declaration was found,
+ * but no value was specified for the standalone attribute).
*/
int
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
- int standalone = -1;
+ int standalone = -2;
SKIP_BLANKS;
if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
@@ -8984,7 +10625,7 @@ xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
/**
* xmlParseXMLDecl:
* @ctxt: an XML parser context
- *
+ *
* parse an XML declaration header
*
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
@@ -9021,11 +10662,23 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
} else {
if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
/*
- * TODO: Blueberry should be detected here
+ * Changed here for XML-1.0 5th edition
*/
- xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
- "Unsupported version '%s'\n",
- version, NULL);
+ if (ctxt->options & XML_PARSE_OLD10) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
+ "Unsupported version '%s'\n",
+ version);
+ } else {
+ if ((version[0] == '1') && ((version[1] == '.'))) {
+ xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
+ "Unsupported version '%s'\n",
+ version, NULL);
+ } else {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
+ "Unsupported version '%s'\n",
+ version);
+ }
+ }
}
if (ctxt->version != NULL)
xmlFree((void *) ctxt->version);
@@ -9043,7 +10696,8 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
}
xmlParseEncodingDecl(ctxt);
- if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
+ if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
+ (ctxt->instate == XML_PARSER_EOF)) {
/*
* The XML REC instructs us to stop parsing right here
*/
@@ -9060,6 +10714,12 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
}
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
}
+
+ /*
+ * We can grow the input buffer freely at that point
+ */
+ GROW;
+
SKIP_BLANKS;
ctxt->input->standalone = xmlParseSDDecl(ctxt);
@@ -9080,7 +10740,7 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
/**
* xmlParseMisc:
* @ctxt: an XML parser context
- *
+ *
* parse an XML Misc* optional field.
*
* [27] Misc ::= Comment | PI | S
@@ -9088,9 +10748,10 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
void
xmlParseMisc(xmlParserCtxtPtr ctxt) {
- while (((RAW == '<') && (NXT(1) == '?')) ||
- (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
- IS_BLANK_CH(CUR)) {
+ while ((ctxt->instate != XML_PARSER_EOF) &&
+ (((RAW == '<') && (NXT(1) == '?')) ||
+ (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
+ IS_BLANK_CH(CUR))) {
if ((RAW == '<') && (NXT(1) == '?')) {
xmlParsePI(ctxt);
} else if (IS_BLANK_CH(CUR)) {
@@ -9103,7 +10764,7 @@ xmlParseMisc(xmlParserCtxtPtr ctxt) {
/**
* xmlParseDocument:
* @ctxt: an XML parser context
- *
+ *
* parse an XML document (and build a tree if using the standard SAX
* interface).
*
@@ -9137,10 +10798,12 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
*/
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
- if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
+ if ((ctxt->encoding == NULL) &&
((ctxt->input->end - ctxt->input->cur) >= 4)) {
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
@@ -9158,19 +10821,26 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
if (CUR == 0) {
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
+ return(-1);
}
/*
* Check for the XMLDecl in the Prolog.
+ * do not GROW here to avoid the detected encoder to decode more
+ * than just the first line, unless the amount of data is really
+ * too small to hold "<?xml version="1.0" encoding="foo"
*/
- GROW;
+ if ((ctxt->input->end - ctxt->input->cur) < 35) {
+ GROW;
+ }
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
/*
* Note that we will switch encoding on the fly.
*/
xmlParseXMLDecl(ctxt);
- if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
+ if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
+ (ctxt->instate == XML_PARSER_EOF)) {
/*
* The XML REC instructs us to stop parsing right here
*/
@@ -9183,6 +10853,12 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
}
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
ctxt->sax->startDocument(ctxt->userData);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
+ if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
+ (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
+ ctxt->myDoc->compression = ctxt->input->buf->compressed;
+ }
/*
* The Misc part of the Prolog
@@ -9202,6 +10878,8 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
if (RAW == '[') {
ctxt->instate = XML_PARSER_DTD;
xmlParseInternalSubset(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
}
/*
@@ -9212,8 +10890,11 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
(!ctxt->disableSAX))
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
ctxt->extSubSystem, ctxt->extSubURI);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
ctxt->inSubset = 0;
+ xmlCleanSpecialAttr(ctxt);
ctxt->instate = XML_PARSER_PROLOG;
xmlParseMisc(ctxt);
@@ -9258,6 +10939,15 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
ctxt->myDoc = NULL;
}
+ if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
+ ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
+ if (ctxt->valid)
+ ctxt->myDoc->properties |= XML_DOC_DTDVALID;
+ if (ctxt->nsWellFormed)
+ ctxt->myDoc->properties |= XML_DOC_NSVALID;
+ if (ctxt->options & XML_PARSE_OLD10)
+ ctxt->myDoc->properties |= XML_DOC_OLD10;
+ }
if (! ctxt->wellFormed) {
ctxt->valid = 0;
return(-1);
@@ -9268,7 +10958,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
/**
* xmlParseExtParsedEnt:
* @ctxt: an XML parser context
- *
+ *
* parse a general parsed entity
* An external general parsed entity is well-formed if it matches the
* production labeled extParsedEnt.
@@ -9299,7 +10989,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
@@ -9342,6 +11032,8 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
}
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
ctxt->sax->startDocument(ctxt->userData);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
/*
* Doing validity checking on chunk doesn't make sense
@@ -9352,7 +11044,9 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
ctxt->depth = 0;
xmlParseContent(ctxt);
-
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
+
if ((RAW == '<') && (NXT(1) == '/')) {
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
} else if (RAW != 0) {
@@ -9372,7 +11066,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
#ifdef LIBXML_PUSH_ENABLED
/************************************************************************
* *
- * Progressive parsing interfaces *
+ * Progressive parsing interfaces *
* *
************************************************************************/
@@ -9409,8 +11103,8 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
buf = in->base;
len = in->length;
} else {
- buf = in->buf->buffer->content;
- len = in->buf->buffer->use;
+ buf = xmlBufContent(in->buf->buffer);
+ len = xmlBufUse(in->buf->buffer);
}
/* take into account the sequence length */
if (third) len -= 2;
@@ -9433,7 +11127,7 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
xmlGenericError(xmlGenericErrorContext,
"PP: lookup '%c%c' found at %d\n",
first, next, base);
- else
+ else
xmlGenericError(xmlGenericErrorContext,
"PP: lookup '%c%c%c' found at %d\n",
first, next, third, base);
@@ -9449,7 +11143,7 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
else if (third == 0)
xmlGenericError(xmlGenericErrorContext,
"PP: lookup '%c%c' failed\n", first, next);
- else
+ else
xmlGenericError(xmlGenericErrorContext,
"PP: lookup '%c%c%c' failed\n", first, next, third);
#endif
@@ -9531,7 +11225,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) {
if ((utf == NULL) || (len <= 0))
return(0);
-
+
for (ix = 0; ix < len;) { /* string is 0-terminated */
c = utf[ix];
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
@@ -9542,7 +11236,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) {
else
return(-ix);
} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
- if (ix + 2 > len) return(ix);
+ if (ix + 2 > len) return(-ix);
if ((utf[ix+1] & 0xc0 ) != 0x80)
return(-ix);
codepoint = (utf[ix] & 0x1f) << 6;
@@ -9551,7 +11245,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) {
return(-ix);
ix += 2;
} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
- if (ix + 3 > len) return(ix);
+ if (ix + 3 > len) return(-ix);
if (((utf[ix+1] & 0xc0) != 0x80) ||
((utf[ix+2] & 0xc0) != 0x80))
return(-ix);
@@ -9562,7 +11256,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) {
return(-ix);
ix += 3;
} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
- if (ix + 4 > len) return(ix);
+ if (ix + 4 > len) return(-ix);
if (((utf[ix+1] & 0xc0) != 0x80) ||
((utf[ix+2] & 0xc0) != 0x80) ||
((utf[ix+3] & 0xc0) != 0x80))
@@ -9659,11 +11353,11 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
}
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
- while (1) {
+ while (ctxt->instate != XML_PARSER_EOF) {
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
return(0);
-
+
/*
* Pop-up of finished entities.
*/
@@ -9678,22 +11372,22 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
/*
* If we are operating on converted input, try to flush
* remainng chars to avoid them stalling in the non-converted
- * buffer.
+ * buffer. But do not do this in document start where
+ * encoding="..." may not have been read and we work on a
+ * guessed encoding.
*/
- if ((ctxt->input->buf->raw != NULL) &&
- (ctxt->input->buf->raw->use > 0)) {
- int base = ctxt->input->base -
- ctxt->input->buf->buffer->content;
- int current = ctxt->input->cur - ctxt->input->base;
+ if ((ctxt->instate != XML_PARSER_START) &&
+ (ctxt->input->buf->raw != NULL) &&
+ (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
+ ctxt->input);
+ size_t current = ctxt->input->cur - ctxt->input->base;
xmlParserInputBufferPush(ctxt->input->buf, 0, "");
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + current;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[
- ctxt->input->buf->buffer->use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
+ base, current);
}
- avail = ctxt->input->buf->buffer->use -
+ avail = xmlBufUse(ctxt->input->buf->buffer) -
(ctxt->input->cur - ctxt->input->base);
}
if (avail < 1)
@@ -9715,7 +11409,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (avail < 4)
goto done;
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines,
@@ -9740,7 +11434,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
ctxt->sax->setDocumentLocator(ctxt->userData,
&xmlDefaultSAXLocator);
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
- ctxt->instate = XML_PARSER_EOF;
+ xmlHaltParser(ctxt);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering EOF\n");
@@ -9773,7 +11467,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
* The XML REC instructs us to stop parsing right
* here
*/
- ctxt->instate = XML_PARSER_EOF;
+ xmlHaltParser(ctxt);
return(0);
}
ctxt->standalone = ctxt->input->standalone;
@@ -9820,8 +11514,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
break;
case XML_PARSER_START_TAG: {
const xmlChar *name;
- const xmlChar *prefix;
- const xmlChar *URI;
+ const xmlChar *prefix = NULL;
+ const xmlChar *URI = NULL;
int nsNr = ctxt->nsNr;
if ((avail < 2) && (ctxt->inputNr == 1))
@@ -9829,7 +11523,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
cur = ctxt->input->cur[0];
if (cur != '<') {
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
- ctxt->instate = XML_PARSER_EOF;
+ xmlHaltParser(ctxt);
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
ctxt->sax->endDocument(ctxt->userData);
goto done;
@@ -9857,9 +11551,11 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
else
name = xmlParseStartTag(ctxt);
#endif /* LIBXML_SAX1_ENABLED */
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
if (name == NULL) {
spacePop(ctxt);
- ctxt->instate = XML_PARSER_EOF;
+ xmlHaltParser(ctxt);
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
ctxt->sax->endDocument(ctxt->userData);
goto done;
@@ -9868,7 +11564,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
/*
* [ VC: Root Element Type ]
* The Name in the document type declaration must match
- * the element type of the root element.
+ * the element type of the root element.
*/
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
ctxt->node && (ctxt->node == ctxt->myDoc->children))
@@ -9897,12 +11593,15 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
ctxt->sax->endElement(ctxt->userData, name);
#endif /* LIBXML_SAX1_ENABLED */
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
spacePop(ctxt);
if (ctxt->nameNr == 0) {
ctxt->instate = XML_PARSER_EPILOG;
} else {
ctxt->instate = XML_PARSER_CONTENT;
}
+ ctxt->progressive = 1;
break;
}
if (RAW == '>') {
@@ -9922,6 +11621,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
#endif /* LIBXML_SAX1_ENABLED */
ctxt->instate = XML_PARSER_CONTENT;
+ ctxt->progressive = 1;
break;
}
case XML_PARSER_CONTENT: {
@@ -9939,9 +11639,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
break;
} else if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
+ (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
+ ctxt->progressive = XML_PARSER_PI;
goto done;
+ }
xmlParsePI(ctxt);
+ ctxt->instate = XML_PARSER_CONTENT;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next != '!')) {
ctxt->instate = XML_PARSER_START_TAG;
break;
@@ -9955,10 +11659,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
ctxt->input->cur += 4;
term = xmlParseLookupSequence(ctxt, '-', '-', '>');
ctxt->input->cur -= 4;
- if ((!terminate) && (term < 0))
+ if ((!terminate) && (term < 0)) {
+ ctxt->progressive = XML_PARSER_COMMENT;
goto done;
+ }
xmlParseComment(ctxt);
ctxt->instate = XML_PARSER_CONTENT;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
(ctxt->input->cur[2] == '[') &&
(ctxt->input->cur[3] == 'C') &&
@@ -10015,7 +11722,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
"detected an error in element content\n");
- ctxt->instate = XML_PARSER_EOF;
+ xmlHaltParser(ctxt);
break;
}
break;
@@ -10043,7 +11750,9 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
else
xmlParseEndTag1(ctxt, 0);
#endif /* LIBXML_SAX1_ENABLED */
- if (ctxt->nameNr == 0) {
+ if (ctxt->instate == XML_PARSER_EOF) {
+ /* Nothing */
+ } else if (ctxt->nameNr == 0) {
ctxt->instate = XML_PARSER_EPILOG;
} else {
ctxt->instate = XML_PARSER_CONTENT;
@@ -10051,7 +11760,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
break;
case XML_PARSER_CDATA_SECTION: {
/*
- * The Push mode need to have the SAX callback for
+ * The Push mode need to have the SAX callback for
* cdataBlock merge back contiguous callbacks.
*/
int base;
@@ -10061,7 +11770,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
int tmp;
- tmp = xmlCheckCdataPush(ctxt->input->cur,
+ tmp = xmlCheckCdataPush(ctxt->input->cur,
XML_PARSER_BIG_BUFFER_SIZE);
if (tmp < 0) {
tmp = -tmp;
@@ -10076,6 +11785,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
ctxt->sax->characters(ctxt->userData,
ctxt->input->cur, tmp);
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
SKIPL(tmp);
ctxt->checkIndex = 0;
}
@@ -10089,7 +11800,20 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
ctxt->input->cur += tmp;
goto encoding_error;
}
- if ((ctxt->sax != NULL) && (base > 0) &&
+ if ((ctxt->sax != NULL) && (base == 0) &&
+ (ctxt->sax->cdataBlock != NULL) &&
+ (!ctxt->disableSAX)) {
+ /*
+ * Special case to provide identical behaviour
+ * between pull and push parsers on enpty CDATA
+ * sections
+ */
+ if ((ctxt->input->cur - ctxt->input->base >= 9) &&
+ (!strncmp((const char *)&ctxt->input->cur[-9],
+ "<![CDATA[", 9)))
+ ctxt->sax->cdataBlock(ctxt->userData,
+ BAD_CAST "", 0);
+ } else if ((ctxt->sax != NULL) && (base > 0) &&
(!ctxt->disableSAX)) {
if (ctxt->sax->cdataBlock != NULL)
ctxt->sax->cdataBlock(ctxt->userData,
@@ -10098,6 +11822,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
ctxt->sax->characters(ctxt->userData,
ctxt->input->cur, base);
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
SKIPL(base + 3);
ctxt->checkIndex = 0;
ctxt->instate = XML_PARSER_CONTENT;
@@ -10114,7 +11840,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
avail = ctxt->input->length -
(ctxt->input->cur - ctxt->input->base);
else
- avail = ctxt->input->buf->buffer->use -
+ avail = xmlBufUse(ctxt->input->buf->buffer) -
(ctxt->input->cur - ctxt->input->base);
if (avail < 2)
goto done;
@@ -10122,25 +11848,38 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
next = ctxt->input->cur[1];
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
+ (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
+ ctxt->progressive = XML_PARSER_PI;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
#endif
xmlParsePI(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
+ ctxt->instate = XML_PARSER_MISC;
+ ctxt->progressive = 1;
+ ctxt->checkIndex = 0;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') &&
(ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
+ ctxt->progressive = XML_PARSER_COMMENT;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
#endif
xmlParseComment(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
ctxt->instate = XML_PARSER_MISC;
+ ctxt->progressive = 1;
+ ctxt->checkIndex = 0;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == 'D') &&
(ctxt->input->cur[3] == 'O') &&
@@ -10150,14 +11889,20 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
(ctxt->input->cur[7] == 'P') &&
(ctxt->input->cur[8] == 'E')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
+ (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
+ ctxt->progressive = XML_PARSER_DTD;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing internal subset\n");
#endif
ctxt->inSubset = 1;
+ ctxt->progressive = 0;
+ ctxt->checkIndex = 0;
xmlParseDocTypeDecl(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
if (RAW == '[') {
ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
@@ -10175,6 +11920,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
ctxt->intSubName, ctxt->extSubSystem,
ctxt->extSubURI);
ctxt->inSubset = 0;
+ xmlCleanSpecialAttr(ctxt);
ctxt->instate = XML_PARSER_PROLOG;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -10186,7 +11932,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
goto done;
} else {
ctxt->instate = XML_PARSER_START_TAG;
- ctxt->progressive = 1;
+ ctxt->progressive = XML_PARSER_START_TAG;
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -10199,38 +11945,50 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->input->buf == NULL)
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
else
- avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
- if (avail < 2)
+ avail = xmlBufUse(ctxt->input->buf->buffer) -
+ (ctxt->input->cur - ctxt->input->base);
+ if (avail < 2)
goto done;
cur = ctxt->input->cur[0];
next = ctxt->input->cur[1];
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
+ (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
+ ctxt->progressive = XML_PARSER_PI;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
#endif
xmlParsePI(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
+ ctxt->instate = XML_PARSER_PROLOG;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
+ ctxt->progressive = XML_PARSER_COMMENT;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
#endif
xmlParseComment(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
ctxt->instate = XML_PARSER_PROLOG;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(avail < 4)) {
goto done;
} else {
ctxt->instate = XML_PARSER_START_TAG;
if (ctxt->progressive == 0)
- ctxt->progressive = 1;
+ ctxt->progressive = XML_PARSER_START_TAG;
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -10243,38 +12001,49 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->input->buf == NULL)
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
else
- avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
+ avail = xmlBufUse(ctxt->input->buf->buffer) -
+ (ctxt->input->cur - ctxt->input->base);
if (avail < 2)
goto done;
cur = ctxt->input->cur[0];
next = ctxt->input->cur[1];
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
+ (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
+ ctxt->progressive = XML_PARSER_PI;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
#endif
xmlParsePI(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
ctxt->instate = XML_PARSER_EPILOG;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
+ ctxt->progressive = XML_PARSER_COMMENT;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
#endif
xmlParseComment(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
ctxt->instate = XML_PARSER_EPILOG;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(avail < 4)) {
goto done;
} else {
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
- ctxt->instate = XML_PARSER_EOF;
+ xmlHaltParser(ctxt);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering EOF\n");
@@ -10298,29 +12067,28 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
int base, i;
xmlChar *buf;
xmlChar quote = 0;
+ size_t use;
base = ctxt->input->cur - ctxt->input->base;
if (base < 0) return(0);
if (ctxt->checkIndex > base)
base = ctxt->checkIndex;
- buf = ctxt->input->buf->buffer->content;
- for (;(unsigned int) base < ctxt->input->buf->buffer->use;
- base++) {
+ buf = xmlBufContent(ctxt->input->buf->buffer);
+ use = xmlBufUse(ctxt->input->buf->buffer);
+ for (;(unsigned int) base < use; base++) {
if (quote != 0) {
if (buf[base] == quote)
quote = 0;
- continue;
+ continue;
}
if ((quote == 0) && (buf[base] == '<')) {
int found = 0;
/* special handling of comments */
- if (((unsigned int) base + 4 <
- ctxt->input->buf->buffer->use) &&
+ if (((unsigned int) base + 4 < use) &&
(buf[base + 1] == '!') &&
(buf[base + 2] == '-') &&
(buf[base + 3] == '-')) {
- for (;(unsigned int) base + 3 <
- ctxt->input->buf->buffer->use; base++) {
+ for (;(unsigned int) base + 3 < use; base++) {
if ((buf[base] == '-') &&
(buf[base + 1] == '-') &&
(buf[base + 2] == '>')) {
@@ -10351,17 +12119,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
fprintf(stderr, "%c%c%c%c: ", buf[base],
buf[base + 1], buf[base + 2], buf[base + 3]);
#endif
- if ((unsigned int) base +1 >=
- ctxt->input->buf->buffer->use)
+ if ((unsigned int) base +1 >= use)
break;
if (buf[base + 1] == ']') {
/* conditional crap, skip both ']' ! */
base++;
continue;
}
- for (i = 1;
- (unsigned int) base + i < ctxt->input->buf->buffer->use;
- i++) {
+ for (i = 1; (unsigned int) base + i < use; i++) {
if (buf[base + i] == '>') {
#if 0
fprintf(stderr, "found\n");
@@ -10379,7 +12144,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
fprintf(stderr, "end of stream\n");
#endif
break;
-
+
}
not_end_of_int_subset:
continue; /* for */
@@ -10387,6 +12152,10 @@ not_end_of_int_subset:
/*
* We didn't found the end of the Internal subset
*/
+ if (quote == 0)
+ ctxt->checkIndex = base;
+ else
+ ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
if (next == 0)
xmlGenericError(xmlGenericErrorContext,
@@ -10395,13 +12164,19 @@ not_end_of_int_subset:
goto done;
found_end_int_subset:
+ ctxt->checkIndex = 0;
xmlParseInternalSubset(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
ctxt->inSubset = 2;
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
(ctxt->sax->externalSubset != NULL))
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
ctxt->extSubSystem, ctxt->extSubURI);
ctxt->inSubset = 0;
+ xmlCleanSpecialAttr(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
ctxt->instate = XML_PARSER_PROLOG;
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
@@ -10484,7 +12259,7 @@ found_end_int_subset:
break;
}
}
-done:
+done:
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
#endif
@@ -10504,6 +12279,55 @@ encoding_error:
}
/**
+ * xmlParseCheckTransition:
+ * @ctxt: an XML parser context
+ * @chunk: a char array
+ * @size: the size in byte of the chunk
+ *
+ * Check depending on the current parser state if the chunk given must be
+ * processed immediately or one need more data to advance on parsing.
+ *
+ * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
+ */
+static int
+xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
+ if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
+ return(-1);
+ if (ctxt->instate == XML_PARSER_START_TAG) {
+ if (memchr(chunk, '>', size) != NULL)
+ return(1);
+ return(0);
+ }
+ if (ctxt->progressive == XML_PARSER_COMMENT) {
+ if (memchr(chunk, '>', size) != NULL)
+ return(1);
+ return(0);
+ }
+ if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
+ if (memchr(chunk, '>', size) != NULL)
+ return(1);
+ return(0);
+ }
+ if (ctxt->progressive == XML_PARSER_PI) {
+ if (memchr(chunk, '>', size) != NULL)
+ return(1);
+ return(0);
+ }
+ if (ctxt->instate == XML_PARSER_END_TAG) {
+ if (memchr(chunk, '>', size) != NULL)
+ return(1);
+ return(0);
+ }
+ if ((ctxt->progressive == XML_PARSER_DTD) ||
+ (ctxt->instate == XML_PARSER_DTD)) {
+ if (memchr(chunk, '>', size) != NULL)
+ return(1);
+ return(0);
+ }
+ return(1);
+}
+
+/**
* xmlParseChunk:
* @ctxt: an XML parser context
* @chunk: an char array
@@ -10518,11 +12342,16 @@ int
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
int terminate) {
int end_in_lf = 0;
+ int remain = 0;
+ size_t old_avail = 0;
+ size_t avail = 0;
if (ctxt == NULL)
return(XML_ERR_INTERNAL_ERROR);
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
return(ctxt->errNo);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
if (ctxt->instate == XML_PARSER_START)
xmlDetectSAX2(ctxt);
if ((size > 0) && (chunk != NULL) && (!terminate) &&
@@ -10530,22 +12359,58 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
end_in_lf = 1;
size--;
}
+
+xmldecl_done:
+
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
- int base = ctxt->input->base - ctxt->input->buf->buffer->content;
- int cur = ctxt->input->cur - ctxt->input->base;
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+ size_t cur = ctxt->input->cur - ctxt->input->base;
int res;
-
- res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
+
+ old_avail = xmlBufUse(ctxt->input->buf->buffer);
+ /*
+ * Specific handling if we autodetected an encoding, we should not
+ * push more than the first line ... which depend on the encoding
+ * And only push the rest once the final encoding was detected
+ */
+ if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
+ (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
+ unsigned int len = 45;
+
+ if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
+ BAD_CAST "UTF-16")) ||
+ (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
+ BAD_CAST "UTF16")))
+ len = 90;
+ else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
+ BAD_CAST "UCS-4")) ||
+ (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
+ BAD_CAST "UCS4")))
+ len = 180;
+
+ if (ctxt->input->buf->rawconsumed < len)
+ len -= ctxt->input->buf->rawconsumed;
+
+ /*
+ * Change size for reading the initial declaration only
+ * if size is greater than len. Otherwise, memmove in xmlBufferAdd
+ * will blindly copy extra bytes from memory.
+ */
+ if ((unsigned int) size > len) {
+ remain = size - len;
+ size = len;
+ } else {
+ remain = 0;
+ }
+ }
+ res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
if (res < 0) {
ctxt->errNo = XML_PARSER_EOF;
- ctxt->disableSAX = 1;
+ xmlHaltParser(ctxt);
return (XML_PARSER_EOF);
}
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + cur;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif
@@ -10556,44 +12421,88 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
if ((in->encoder != NULL) && (in->buffer != NULL) &&
(in->raw != NULL)) {
int nbchars;
-
- nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
+ size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
+ size_t current = ctxt->input->cur - ctxt->input->base;
+
+ nbchars = xmlCharEncInput(in, terminate);
if (nbchars < 0) {
/* TODO 2.6.0 */
xmlGenericError(xmlGenericErrorContext,
"xmlParseChunk: encoder error\n");
return(XML_ERR_INVALID_ENCODING);
}
+ xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
}
}
}
- xmlParseTryOrFinish(ctxt, terminate);
+ if (remain != 0) {
+ xmlParseTryOrFinish(ctxt, 0);
+ } else {
+ if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
+ avail = xmlBufUse(ctxt->input->buf->buffer);
+ /*
+ * Depending on the current state it may not be such
+ * a good idea to try parsing if there is nothing in the chunk
+ * which would be worth doing a parser state transition and we
+ * need to wait for more data
+ */
+ if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
+ (old_avail == 0) || (avail == 0) ||
+ (xmlParseCheckTransition(ctxt,
+ (const char *)&ctxt->input->base[old_avail],
+ avail - old_avail)))
+ xmlParseTryOrFinish(ctxt, terminate);
+ }
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(ctxt->errNo);
+
+ if ((ctxt->input != NULL) &&
+ (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
+ ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
+ xmlHaltParser(ctxt);
+ }
+ if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
+ return(ctxt->errNo);
+
+ if (remain != 0) {
+ chunk += size;
+ size = remain;
+ remain = 0;
+ goto xmldecl_done;
+ }
if ((end_in_lf == 1) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL)) {
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
+ ctxt->input);
+ size_t current = ctxt->input->cur - ctxt->input->base;
+
xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
+
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
+ base, current);
}
- if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
- return(ctxt->errNo);
if (terminate) {
/*
* Check for termination
*/
- int avail = 0;
+ int cur_avail = 0;
if (ctxt->input != NULL) {
if (ctxt->input->buf == NULL)
- avail = ctxt->input->length -
- (ctxt->input->cur - ctxt->input->base);
+ cur_avail = ctxt->input->length -
+ (ctxt->input->cur - ctxt->input->base);
else
- avail = ctxt->input->buf->buffer->use -
- (ctxt->input->cur - ctxt->input->base);
+ cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
+ (ctxt->input->cur - ctxt->input->base);
}
-
+
if ((ctxt->instate != XML_PARSER_EOF) &&
(ctxt->instate != XML_PARSER_EPILOG)) {
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
- }
- if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
+ }
+ if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
}
if (ctxt->instate != XML_PARSER_EOF) {
@@ -10602,12 +12511,15 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
}
ctxt->instate = XML_PARSER_EOF;
}
- return((xmlParserErrors) ctxt->errNo);
+ if (ctxt->wellFormed == 0)
+ return((xmlParserErrors) ctxt->errNo);
+ else
+ return(0);
}
/************************************************************************
* *
- * I/O front end functions to the parser *
+ * I/O front end functions to the parser *
* *
************************************************************************/
@@ -10631,7 +12543,7 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
*/
xmlParserCtxtPtr
-xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
+xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
const char *chunk, int size, const char *filename) {
xmlParserCtxtPtr ctxt;
xmlParserInputPtr inputStream;
@@ -10680,7 +12592,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
if (user_data != NULL)
ctxt->userData = user_data;
- }
+ }
if (filename == NULL) {
ctxt->directory = NULL;
} else {
@@ -10706,11 +12618,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
}
}
inputStream->buf = buf;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- inputStream->end =
- &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
-
+ xmlBufResetInput(inputStream->buf->buffer, inputStream);
inputPush(ctxt, inputStream);
/*
@@ -10721,15 +12629,12 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
if ((size == 0) || (chunk == NULL)) {
ctxt->charset = XML_CHAR_ENCODING_NONE;
} else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
- int base = ctxt->input->base - ctxt->input->buf->buffer->content;
- int cur = ctxt->input->cur - ctxt->input->base;
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+ size_t cur = ctxt->input->cur - ctxt->input->base;
- xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
+ xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + cur;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif
@@ -10744,24 +12649,47 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
#endif /* LIBXML_PUSH_ENABLED */
/**
- * xmlStopParser:
+ * xmlHaltParser:
* @ctxt: an XML parser context
*
- * Blocks further parser processing
+ * Blocks further parser processing don't override error
+ * for internal use
*/
-void
-xmlStopParser(xmlParserCtxtPtr ctxt) {
+static void
+xmlHaltParser(xmlParserCtxtPtr ctxt) {
if (ctxt == NULL)
return;
ctxt->instate = XML_PARSER_EOF;
ctxt->disableSAX = 1;
if (ctxt->input != NULL) {
+ /*
+ * in case there was a specific allocation deallocate before
+ * overriding base
+ */
+ if (ctxt->input->free != NULL) {
+ ctxt->input->free((xmlChar *) ctxt->input->base);
+ ctxt->input->free = NULL;
+ }
ctxt->input->cur = BAD_CAST"";
ctxt->input->base = ctxt->input->cur;
}
}
/**
+ * xmlStopParser:
+ * @ctxt: an XML parser context
+ *
+ * Blocks further parser processing
+ */
+void
+xmlStopParser(xmlParserCtxtPtr ctxt) {
+ if (ctxt == NULL)
+ return;
+ xmlHaltParser(ctxt);
+ ctxt->errNo = XML_ERR_USER_STOP;
+}
+
+/**
* xmlCreateIOParserCtxt:
* @sax: a SAX handler
* @user_data: The user data returned on SAX callbacks
@@ -10782,11 +12710,15 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
xmlParserCtxtPtr ctxt;
xmlParserInputPtr inputStream;
xmlParserInputBufferPtr buf;
-
+
if (ioread == NULL) return(NULL);
buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
- if (buf == NULL) return(NULL);
+ if (buf == NULL) {
+ if (ioclose != NULL)
+ ioclose(ioctx);
+ return (NULL);
+ }
ctxt = xmlNewParserCtxt();
if (ctxt == NULL) {
@@ -10811,7 +12743,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
if (user_data != NULL)
ctxt->userData = user_data;
- }
+ }
inputStream = xmlNewIOInputStream(ctxt, buf, enc);
if (inputStream == NULL) {
@@ -10826,7 +12758,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
#ifdef LIBXML_VALID_ENABLED
/************************************************************************
* *
- * Front ends when parsing a DTD *
+ * Front ends when parsing a DTD *
* *
************************************************************************/
@@ -10837,7 +12769,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
* @enc: the charset encoding if known
*
* Load and parse a DTD
- *
+ *
* Returns the resulting xmlDtdPtr or NULL in case of error.
* @input will be freed by the function in any case.
*/
@@ -10859,10 +12791,13 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
return(NULL);
}
+ /* We are loading a DTD */
+ ctxt->options |= XML_PARSE_DTDLOAD;
+
/*
* Set-up the SAX context
*/
- if (sax != NULL) {
+ if (sax != NULL) {
if (ctxt->sax != NULL)
xmlFree(ctxt->sax);
ctxt->sax = sax;
@@ -10885,7 +12820,11 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
/*
* plug some encoding conversion routines here.
*/
- xmlPushInput(ctxt, pinput);
+ if (xmlPushInput(ctxt, pinput) < 0) {
+ if (sax != NULL) ctxt->sax = NULL;
+ xmlFreeParserCtxt(ctxt);
+ return(NULL);
+ }
if (enc != XML_CHAR_ENCODING_NONE) {
xmlSwitchEncoding(ctxt, enc);
}
@@ -10902,12 +12841,17 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
*/
ctxt->inSubset = 2;
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
+ if (ctxt->myDoc == NULL) {
+ xmlErrMemory(ctxt, "New Doc failed");
+ return(NULL);
+ }
+ ctxt->myDoc->properties = XML_DOC_INTERNAL;
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
BAD_CAST "none", BAD_CAST "none");
if ((enc == XML_CHAR_ENCODING_NONE) &&
((ctxt->input->end - ctxt->input->cur) >= 4)) {
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
@@ -10946,7 +12890,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
}
if (sax != NULL) ctxt->sax = NULL;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
}
@@ -10957,7 +12901,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
* @SystemID: a NAME* containing the URL to the DTD
*
* Load and parse an external subset.
- *
+ *
* Returns the resulting xmlDtdPtr or NULL in case of error.
*/
@@ -10977,16 +12921,19 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
return(NULL);
}
+ /* We are loading a DTD */
+ ctxt->options |= XML_PARSE_DTDLOAD;
+
/*
* Set-up the SAX context
*/
- if (sax != NULL) {
+ if (sax != NULL) {
if (ctxt->sax != NULL)
xmlFree(ctxt->sax);
ctxt->sax = sax;
ctxt->userData = ctxt;
}
-
+
/*
* Canonicalise the system ID
*/
@@ -11014,7 +12961,13 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
/*
* plug some encoding conversion routines here.
*/
- xmlPushInput(ctxt, input);
+ if (xmlPushInput(ctxt, input) < 0) {
+ if (sax != NULL) ctxt->sax = NULL;
+ xmlFreeParserCtxt(ctxt);
+ if (systemIdCanonic != NULL)
+ xmlFree(systemIdCanonic);
+ return(NULL);
+ }
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
xmlSwitchEncoding(ctxt, enc);
@@ -11035,6 +12988,13 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
*/
ctxt->inSubset = 2;
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
+ if (ctxt->myDoc == NULL) {
+ xmlErrMemory(ctxt, "New Doc failed");
+ if (sax != NULL) ctxt->sax = NULL;
+ xmlFreeParserCtxt(ctxt);
+ return(NULL);
+ }
+ ctxt->myDoc->properties = XML_DOC_INTERNAL;
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
ExternalID, SystemID);
xmlParseExternalSubset(ctxt, ExternalID, SystemID);
@@ -11061,7 +13021,7 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
}
if (sax != NULL) ctxt->sax = NULL;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
}
@@ -11072,7 +13032,7 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
* @SystemID: a NAME* containing the URL to the DTD
*
* Load and parse an external subset.
- *
+ *
* Returns the resulting xmlDtdPtr or NULL in case of error.
*/
@@ -11084,7 +13044,7 @@ xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
/************************************************************************
* *
- * Front ends when parsing an Entity *
+ * Front ends when parsing an Entity *
* *
************************************************************************/
@@ -11115,12 +13075,11 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
int ret = 0;
xmlChar start[4];
xmlCharEncoding enc;
- xmlParserInputPtr inputStream;
- char *directory = NULL;
if (ctx == NULL) return(-1);
- if (ctx->depth > 40) {
+ if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
+ (ctx->depth > 1024)) {
return(XML_ERR_ENTITY_LOOP);
}
@@ -11131,27 +13090,11 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
return(-1);
- ctxt = xmlNewParserCtxt();
+ ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
if (ctxt == NULL) {
return(-1);
}
-
- ctxt->userData = ctxt;
- ctxt->_private = ctx->_private;
-
- inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
- if (inputStream == NULL) {
- xmlFreeParserCtxt(ctxt);
- return(-1);
- }
-
- inputPush(ctxt, inputStream);
- if ((ctxt->directory == NULL) && (directory == NULL))
- directory = xmlParserGetDirectory((char *)URL);
- if ((ctxt->directory == NULL) && (directory != NULL))
- ctxt->directory = directory;
-
oldsax = ctxt->sax;
ctxt->sax = ctx->sax;
xmlDetectSAX2(ctxt);
@@ -11160,6 +13103,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
xmlFreeParserCtxt(ctxt);
return(-1);
}
+ newDoc->properties = XML_DOC_INTERNAL;
if (ctx->myDoc->dict) {
newDoc->dict = ctx->myDoc->dict;
xmlDictReference(newDoc->dict);
@@ -11189,7 +13133,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
newDoc->children->doc = ctx->myDoc;
}
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
@@ -11211,9 +13155,27 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
*/
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
xmlParseTextDecl(ctxt);
+ /*
+ * An XML-1.0 document can't reference an entity not XML-1.0
+ */
+ if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
+ (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
+ xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
+ "Version mismatch between document and entity\n");
+ }
}
/*
+ * If the user provided its own SAX callbacks then reuse the
+ * useData callback field, otherwise the expected setup in a
+ * DOM builder is to have userData == ctxt
+ */
+ if (ctx->userData == ctx)
+ ctxt->userData = ctxt;
+ else
+ ctxt->userData = ctx->userData;
+
+ /*
* Doing validity checking on chunk doesn't make sense
*/
ctxt->instate = XML_PARSER_CONTENT;
@@ -11244,7 +13206,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
ctxt->linenumbers = ctx->linenumbers;
xmlParseContent(ctxt);
-
+
ctx->validate = ctxt->validate;
ctx->valid = ctxt->valid;
if ((RAW == '<') && (NXT(1) == '/')) {
@@ -11287,7 +13249,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
newDoc->intSubset = NULL;
newDoc->extSubset = NULL;
xmlFreeDoc(newDoc);
-
+
return(ret);
}
@@ -11321,12 +13283,12 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
xmlChar start[4];
xmlCharEncoding enc;
- if (depth > 40) {
+ if (((depth > 40) &&
+ ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
+ (depth > 1024)) {
return(XML_ERR_ENTITY_LOOP);
}
-
-
if (list != NULL)
*list = NULL;
if ((URL == NULL) && (ID == NULL))
@@ -11335,7 +13297,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
return(XML_ERR_INTERNAL_ERROR);
- ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
+ ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
ctxt->userData = ctxt;
if (oldctxt != NULL) {
@@ -11372,6 +13334,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
xmlFreeParserCtxt(ctxt);
return(XML_ERR_INTERNAL_ERROR);
}
+ newDoc->properties = XML_DOC_INTERNAL;
newDoc->intSubset = doc->intSubset;
newDoc->extSubset = doc->extSubset;
newDoc->dict = doc->dict;
@@ -11398,7 +13361,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
ctxt->myDoc = doc;
newRoot->doc = doc;
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
@@ -11426,7 +13389,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
ctxt->depth = depth;
xmlParseContent(ctxt);
-
+
if ((RAW == '<') && (NXT(1) == '/')) {
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
} else if (RAW != 0) {
@@ -11459,11 +13422,34 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
}
ret = XML_ERR_OK;
}
- if (sax != NULL)
+
+ /*
+ * Record in the parent context the number of entities replacement
+ * done when parsing that reference.
+ */
+ if (oldctxt != NULL)
+ oldctxt->nbentities += ctxt->nbentities;
+
+ /*
+ * Also record the size of the entity parsed
+ */
+ if (ctxt->input != NULL && oldctxt != NULL) {
+ oldctxt->sizeentities += ctxt->input->consumed;
+ oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
+ }
+ /*
+ * And record the last error if any
+ */
+ if (ctxt->lastError.code != XML_ERR_OK)
+ xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
+
+ if (sax != NULL)
ctxt->sax = oldsax;
- oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
- oldctxt->node_seq.length = ctxt->node_seq.length;
- oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
+ if (oldctxt != NULL) {
+ oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
+ oldctxt->node_seq.length = ctxt->node_seq.length;
+ oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
+ }
ctxt->node_seq.maximum = 0;
ctxt->node_seq.length = 0;
ctxt->node_seq.buffer = NULL;
@@ -11471,7 +13457,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
newDoc->intSubset = NULL;
newDoc->extSubset = NULL;
xmlFreeDoc(newDoc);
-
+
return(ret);
}
@@ -11548,9 +13534,9 @@ xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
*
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
* error code otherwise
- *
+ *
* In case recover is set to 1, the nodelist will not be empty even if
- * the parsed chunk is not well balanced.
+ * the parsed chunk is not well balanced.
*/
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
@@ -11563,8 +13549,12 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
xmlNodePtr last = NULL;
int size;
xmlParserErrors ret = XML_ERR_OK;
+#ifdef SAX2
+ int i;
+#endif
- if (oldctxt->depth > 40) {
+ if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
+ (oldctxt->depth > 1024)) {
return(XML_ERR_ENTITY_LOOP);
}
@@ -11588,12 +13578,19 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
+#ifdef SAX2
+ /* propagate namespaces down the entity */
+ for (i = 0;i < oldctxt->nsNr;i += 2) {
+ nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
+ }
+#endif
+
oldsax = ctxt->sax;
ctxt->sax = oldctxt->sax;
xmlDetectSAX2(ctxt);
ctxt->replaceEntities = oldctxt->replaceEntities;
ctxt->options = oldctxt->options;
-
+
ctxt->_private = oldctxt->_private;
if (oldctxt->myDoc == NULL) {
newDoc = xmlNewDoc(BAD_CAST "1.0");
@@ -11603,6 +13600,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
xmlFreeParserCtxt(ctxt);
return(XML_ERR_INTERNAL_ERROR);
}
+ newDoc->properties = XML_DOC_INTERNAL;
newDoc->dict = ctxt->dict;
xmlDictReference(newDoc->dict);
ctxt->myDoc = newDoc;
@@ -11658,7 +13656,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
} else {
ret = XML_ERR_OK;
}
-
+
if ((lst != NULL) && (ret == XML_ERR_OK)) {
xmlNodePtr cur;
@@ -11687,7 +13685,20 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
ctxt->myDoc->children = content;
ctxt->myDoc->last = last;
}
-
+
+ /*
+ * Record in the parent context the number of entities replacement
+ * done when parsing that reference.
+ */
+ if (oldctxt != NULL)
+ oldctxt->nbentities += ctxt->nbentities;
+
+ /*
+ * Also record the last error if any
+ */
+ if (ctxt->lastError.code != XML_ERR_OK)
+ xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
+
ctxt->sax = oldsax;
ctxt->dict = NULL;
ctxt->attsDefault = NULL;
@@ -11696,7 +13707,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
if (newDoc != NULL) {
xmlFreeDoc(newDoc);
}
-
+
return(ret);
}
@@ -11770,22 +13781,22 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
if (doc->type == XML_DOCUMENT_NODE)
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
#ifdef LIBXML_HTML_ENABLED
- else if (doc->type == XML_HTML_DOCUMENT_NODE)
+ else if (doc->type == XML_HTML_DOCUMENT_NODE) {
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
+ /*
+ * When parsing in context, it makes no sense to add implied
+ * elements like html/body/etc...
+ */
+ options |= HTML_PARSE_NOIMPLIED;
+ }
#endif
else
return(XML_ERR_INTERNAL_ERROR);
if (ctxt == NULL)
return(XML_ERR_NO_MEMORY);
- fake = xmlNewComment(NULL);
- if (fake == NULL) {
- xmlFreeParserCtxt(ctxt);
- return(XML_ERR_NO_MEMORY);
- }
- xmlAddChild(node, fake);
- /*
+ /*
* Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
* We need a dictionary for xmlDetectSAX2, so if there's no doc dict
* we must wait until the last moment to free the original one.
@@ -11797,9 +13808,33 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
} else
options |= XML_PARSE_NODICT;
- xmlCtxtUseOptions(ctxt, options);
+ if (doc->encoding != NULL) {
+ xmlCharEncodingHandlerPtr hdlr;
+
+ if (ctxt->encoding != NULL)
+ xmlFree((xmlChar *) ctxt->encoding);
+ ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
+
+ hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
+ if (hdlr != NULL) {
+ xmlSwitchToEncoding(ctxt, hdlr);
+ } else {
+ return(XML_ERR_UNSUPPORTED_ENCODING);
+ }
+ }
+
+ xmlCtxtUseOptionsInternal(ctxt, options, NULL);
xmlDetectSAX2(ctxt);
ctxt->myDoc = doc;
+ /* parsing in context, i.e. as within existing content */
+ ctxt->instate = XML_PARSER_CONTENT;
+
+ fake = xmlNewComment(NULL);
+ if (fake == NULL) {
+ xmlFreeParserCtxt(ctxt);
+ return(XML_ERR_NO_MEMORY);
+ }
+ xmlAddChild(node, fake);
if (node->type == XML_ELEMENT_NODE) {
nodePush(ctxt, node);
@@ -11828,8 +13863,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
}
cur = cur->parent;
}
- ctxt->instate = XML_PARSER_CONTENT;
- }
+ }
if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
/*
@@ -11864,12 +13898,12 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
} else {
ret = XML_ERR_OK;
}
-
+
/*
* Return the newly created nodeset after unlinking it from
* the pseudo sibling.
*/
-
+
cur = fake->next;
fake->next = NULL;
node->last = fake;
@@ -11897,7 +13931,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
if (doc->dict != NULL)
ctxt->dict = NULL;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
#else /* !SAX2 */
return(XML_ERR_INTERNAL_ERROR);
@@ -11925,13 +13959,14 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
*
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
* the parser error code otherwise
- *
+ *
* In case recover is set to 1, the nodelist will not be empty even if
- * the parsed chunk is not well balanced.
+ * the parsed chunk is not well balanced, assuming the parsing succeeded to
+ * some extent.
*/
int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
- void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
+ void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
int recover) {
xmlParserCtxtPtr ctxt;
xmlDocPtr newDoc;
@@ -11966,6 +14001,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
xmlFreeParserCtxt(ctxt);
return(-1);
}
+ newDoc->properties = XML_DOC_INTERNAL;
if ((doc != NULL) && (doc->dict != NULL)) {
xmlDictFree(ctxt->dict);
ctxt->dict = doc->dict;
@@ -11975,7 +14011,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
ctxt->dictNames = 1;
} else {
- xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
+ xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
}
if (doc != NULL) {
newDoc->intSubset = doc->intSubset;
@@ -12038,7 +14074,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
} else {
ret = 0;
}
-
+
if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
xmlNodePtr cur;
@@ -12055,15 +14091,15 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
}
newDoc->children->children = NULL;
}
-
- if (sax != NULL)
+
+ if (sax != NULL)
ctxt->sax = oldsax;
xmlFreeParserCtxt(ctxt);
newDoc->intSubset = NULL;
newDoc->extSubset = NULL;
newDoc->oldNs = NULL;
xmlFreeDoc(newDoc);
-
+
return(ret);
}
@@ -12111,7 +14147,7 @@ xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
if (sax != NULL)
ctxt->sax = NULL;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
}
@@ -12135,10 +14171,11 @@ xmlParseEntity(const char *filename) {
#endif /* LIBXML_SAX1_ENABLED */
/**
- * xmlCreateEntityParserCtxt:
+ * xmlCreateEntityParserCtxtInternal:
* @URL: the entity URL
* @ID: the entity PUBLIC ID
* @base: a possible base for the target URI
+ * @pctx: parser context used to set options on new context
*
* Create a parser context for an external entity
* Automatic support for ZLIB/Compress compressed document is provided
@@ -12146,19 +14183,24 @@ xmlParseEntity(const char *filename) {
*
* Returns the new parser context or NULL
*/
-xmlParserCtxtPtr
-xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
- const xmlChar *base) {
+static xmlParserCtxtPtr
+xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
+ const xmlChar *base, xmlParserCtxtPtr pctx) {
xmlParserCtxtPtr ctxt;
xmlParserInputPtr inputStream;
char *directory = NULL;
xmlChar *uri;
-
+
ctxt = xmlNewParserCtxt();
if (ctxt == NULL) {
return(NULL);
}
+ if (pctx != NULL) {
+ ctxt->options = pctx->options;
+ ctxt->_private = pctx->_private;
+ }
+
uri = xmlBuildURI(URL, base);
if (uri == NULL) {
@@ -12193,9 +14235,28 @@ xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
return(ctxt);
}
+/**
+ * xmlCreateEntityParserCtxt:
+ * @URL: the entity URL
+ * @ID: the entity PUBLIC ID
+ * @base: a possible base for the target URI
+ *
+ * Create a parser context for an external entity
+ * Automatic support for ZLIB/Compress compressed document is provided
+ * by default if found at compile-time.
+ *
+ * Returns the new parser context or NULL
+ */
+xmlParserCtxtPtr
+xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
+ const xmlChar *base) {
+ return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
+
+}
+
/************************************************************************
* *
- * Front ends when parsing from a file *
+ * Front ends when parsing from a file *
* *
************************************************************************/
@@ -12204,7 +14265,7 @@ xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
* @filename: the filename or URL
* @options: a combination of xmlParserOption
*
- * Create a parser context for a file or URL content.
+ * Create a parser context for a file or URL content.
* Automatic support for ZLIB/Compress compressed document is provided
* by default if found at compile-time and for file accesses
*
@@ -12224,9 +14285,9 @@ xmlCreateURLParserCtxt(const char *filename, int options)
}
if (options)
- xmlCtxtUseOptions(ctxt, options);
+ xmlCtxtUseOptionsInternal(ctxt, options, NULL);
ctxt->linenumbers = 1;
-
+
inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
if (inputStream == NULL) {
xmlFreeParserCtxt(ctxt);
@@ -12246,7 +14307,7 @@ xmlCreateURLParserCtxt(const char *filename, int options)
* xmlCreateFileParserCtxt:
* @filename: the filename
*
- * Create a parser context for a file content.
+ * Create a parser context for a file content.
* Automatic support for ZLIB/Compress compressed document is provided
* by default if found at compile-time.
*
@@ -12283,7 +14344,6 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
int recovery, void *data) {
xmlDocPtr ret;
xmlParserCtxtPtr ctxt;
- char *directory = NULL;
xmlInitParser();
@@ -12301,10 +14361,8 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
ctxt->_private = data;
}
- if ((ctxt->directory == NULL) && (directory == NULL))
- directory = xmlParserGetDirectory(filename);
- if ((ctxt->directory == NULL) && (directory != NULL))
- ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
+ if (ctxt->directory == NULL)
+ ctxt->directory = xmlParserGetDirectory(filename);
ctxt->recovery = recovery;
@@ -12327,7 +14385,7 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
if (sax != NULL)
ctxt->sax = NULL;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
}
@@ -12357,13 +14415,14 @@ xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
* @cur: a pointer to an array of xmlChar
*
* parse an XML in-memory document and build a tree.
- * In the case the document is not Well Formed, a tree is built anyway
- *
- * Returns the resulting document tree
+ * In the case the document is not Well Formed, a attempt to build a
+ * tree is tried anyway
+ *
+ * Returns the resulting document tree or NULL in case of failure
*/
xmlDocPtr
-xmlRecoverDoc(xmlChar *cur) {
+xmlRecoverDoc(const xmlChar *cur) {
return(xmlSAXParseDoc(NULL, cur, 1));
}
@@ -12389,9 +14448,10 @@ xmlParseFile(const char *filename) {
*
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
* compressed document is provided by default if found at compile-time.
- * In the case the document is not Well Formed, a tree is built anyway
+ * In the case the document is not Well Formed, it attempts to build
+ * a tree anyway
*
- * Returns the resulting document tree
+ * Returns the resulting document tree or NULL in case of failure
*/
xmlDocPtr
@@ -12425,7 +14485,7 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
xmlClearParserCtxt(ctxt);
return;
}
-
+
xmlClearParserCtxt(ctxt);
if (filename != NULL)
input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
@@ -12443,7 +14503,7 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
*
* parse an XML file and call the given SAX handler routines.
* Automatic support for ZLIB/Compress compressed document is provided
- *
+ *
* Returns 0 in case of success or a error number otherwise
*/
int
@@ -12451,21 +14511,19 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
const char *filename) {
int ret = 0;
xmlParserCtxtPtr ctxt;
-
+
ctxt = xmlCreateFileParserCtxt(filename);
if (ctxt == NULL) return -1;
-#ifdef LIBXML_SAX1_ENABLED
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
-#endif /* LIBXML_SAX1_ENABLED */
xmlFree(ctxt->sax);
ctxt->sax = sax;
xmlDetectSAX2(ctxt);
if (user_data != NULL)
ctxt->userData = user_data;
-
+
xmlParseDocument(ctxt);
-
+
if (ctxt->wellFormed)
ret = 0;
else {
@@ -12481,14 +14539,14 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
ctxt->myDoc = NULL;
}
xmlFreeParserCtxt(ctxt);
-
+
return ret;
}
#endif /* LIBXML_SAX1_ENABLED */
/************************************************************************
* *
- * Front ends when parsing from memory *
+ * Front ends when parsing from memory *
* *
************************************************************************/
@@ -12532,9 +14590,7 @@ xmlCreateMemoryParserCtxt(const char *buffer, int size) {
input->filename = NULL;
input->buf = buf;
- input->base = input->buf->buffer->content;
- input->cur = input->buf->buffer->content;
- input->end = &input->buf->buffer->content[input->buf->buffer->use];
+ xmlBufResetInput(input->buf->buffer, input);
inputPush(ctxt, input);
return(ctxt);
@@ -12566,6 +14622,8 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
xmlDocPtr ret;
xmlParserCtxtPtr ctxt;
+ xmlInitParser();
+
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
if (ctxt == NULL) return(NULL);
if (sax != NULL) {
@@ -12588,10 +14646,10 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
xmlFreeDoc(ctxt->myDoc);
ctxt->myDoc = NULL;
}
- if (sax != NULL)
+ if (sax != NULL)
ctxt->sax = NULL;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
}
@@ -12606,7 +14664,7 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
* parse an XML in-memory block and use the given SAX function block
* to handle the parsing callback. If sax is NULL, fallback to the default
* DOM tree building routines.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -12621,7 +14679,7 @@ xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
* @size: the size of the array
*
* parse an XML in-memory block and build a tree.
- *
+ *
* Returns the resulting document tree
*/
@@ -12635,9 +14693,10 @@ xmlDocPtr xmlParseMemory(const char *buffer, int size) {
* @size: the size of the array
*
* parse an XML in-memory block and build a tree.
- * In the case the document is not Well Formed, a tree is built anyway
- *
- * Returns the resulting document tree
+ * In the case the document is not Well Formed, an attempt to
+ * build a tree is tried anyway
+ *
+ * Returns the resulting document tree or NULL in case of error
*/
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
@@ -12653,26 +14712,28 @@ xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
*
* A better SAX parsing routine.
* parse an XML in-memory buffer and call the given SAX handler routines.
- *
+ *
* Returns 0 in case of success or a error number otherwise
*/
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
const char *buffer, int size) {
int ret = 0;
xmlParserCtxtPtr ctxt;
- xmlSAXHandlerPtr oldsax = NULL;
-
- if (sax == NULL) return -1;
+
+ xmlInitParser();
+
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
if (ctxt == NULL) return -1;
- oldsax = ctxt->sax;
+ if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
+ xmlFree(ctxt->sax);
ctxt->sax = sax;
xmlDetectSAX2(ctxt);
+
if (user_data != NULL)
ctxt->userData = user_data;
-
+
xmlParseDocument(ctxt);
-
+
if (ctxt->wellFormed)
ret = 0;
else {
@@ -12681,13 +14742,14 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
else
ret = -1;
}
- ctxt->sax = oldsax;
+ if (sax != NULL)
+ ctxt->sax = NULL;
if (ctxt->myDoc != NULL) {
xmlFreeDoc(ctxt->myDoc);
ctxt->myDoc = NULL;
}
xmlFreeParserCtxt(ctxt);
-
+
return ret;
}
#endif /* LIBXML_SAX1_ENABLED */
@@ -12721,7 +14783,7 @@ xmlCreateDocParserCtxt(const xmlChar *cur) {
* parse an XML in-memory document and build a tree.
* It use the given SAX function block to handle the parsing callback.
* If sax is NULL, fallback to the default DOM tree building routines.
- *
+ *
* Returns the resulting document tree
*/
@@ -12736,7 +14798,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
ctxt = xmlCreateDocParserCtxt(cur);
if (ctxt == NULL) return(NULL);
- if (sax != NULL) {
+ if (sax != NULL) {
oldsax = ctxt->sax;
ctxt->sax = sax;
ctxt->userData = NULL;
@@ -12753,7 +14815,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
if (sax != NULL)
ctxt->sax = oldsax;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
}
@@ -12762,7 +14824,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
* @cur: a pointer to an array of xmlChar
*
* parse an XML in-memory document and build a tree.
- *
+ *
* Returns the resulting document tree
*/
@@ -12775,8 +14837,8 @@ xmlParseDoc(const xmlChar *cur) {
#ifdef LIBXML_LEGACY_ENABLED
/************************************************************************
* *
- * Specific function to keep track of entities references *
- * and used by the XSLT debugger *
+ * Specific function to keep track of entities references *
+ * and used by the XSLT debugger *
* *
************************************************************************/
@@ -12786,7 +14848,7 @@ static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
* xmlAddEntityReference:
* @ent : A valid entity
* @firstNode : A valid first node for children of entity
- * @lastNode : A valid last node of children entity
+ * @lastNode : A valid last node of children entity
*
* Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
*/
@@ -12815,7 +14877,7 @@ xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
/************************************************************************
* *
- * Miscellaneous *
+ * Miscellaneous *
* *
************************************************************************/
@@ -12839,37 +14901,57 @@ xmlInitParser(void) {
if (xmlParserInitialized != 0)
return;
- if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
- (xmlGenericError == NULL))
- initGenericErrorDefaultFunc(NULL);
- xmlInitGlobals();
- xmlInitThreads();
- xmlInitMemory();
- xmlInitCharEncodingHandlers();
- xmlDefaultSAXHandlerInit();
- xmlRegisterDefaultInputCallbacks();
+#ifdef LIBXML_THREAD_ENABLED
+ __xmlGlobalInitMutexLock();
+ if (xmlParserInitialized == 0) {
+#endif
+ xmlInitThreads();
+ xmlInitGlobals();
+ if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
+ (xmlGenericError == NULL))
+ initGenericErrorDefaultFunc(NULL);
+ xmlInitMemory();
+ xmlInitializeDict();
+ xmlInitCharEncodingHandlers();
+ xmlDefaultSAXHandlerInit();
+ xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
- xmlRegisterDefaultOutputCallbacks();
+ xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
- htmlInitAutoClose();
- htmlDefaultSAXHandlerInit();
+ htmlInitAutoClose();
+ htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
- xmlXPathInit();
+ xmlXPathInit();
+#endif
+ xmlParserInitialized = 1;
+#ifdef LIBXML_THREAD_ENABLED
+ }
+ __xmlGlobalInitMutexUnlock();
#endif
- xmlParserInitialized = 1;
}
/**
* xmlCleanupParser:
*
- * Cleanup function for the XML library. It tries to reclaim all
- * parsing related global memory allocated for the library processing.
- * It doesn't deallocate any document related memory. Calling this
- * function should not prevent reusing the library but one should
- * call xmlCleanupParser() only when the process has
- * finished using the library or XML document built with it.
+ * This function name is somewhat misleading. It does not clean up
+ * parser state, it cleans up memory allocated by the library itself.
+ * It is a cleanup function for the XML library. It tries to reclaim all
+ * related global memory allocated for the library processing.
+ * It doesn't deallocate any document related memory. One should
+ * call xmlCleanupParser() only when the process has finished using
+ * the library and all XML/HTML documents built with it.
+ * See also xmlInitParser() which has the opposite function of preparing
+ * the library for operations.
+ *
+ * WARNING: if your application is multithreaded or has plugin support
+ * calling this may crash the application if another thread or
+ * a plugin is still using libxml2. It's sometimes very hard to
+ * guess if libxml2 is in use in the application, some libraries
+ * or plugins may use it without notice. In case of doubt abstain
+ * from calling this function or do it just before calling exit()
+ * to avoid leak reports from valgrind !
*/
void
@@ -12890,8 +14972,8 @@ xmlCleanupParser(void) {
xmlSchemaCleanupTypes();
xmlRelaxNGCleanupTypes();
#endif
- xmlCleanupGlobals();
xmlResetLastError();
+ xmlCleanupGlobals();
xmlCleanupThreads(); /* must be last if called not from the main thread */
xmlCleanupMemory();
xmlParserInitialized = 0;
@@ -12911,7 +14993,7 @@ xmlCleanupParser(void) {
* current scope
*/
#define DICT_FREE(str) \
- if ((str) && ((!dict) || \
+ if ((str) && ((!dict) || \
(xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
xmlFree((char *)(str));
@@ -12926,7 +15008,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
{
xmlParserInputPtr input;
xmlDictPtr dict;
-
+
if (ctxt == NULL)
return;
@@ -12939,8 +15021,12 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
ctxt->input = NULL;
ctxt->spaceNr = 0;
- ctxt->spaceTab[0] = -1;
- ctxt->space = &ctxt->spaceTab[0];
+ if (ctxt->spaceTab != NULL) {
+ ctxt->spaceTab[0] = -1;
+ ctxt->space = &ctxt->spaceTab[0];
+ } else {
+ ctxt->space = NULL;
+ }
ctxt->nodeNr = 0;
@@ -12988,6 +15074,9 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
ctxt->depth = 0;
ctxt->charset = XML_CHAR_ENCODING_UTF8;
ctxt->catalogs = NULL;
+ ctxt->nbentities = 0;
+ ctxt->sizeentities = 0;
+ ctxt->sizeentcopy = 0;
xmlInitNodeInfoSeq(&ctxt->node_seq);
if (ctxt->attsDefault != NULL) {
@@ -13072,25 +15161,18 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
inputStream->filename = (char *)
xmlCanonicPath((const xmlChar *) filename);
inputStream->buf = buf;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- inputStream->end =
- &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
+ xmlBufResetInput(buf->buffer, inputStream);
inputPush(ctxt, inputStream);
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL)) {
- int base = ctxt->input->base - ctxt->input->buf->buffer->content;
- int cur = ctxt->input->cur - ctxt->input->base;
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+ size_t cur = ctxt->input->cur - ctxt->input->base;
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + cur;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
- use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif
@@ -13099,6 +15181,10 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
if (encoding != NULL) {
xmlCharEncodingHandlerPtr hdlr;
+ if (ctxt->encoding != NULL)
+ xmlFree((xmlChar *) ctxt->encoding);
+ ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
+
hdlr = xmlFindCharEncodingHandler(encoding);
if (hdlr != NULL) {
xmlSwitchToEncoding(ctxt, hdlr);
@@ -13113,50 +15199,63 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
return(0);
}
+
/**
- * xmlCtxtUseOptions:
+ * xmlCtxtUseOptionsInternal:
* @ctxt: an XML parser context
* @options: a combination of xmlParserOption
+ * @encoding: the user provided encoding to use
*
* Applies the options to the parser context
*
* Returns 0 in case of success, the set of unknown or unimplemented options
* in case of error.
*/
-int
-xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
+static int
+xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
{
if (ctxt == NULL)
return(-1);
+ if (encoding != NULL) {
+ if (ctxt->encoding != NULL)
+ xmlFree((xmlChar *) ctxt->encoding);
+ ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
+ }
if (options & XML_PARSE_RECOVER) {
ctxt->recovery = 1;
options -= XML_PARSE_RECOVER;
+ ctxt->options |= XML_PARSE_RECOVER;
} else
ctxt->recovery = 0;
if (options & XML_PARSE_DTDLOAD) {
ctxt->loadsubset = XML_DETECT_IDS;
options -= XML_PARSE_DTDLOAD;
+ ctxt->options |= XML_PARSE_DTDLOAD;
} else
ctxt->loadsubset = 0;
if (options & XML_PARSE_DTDATTR) {
ctxt->loadsubset |= XML_COMPLETE_ATTRS;
options -= XML_PARSE_DTDATTR;
+ ctxt->options |= XML_PARSE_DTDATTR;
}
if (options & XML_PARSE_NOENT) {
ctxt->replaceEntities = 1;
/* ctxt->loadsubset |= XML_DETECT_IDS; */
options -= XML_PARSE_NOENT;
+ ctxt->options |= XML_PARSE_NOENT;
} else
ctxt->replaceEntities = 0;
if (options & XML_PARSE_PEDANTIC) {
ctxt->pedantic = 1;
options -= XML_PARSE_PEDANTIC;
+ ctxt->options |= XML_PARSE_PEDANTIC;
} else
ctxt->pedantic = 0;
if (options & XML_PARSE_NOBLANKS) {
ctxt->keepBlanks = 0;
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
options -= XML_PARSE_NOBLANKS;
+ ctxt->options |= XML_PARSE_NOBLANKS;
} else
ctxt->keepBlanks = 1;
if (options & XML_PARSE_DTDVALID) {
@@ -13166,6 +15265,7 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
if (options & XML_PARSE_NOERROR)
ctxt->vctxt.error = NULL;
options -= XML_PARSE_DTDVALID;
+ ctxt->options |= XML_PARSE_DTDVALID;
} else
ctxt->validate = 0;
if (options & XML_PARSE_NOWARNING) {
@@ -13185,17 +15285,20 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
ctxt->sax->endElementNs = NULL;
ctxt->sax->initialized = 1;
options -= XML_PARSE_SAX1;
+ ctxt->options |= XML_PARSE_SAX1;
}
#endif /* LIBXML_SAX1_ENABLED */
if (options & XML_PARSE_NODICT) {
ctxt->dictNames = 0;
options -= XML_PARSE_NODICT;
+ ctxt->options |= XML_PARSE_NODICT;
} else {
ctxt->dictNames = 1;
}
if (options & XML_PARSE_NOCDATA) {
ctxt->sax->cdataBlock = NULL;
options -= XML_PARSE_NOCDATA;
+ ctxt->options |= XML_PARSE_NOCDATA;
}
if (options & XML_PARSE_NSCLEAN) {
ctxt->options |= XML_PARSE_NSCLEAN;
@@ -13209,11 +15312,53 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
ctxt->options |= XML_PARSE_COMPACT;
options -= XML_PARSE_COMPACT;
}
+ if (options & XML_PARSE_OLD10) {
+ ctxt->options |= XML_PARSE_OLD10;
+ options -= XML_PARSE_OLD10;
+ }
+ if (options & XML_PARSE_NOBASEFIX) {
+ ctxt->options |= XML_PARSE_NOBASEFIX;
+ options -= XML_PARSE_NOBASEFIX;
+ }
+ if (options & XML_PARSE_HUGE) {
+ ctxt->options |= XML_PARSE_HUGE;
+ options -= XML_PARSE_HUGE;
+ if (ctxt->dict != NULL)
+ xmlDictSetLimit(ctxt->dict, 0);
+ }
+ if (options & XML_PARSE_OLDSAX) {
+ ctxt->options |= XML_PARSE_OLDSAX;
+ options -= XML_PARSE_OLDSAX;
+ }
+ if (options & XML_PARSE_IGNORE_ENC) {
+ ctxt->options |= XML_PARSE_IGNORE_ENC;
+ options -= XML_PARSE_IGNORE_ENC;
+ }
+ if (options & XML_PARSE_BIG_LINES) {
+ ctxt->options |= XML_PARSE_BIG_LINES;
+ options -= XML_PARSE_BIG_LINES;
+ }
ctxt->linenumbers = 1;
return (options);
}
/**
+ * xmlCtxtUseOptions:
+ * @ctxt: an XML parser context
+ * @options: a combination of xmlParserOption
+ *
+ * Applies the options to the parser context
+ *
+ * Returns 0 in case of success, the set of unknown or unimplemented options
+ * in case of error.
+ */
+int
+xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
+{
+ return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
+}
+
+/**
* xmlDoRead:
* @ctxt: an XML parser context
* @URL: the base URL to use for the document
@@ -13222,7 +15367,7 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
* @reuse: keep the context for reuse
*
* Common front-end for the xmlRead functions
- *
+ *
* Returns the resulting document tree or NULL
*/
static xmlDocPtr
@@ -13230,8 +15375,8 @@ xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
int options, int reuse)
{
xmlDocPtr ret;
-
- xmlCtxtUseOptions(ctxt, options);
+
+ xmlCtxtUseOptionsInternal(ctxt, options, encoding);
if (encoding != NULL) {
xmlCharEncodingHandlerPtr hdlr;
@@ -13267,7 +15412,7 @@ xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
* @options: a combination of xmlParserOption
*
* parse an XML in-memory document and build a tree.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -13277,6 +15422,7 @@ xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int optio
if (cur == NULL)
return (NULL);
+ xmlInitParser();
ctxt = xmlCreateDocParserCtxt(cur);
if (ctxt == NULL)
@@ -13291,7 +15437,7 @@ xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int optio
* @options: a combination of xmlParserOption
*
* parse an XML file from the filesystem or the network.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -13299,6 +15445,7 @@ xmlReadFile(const char *filename, const char *encoding, int options)
{
xmlParserCtxtPtr ctxt;
+ xmlInitParser();
ctxt = xmlCreateURLParserCtxt(filename, options);
if (ctxt == NULL)
return (NULL);
@@ -13314,7 +15461,7 @@ xmlReadFile(const char *filename, const char *encoding, int options)
* @options: a combination of xmlParserOption
*
* parse an XML in-memory document and build a tree.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -13322,6 +15469,7 @@ xmlReadMemory(const char *buffer, int size, const char *URL, const char *encodin
{
xmlParserCtxtPtr ctxt;
+ xmlInitParser();
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
if (ctxt == NULL)
return (NULL);
@@ -13338,7 +15486,7 @@ xmlReadMemory(const char *buffer, int size, const char *URL, const char *encodin
* parse an XML from a file descriptor and build a tree.
* NOTE that the file descriptor will not be closed when the
* reader is closed or reset.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -13350,6 +15498,7 @@ xmlReadFd(int fd, const char *URL, const char *encoding, int options)
if (fd < 0)
return (NULL);
+ xmlInitParser();
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
if (input == NULL)
@@ -13380,7 +15529,7 @@ xmlReadFd(int fd, const char *URL, const char *encoding, int options)
* @options: a combination of xmlParserOption
*
* parse an XML document from I/O functions and source and build a tree.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -13393,11 +15542,15 @@ xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
if (ioread == NULL)
return (NULL);
+ xmlInitParser();
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
XML_CHAR_ENCODING_NONE);
- if (input == NULL)
+ if (input == NULL) {
+ if (ioclose != NULL)
+ ioclose(ioctx);
return (NULL);
+ }
ctxt = xmlNewParserCtxt();
if (ctxt == NULL) {
xmlFreeParserInputBuffer(input);
@@ -13423,7 +15576,7 @@ xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
*
* parse an XML in-memory document and build a tree.
* This reuses the existing @ctxt parser context
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -13436,6 +15589,7 @@ xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
return (NULL);
if (ctxt == NULL)
return (NULL);
+ xmlInitParser();
xmlCtxtReset(ctxt);
@@ -13456,7 +15610,7 @@ xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
*
* parse an XML file from the filesystem or the network.
* This reuses the existing @ctxt parser context
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -13469,6 +15623,7 @@ xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
return (NULL);
if (ctxt == NULL)
return (NULL);
+ xmlInitParser();
xmlCtxtReset(ctxt);
@@ -13491,7 +15646,7 @@ xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
*
* parse an XML in-memory document and build a tree.
* This reuses the existing @ctxt parser context
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -13505,6 +15660,7 @@ xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
return (NULL);
if (buffer == NULL)
return (NULL);
+ xmlInitParser();
xmlCtxtReset(ctxt);
@@ -13535,7 +15691,7 @@ xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
* This reuses the existing @ctxt parser context
* NOTE that the file descriptor will not be closed when the
* reader is closed or reset.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -13549,6 +15705,7 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
return (NULL);
if (ctxt == NULL)
return (NULL);
+ xmlInitParser();
xmlCtxtReset(ctxt);
@@ -13578,7 +15735,7 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
*
* parse an XML document from I/O functions and source and build a tree.
* This reuses the existing @ctxt parser context
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -13594,13 +15751,17 @@ xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
return (NULL);
if (ctxt == NULL)
return (NULL);
+ xmlInitParser();
xmlCtxtReset(ctxt);
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
XML_CHAR_ENCODING_NONE);
- if (input == NULL)
+ if (input == NULL) {
+ if (ioclose != NULL)
+ ioclose(ioctx);
return (NULL);
+ }
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
if (stream == NULL) {
xmlFreeParserInputBuffer(input);