summaryrefslogtreecommitdiffstats
path: root/third_party/libxml/src/HTMLparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/libxml/src/HTMLparser.c')
-rw-r--r--third_party/libxml/src/HTMLparser.c324
1 files changed, 236 insertions, 88 deletions
diff --git a/third_party/libxml/src/HTMLparser.c b/third_party/libxml/src/HTMLparser.c
index 42dc776..d329d3b 100644
--- a/third_party/libxml/src/HTMLparser.c
+++ b/third_party/libxml/src/HTMLparser.c
@@ -44,6 +44,9 @@
#include <libxml/globals.h>
#include <libxml/uri.h>
+#include "buf.h"
+#include "enc.h"
+
#define HTML_MAX_NAMELEN 1000
#define HTML_PARSER_BIG_BUFFER_SIZE 1000
#define HTML_PARSER_BUFFER_SIZE 100
@@ -727,7 +730,7 @@ static const char* const map_contents[] = { BLOCK, "area", NULL } ;
static const char* const name_attr[] = { "name", NULL } ;
static const char* const action_attr[] = { "action", NULL } ;
static const char* const blockli_elt[] = { BLOCK, "li", NULL } ;
-static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ;
+static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", "charset", NULL } ;
static const char* const content_attr[] = { "content", NULL } ;
static const char* const type_attr[] = { "type", NULL } ;
static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ;
@@ -1080,9 +1083,9 @@ static const char * const htmlStartClose[] = {
"menu", "p", "head", "ul", NULL,
"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL,
"div", "p", "head", NULL,
-"noscript", "p", "head", NULL,
+"noscript", "p", NULL,
"center", "font", "b", "i", "p", "head", NULL,
-"a", "a", NULL,
+"a", "a", "head", NULL,
"caption", "p", NULL,
"colgroup", "caption", "colgroup", "col", "p", NULL,
"col", "caption", "col", "p", NULL,
@@ -1100,6 +1103,43 @@ static const char * const htmlStartClose[] = {
"option", "option", NULL,
"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
"pre", "listing", "xmp", "a", NULL,
+/* most tags in in FONTSTYLE, PHRASE and SPECIAL should close <head> */
+"tt", "head", NULL,
+"i", "head", NULL,
+"b", "head", NULL,
+"u", "head", NULL,
+"s", "head", NULL,
+"strike", "head", NULL,
+"big", "head", NULL,
+"small", "head", NULL,
+
+"em", "head", NULL,
+"strong", "head", NULL,
+"dfn", "head", NULL,
+"code", "head", NULL,
+"samp", "head", NULL,
+"kbd", "head", NULL,
+"var", "head", NULL,
+"cite", "head", NULL,
+"abbr", "head", NULL,
+"acronym", "head", NULL,
+
+/* "a" */
+"img", "head", NULL,
+/* "applet" */
+/* "embed" */
+/* "object" */
+"font", "head", NULL,
+/* "basefont" */
+"br", "head", NULL,
+/* "script" */
+"map", "head", NULL,
+"q", "head", NULL,
+"sub", "head", NULL,
+"sup", "head", NULL,
+"span", "head", NULL,
+"bdo", "head", NULL,
+"iframe", "head", NULL,
NULL
};
@@ -1137,7 +1177,7 @@ static const char *const htmlScriptAttributes[] = {
"onfocus",
"onblur",
"onsubmit",
- "onrest",
+ "onreset",
"onchange",
"onselect"
};
@@ -2887,9 +2927,11 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {
}
if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
- htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
- "Invalid char in CDATA 0x%X\n", cur);
- NEXT;
+ htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Invalid char in CDATA 0x%X\n", cur);
+ if (ctxt->input->cur < ctxt->input->end) {
+ NEXT;
+ }
}
if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
@@ -2939,9 +2981,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
*/
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
if (areBlanks(ctxt, buf, nbchar)) {
- if (ctxt->sax->ignorableWhitespace != NULL)
- ctxt->sax->ignorableWhitespace(ctxt->userData,
- buf, nbchar);
+ if (ctxt->keepBlanks) {
+ if (ctxt->sax->characters != NULL)
+ ctxt->sax->characters(ctxt->userData, buf, nbchar);
+ } else {
+ if (ctxt->sax->ignorableWhitespace != NULL)
+ ctxt->sax->ignorableWhitespace(ctxt->userData,
+ buf, nbchar);
+ }
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
@@ -2972,8 +3019,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
*/
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
if (areBlanks(ctxt, buf, nbchar)) {
- if (ctxt->sax->ignorableWhitespace != NULL)
- ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
+ if (ctxt->keepBlanks) {
+ if (ctxt->sax->characters != NULL)
+ ctxt->sax->characters(ctxt->userData, buf, nbchar);
+ } else {
+ if (ctxt->sax->ignorableWhitespace != NULL)
+ ctxt->sax->ignorableWhitespace(ctxt->userData,
+ buf, nbchar);
+ }
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
@@ -3275,7 +3328,7 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) {
val = val * 16 + (CUR - 'A') + 10;
else {
htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
- "htmlParseCharRef: missing semicolumn\n",
+ "htmlParseCharRef: missing semicolon\n",
NULL, NULL);
break;
}
@@ -3290,7 +3343,7 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) {
val = val * 10 + (CUR - '0');
else {
htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
- "htmlParseCharRef: missing semicolumn\n",
+ "htmlParseCharRef: missing semicolon\n",
NULL, NULL);
break;
}
@@ -3433,34 +3486,26 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
}
/**
- * htmlCheckEncoding:
+ * htmlCheckEncodingDirect:
* @ctxt: an HTML parser context
* @attvalue: the attribute value
*
- * Checks an http-equiv attribute from a Meta tag to detect
+ * Checks an attribute value to detect
* the encoding
* If a new encoding is detected the parser is switched to decode
* it and pass UTF8
*/
static void
-htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
- const xmlChar *encoding;
+htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
- if ((ctxt == NULL) || (attvalue == NULL))
+ if ((ctxt == NULL) || (encoding == NULL) ||
+ (ctxt->options & HTML_PARSE_IGNORE_ENC))
return;
/* do not change encoding */
if (ctxt->input->encoding != NULL)
return;
- encoding = xmlStrcasestr(attvalue, BAD_CAST"charset=");
- if (encoding != NULL) {
- encoding += 8;
- } else {
- encoding = xmlStrcasestr(attvalue, BAD_CAST"charset =");
- if (encoding != NULL)
- encoding += 9;
- }
if (encoding != NULL) {
xmlCharEncoding enc;
xmlCharEncodingHandlerPtr handler;
@@ -3498,7 +3543,9 @@ htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
xmlSwitchToEncoding(ctxt, handler);
ctxt->charset = XML_CHAR_ENCODING_UTF8;
} else {
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+ htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
+ "htmlCheckEncoding: unknown encoding %s\n",
+ encoding, NULL);
}
}
@@ -3513,24 +3560,51 @@ htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
* convert as much as possible to the parser reading buffer.
*/
processed = ctxt->input->cur - ctxt->input->base;
- xmlBufferShrink(ctxt->input->buf->buffer, processed);
- nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
- ctxt->input->buf->buffer,
- ctxt->input->buf->raw);
+ xmlBufShrink(ctxt->input->buf->buffer, processed);
+ nbchars = xmlCharEncInput(ctxt->input->buf, 1);
if (nbchars < 0) {
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"htmlCheckEncoding: encoder error\n",
NULL, NULL);
}
- ctxt->input->base =
- ctxt->input->cur = ctxt->input->buf->buffer->content;
- ctxt->input->end =
- &ctxt->input->base[ctxt->input->buf->buffer->use];
+ xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input);
}
}
}
/**
+ * htmlCheckEncoding:
+ * @ctxt: an HTML parser context
+ * @attvalue: the attribute value
+ *
+ * Checks an http-equiv attribute from a Meta tag to detect
+ * the encoding
+ * If a new encoding is detected the parser is switched to decode
+ * it and pass UTF8
+ */
+static void
+htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
+ const xmlChar *encoding;
+
+ if (!attvalue)
+ return;
+
+ encoding = xmlStrcasestr(attvalue, BAD_CAST"charset");
+ if (encoding != NULL) {
+ encoding += 7;
+ }
+ /*
+ * skip blank
+ */
+ if (encoding && IS_BLANK_CH(*encoding))
+ encoding = xmlStrcasestr(attvalue, BAD_CAST"=");
+ if (encoding && *encoding == '=') {
+ encoding ++;
+ htmlCheckEncodingDirect(ctxt, encoding);
+ }
+}
+
+/**
* htmlCheckMeta:
* @ctxt: an HTML parser context
* @atts: the attributes values
@@ -3554,6 +3628,8 @@ htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv"))
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
http = 1;
+ else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset")))
+ htmlCheckEncodingDirect(ctxt, value);
else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content")))
content = value;
att = atts[i++];
@@ -3595,13 +3671,13 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
int i;
int discardtag = 0;
- if (ctxt->instate == XML_PARSER_EOF)
- return(-1);
if ((ctxt == NULL) || (ctxt->input == NULL)) {
htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
"htmlParseStartTag: context error\n", NULL, NULL);
return -1;
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
if (CUR != '<') return -1;
NEXT;
@@ -3883,6 +3959,7 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)
if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
ctxt->sax->endElement(ctxt->userData, name);
+ htmlNodeInfoPop(ctxt);
htmlnamePop(ctxt);
ret = 1;
} else {
@@ -4289,7 +4366,7 @@ static void
htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
const xmlChar *name;
const htmlElemDesc * info;
- htmlParserNodeInfo node_info;
+ htmlParserNodeInfo node_info = { 0, };
int failed;
if ((ctxt == NULL) || (ctxt->input == NULL)) {
@@ -4670,7 +4747,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
ctxt->sax->endDocument(ctxt->userData);
- if (ctxt->myDoc != NULL) {
+ if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) {
dtd = xmlGetIntSubset(ctxt->myDoc);
if (dtd == NULL)
ctxt->myDoc->intSubset =
@@ -4875,9 +4952,7 @@ htmlCreateMemoryParserCtxt(const char *buffer, int size) {
input->filename = NULL;
input->buf = buf;
- input->base = input->buf->buffer->content;
- input->cur = input->buf->buffer->content;
- input->end = &input->buf->buffer->content[input->buf->buffer->use];
+ xmlBufResetInput(buf->buffer, input);
inputPush(ctxt, input);
return(ctxt);
@@ -4994,8 +5069,8 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
buf = in->base;
len = in->length;
} else {
- buf = in->buf->buffer->content;
- len = in->buf->buffer->use;
+ buf = xmlBufContent(in->buf->buffer);
+ len = xmlBufUse(in->buf->buffer);
}
/* take into account the sequence length */
@@ -5087,13 +5162,13 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
* @stop: Array of chars, which stop the lookup.
* @stopLen: Length of stop-Array
*
- * Try to find if any char of the stop-Array is available in the input
+ * Try to find if any char of the stop-Array is available in the input
* stream.
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
* to avoid rescanning sequences of bytes, it DOES change the state of the
* parser, do not use liberally.
*
- * Returns the index to the current parsing point if a stopChar
+ * Returns the index to the current parsing point if a stopChar
* is available, -1 otherwise.
*/
static int
@@ -5121,8 +5196,8 @@ htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
buf = in->base;
len = in->length;
} else {
- buf = in->buf->buffer->content;
- len = in->buf->buffer->use;
+ buf = xmlBufContent(in->buf->buffer);
+ len = xmlBufUse(in->buf->buffer);
}
for (; base < len; base++) {
@@ -5171,6 +5246,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
int avail = 0;
xmlChar cur, next;
+ htmlParserNodeInfo node_info;
+
#ifdef DEBUG_PUSH
switch (ctxt->instate) {
case XML_PARSER_EOF:
@@ -5231,7 +5308,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = in->buf->buffer->use - (in->cur - in->base);
+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
if ((avail == 0) && (terminate)) {
htmlAutoCloseOnEnd(ctxt);
if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
@@ -5267,7 +5344,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = in->buf->buffer->use - (in->cur - in->base);
+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
}
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData,
@@ -5309,11 +5386,24 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = in->buf->buffer->use - (in->cur - in->base);
- if (avail < 2)
+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ /*
+ * no chars in buffer
+ */
+ if (avail < 1)
goto done;
+ /*
+ * not enouth chars in buffer
+ */
+ if (avail < 2) {
+ if (!terminate)
+ goto done;
+ else
+ next = ' ';
+ } else {
+ next = in->cur[1];
+ }
cur = in->cur[0];
- next = in->cur[1];
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
@@ -5369,7 +5459,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = in->buf->buffer->use - (in->cur - in->base);
+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
if (avail < 2)
goto done;
cur = in->cur[0];
@@ -5410,7 +5500,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = in->buf->buffer->use - (in->cur - in->base);
+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
if (avail < 1)
goto done;
cur = in->cur[0];
@@ -5463,8 +5553,22 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
int failed;
const htmlElemDesc * info;
- if (avail < 2)
+ /*
+ * no chars in buffer
+ */
+ if (avail < 1)
goto done;
+ /*
+ * not enouth chars in buffer
+ */
+ if (avail < 2) {
+ if (!terminate)
+ goto done;
+ else
+ next = ' ';
+ } else {
+ next = in->cur[1];
+ }
cur = in->cur[0];
if (cur != '<') {
ctxt->instate = XML_PARSER_CONTENT;
@@ -5474,7 +5578,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
#endif
break;
}
- if (in->cur[1] == '/') {
+ if (next == '/') {
ctxt->instate = XML_PARSER_END_TAG;
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
@@ -5487,6 +5591,14 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
goto done;
+ /* Capture start position */
+ if (ctxt->record_info) {
+ node_info.begin_pos = ctxt->input->consumed +
+ (CUR_PTR - ctxt->input->base);
+ node_info.begin_line = ctxt->input->line;
+ }
+
+
failed = htmlParseStartTag(ctxt);
name = ctxt->name;
if ((failed == -1) ||
@@ -5536,6 +5648,9 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
htmlnamePop(ctxt);
}
+ if (ctxt->record_info)
+ htmlNodeInfoPush(ctxt, &node_info);
+
ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5552,6 +5667,10 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
ctxt->sax->endElement(ctxt->userData, name);
htmlnamePop(ctxt);
}
+
+ if (ctxt->record_info)
+ htmlNodeInfoPush(ctxt, &node_info);
+
ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5579,9 +5698,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if ((cur != '<') && (cur != '&')) {
if (ctxt->sax != NULL) {
if (IS_BLANK_CH(cur)) {
- if (ctxt->sax->ignorableWhitespace != NULL)
- ctxt->sax->ignorableWhitespace(
- ctxt->userData, &cur, 1);
+ if (ctxt->keepBlanks) {
+ if (ctxt->sax->characters != NULL)
+ ctxt->sax->characters(
+ ctxt->userData, &cur, 1);
+ } else {
+ if (ctxt->sax->ignorableWhitespace != NULL)
+ ctxt->sax->ignorableWhitespace(
+ ctxt->userData, &cur, 1);
+ }
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
@@ -5609,7 +5734,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
int idx;
xmlChar val;
- idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1);
+ idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 0);
if (idx < 0)
goto done;
val = in->cur[idx + 2];
@@ -5866,7 +5991,7 @@ done:
ctxt->sax->endDocument(ctxt->userData);
}
}
- if ((ctxt->myDoc != NULL) &&
+ if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL) &&
((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
(ctxt->instate == XML_PARSER_EPILOG))) {
xmlDtdPtr dtd;
@@ -5904,8 +6029,8 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
}
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
- int base = ctxt->input->base - ctxt->input->buf->buffer->content;
- int cur = ctxt->input->cur - ctxt->input->base;
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+ size_t cur = ctxt->input->cur - ctxt->input->base;
int res;
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
@@ -5914,10 +6039,7 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
ctxt->disableSAX = 1;
return (XML_PARSER_EOF);
}
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + cur;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
#endif
@@ -5932,13 +6054,16 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
if ((in->encoder != NULL) && (in->buffer != NULL) &&
(in->raw != NULL)) {
int nbchars;
+ size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
+ size_t current = ctxt->input->cur - ctxt->input->base;
- nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
+ nbchars = xmlCharEncInput(in, terminate);
if (nbchars < 0) {
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"encoder error\n", NULL, NULL);
return(XML_ERR_INVALID_ENCODING);
}
+ xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
}
}
}
@@ -6032,24 +6157,18 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
inputStream->filename = (char *)
xmlCanonicPath((const xmlChar *) filename);
inputStream->buf = buf;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- inputStream->end =
- &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
+ xmlBufResetInput(buf->buffer, inputStream);
inputPush(ctxt, inputStream);
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL)) {
- int base = ctxt->input->base - ctxt->input->buf->buffer->content;
- int cur = ctxt->input->cur - ctxt->input->base;
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+ size_t cur = ctxt->input->cur - ctxt->input->base;
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + cur;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
#endif
@@ -6169,12 +6288,16 @@ htmlCreateFileParserCtxt(const char *filename, const char *encoding)
/* set encoding */
if (encoding) {
- content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1);
- if (content) {
- strcpy ((char *)content, (char *)content_line);
- strcat ((char *)content, (char *)encoding);
- htmlCheckEncoding (ctxt, content);
- xmlFree (content);
+ size_t l = strlen(encoding);
+
+ if (l < 1000) {
+ content = xmlMallocAtomic (xmlStrlen(content_line) + l + 1);
+ if (content) {
+ strcpy ((char *)content, (char *)content_line);
+ strcat ((char *)content, (char *)encoding);
+ htmlCheckEncoding (ctxt, content);
+ xmlFree (content);
+ }
}
}
@@ -6451,6 +6574,7 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt)
ctxt->wellFormed = 1;
ctxt->nsWellFormed = 1;
+ ctxt->disableSAX = 0;
ctxt->valid = 1;
ctxt->vctxt.userData = ctxt;
ctxt->vctxt.error = xmlParserValidityError;
@@ -6530,6 +6654,18 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
ctxt->options |= XML_PARSE_HUGE;
options -= XML_PARSE_HUGE;
}
+ if (options & HTML_PARSE_NODEFDTD) {
+ ctxt->options |= HTML_PARSE_NODEFDTD;
+ options -= HTML_PARSE_NODEFDTD;
+ }
+ if (options & HTML_PARSE_IGNORE_ENC) {
+ ctxt->options |= HTML_PARSE_IGNORE_ENC;
+ options -= HTML_PARSE_IGNORE_ENC;
+ }
+ if (options & HTML_PARSE_NOIMPLIED) {
+ ctxt->options |= HTML_PARSE_NOIMPLIED;
+ options -= HTML_PARSE_NOIMPLIED;
+ }
ctxt->dictNames = 0;
return (options);
}
@@ -6676,6 +6812,7 @@ htmlReadFd(int fd, const char *URL, const char *encoding, int options)
if (fd < 0)
return (NULL);
+ xmlInitParser();
xmlInitParser();
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
@@ -6723,8 +6860,11 @@ htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
XML_CHAR_ENCODING_NONE);
- if (input == NULL)
+ if (input == NULL) {
+ if (ioclose != NULL)
+ ioclose(ioctx);
return (NULL);
+ }
ctxt = htmlNewParserCtxt();
if (ctxt == NULL) {
xmlFreeParserInputBuffer(input);
@@ -6763,6 +6903,7 @@ htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
return (NULL);
if (ctxt == NULL)
return (NULL);
+ xmlInitParser();
htmlCtxtReset(ctxt);
@@ -6796,6 +6937,7 @@ htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
return (NULL);
if (ctxt == NULL)
return (NULL);
+ xmlInitParser();
htmlCtxtReset(ctxt);
@@ -6832,6 +6974,7 @@ htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
return (NULL);
if (buffer == NULL)
return (NULL);
+ xmlInitParser();
htmlCtxtReset(ctxt);
@@ -6874,6 +7017,7 @@ htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
return (NULL);
if (ctxt == NULL)
return (NULL);
+ xmlInitParser();
htmlCtxtReset(ctxt);
@@ -6918,13 +7062,17 @@ htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
return (NULL);
if (ctxt == NULL)
return (NULL);
+ xmlInitParser();
htmlCtxtReset(ctxt);
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
XML_CHAR_ENCODING_NONE);
- if (input == NULL)
+ if (input == NULL) {
+ if (ioclose != NULL)
+ ioclose(ioctx);
return (NULL);
+ }
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
if (stream == NULL) {
xmlFreeParserInputBuffer(input);