From 35a201cc8ef0c3f5b2df88d2e528aabee1048348 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 30 Apr 2021 18:47:09 +0200 Subject: Initial/Final commit --- libxml2-2.9.10/parser.c | 15605 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 15605 insertions(+) create mode 100644 libxml2-2.9.10/parser.c (limited to 'libxml2-2.9.10/parser.c') diff --git a/libxml2-2.9.10/parser.c b/libxml2-2.9.10/parser.c new file mode 100644 index 0000000..d1c3196 --- /dev/null +++ b/libxml2-2.9.10/parser.c @@ -0,0 +1,15605 @@ +/* + * parser.c : an XML 1.0 parser, namespaces and validity support are mostly + * implemented on top of the SAX interfaces + * + * References: + * The XML specification: + * http://www.w3.org/TR/REC-xml + * Original 1.0 version: + * http://www.w3.org/TR/1998/REC-xml-19980210 + * XML second edition working draft + * http://www.w3.org/TR/2000/WD-xml-2e-20000814 + * + * Okay this is a big file, the parser core is around 7000 lines, then it + * is followed by the progressive parser top routines, then the various + * high level APIs to call the parser and a few miscellaneous functions. + * A number of helper functions and deprecated ones have been moved to + * parserInternals.c to reduce this file size. + * As much as possible the functions are associated with their relative + * production in the XML specification. A few productions defining the + * different ranges of character are actually implanted either in + * parserInternals.h or parserInternals.c + * The DOM tree build is realized from the default SAX callbacks in + * the module SAX.c. + * The routines doing the validation checks are in valid.c and called either + * from the SAX callbacks or as standalone functions using a preparsed + * document. + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +/* To avoid EBCDIC trouble when parsing on zOS */ +#if defined(__MVS__) +#pragma convert("ISO8859-1") +#endif + +#define IN_LIBXML +#include "libxml.h" + +#if defined(_WIN32) && !defined (__CYGWIN__) +#define XML_DIR_SEP '\\' +#else +#define XML_DIR_SEP '/' +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef LIBXML_CATALOG_ENABLED +#include +#endif +#ifdef LIBXML_SCHEMAS_ENABLED +#include +#include +#endif +#ifdef HAVE_CTYPE_H +#include +#endif +#ifdef HAVE_STDLIB_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "buf.h" +#include "enc.h" + +static void +xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); + +static xmlParserCtxtPtr +xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, + const xmlChar *base, xmlParserCtxtPtr pctx); + +static void xmlHaltParser(xmlParserCtxtPtr ctxt); + +static int +xmlParseElementStart(xmlParserCtxtPtr ctxt); + +static void +xmlParseElementEnd(xmlParserCtxtPtr ctxt); + +/************************************************************************ + * * + * Arbitrary limits set in the parser. See XML_PARSE_HUGE * + * * + ************************************************************************/ + +#define XML_PARSER_BIG_ENTITY 1000 +#define XML_PARSER_LOT_ENTITY 5000 + +/* + * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity + * replacement over the size in byte of the input indicates that you have + * and exponential behaviour. A value of 10 correspond to at least 3 entity + * replacement per byte of input. + */ +#define XML_PARSER_NON_LINEAR 10 + +/* + * xmlParserEntityCheck + * + * Function to check non-linear entity expansion behaviour + * This is here to detect and stop exponential linear entity expansion + * This is not a limitation of the parser but a safety + * boundary feature. It can be disabled with the XML_PARSE_HUGE + * parser option. + */ +static int +xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, + xmlEntityPtr ent, size_t replacement) +{ + size_t consumed = 0; + + if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) + return (0); + if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) + return (1); + + /* + * This may look absurd but is needed to detect + * entities problems + */ + if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && + (ent->content != NULL) && (ent->checked == 0) && + (ctxt->errNo != XML_ERR_ENTITY_LOOP)) { + unsigned long oldnbent = ctxt->nbentities, diff; + xmlChar *rep; + + ent->checked = 1; + + ++ctxt->depth; + rep = xmlStringDecodeEntities(ctxt, ent->content, + XML_SUBSTITUTE_REF, 0, 0, 0); + --ctxt->depth; + if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) { + ent->content[0] = 0; + } + + diff = ctxt->nbentities - oldnbent + 1; + if (diff > INT_MAX / 2) + diff = INT_MAX / 2; + ent->checked = diff * 2; + if (rep != NULL) { + if (xmlStrchr(rep, '<')) + ent->checked |= 1; + xmlFree(rep); + rep = NULL; + } + } + if (replacement != 0) { + if (replacement < XML_MAX_TEXT_LENGTH) + return(0); + + /* + * If the volume of entity copy reaches 10 times the + * amount of parsed data and over the large text threshold + * then that's very likely to be an abuse. + */ + if (ctxt->input != NULL) { + consumed = ctxt->input->consumed + + (ctxt->input->cur - ctxt->input->base); + } + consumed += ctxt->sizeentities; + + if (replacement < XML_PARSER_NON_LINEAR * consumed) + return(0); + } else if (size != 0) { + /* + * Do the check based on the replacement size of the entity + */ + if (size < XML_PARSER_BIG_ENTITY) + return(0); + + /* + * A limit on the amount of text data reasonably used + */ + if (ctxt->input != NULL) { + consumed = ctxt->input->consumed + + (ctxt->input->cur - ctxt->input->base); + } + consumed += ctxt->sizeentities; + + if ((size < XML_PARSER_NON_LINEAR * consumed) && + (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) + return (0); + } else if (ent != NULL) { + /* + * use the number of parsed entities in the replacement + */ + size = ent->checked / 2; + + /* + * The amount of data parsed counting entities size only once + */ + if (ctxt->input != NULL) { + consumed = ctxt->input->consumed + + (ctxt->input->cur - ctxt->input->base); + } + consumed += ctxt->sizeentities; + + /* + * Check the density of entities for the amount of data + * knowing an entity reference will take at least 3 bytes + */ + if (size * 3 < consumed * XML_PARSER_NON_LINEAR) + return (0); + } else { + /* + * strange we got no data for checking + */ + if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && + (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || + (ctxt->nbentities <= 10000)) + return (0); + } + xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); + return (1); +} + +/** + * xmlParserMaxDepth: + * + * arbitrary depth limit for the XML documents that we allow to + * process. This is not a limitation of the parser but a safety + * boundary feature. It can be disabled with the XML_PARSE_HUGE + * parser option. + */ +unsigned int xmlParserMaxDepth = 256; + + + +#define SAX2 1 +#define XML_PARSER_BIG_BUFFER_SIZE 300 +#define XML_PARSER_BUFFER_SIZE 100 +#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" + +/** + * XML_PARSER_CHUNK_SIZE + * + * When calling GROW that's the minimal amount of data + * the parser expected to have received. It is not a hard + * limit but an optimization when reading strings like Names + * It is not strictly needed as long as inputs available characters + * are followed by 0, which should be provided by the I/O level + */ +#define XML_PARSER_CHUNK_SIZE 100 + +/* + * List of XML prefixed PI allowed by W3C specs + */ + +static const char *xmlW3CPIs[] = { + "xml-stylesheet", + "xml-model", + NULL +}; + + +/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ +static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, + const xmlChar **str); + +static xmlParserErrors +xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, + xmlSAXHandlerPtr sax, + void *user_data, int depth, const xmlChar *URL, + const xmlChar *ID, xmlNodePtr *list); + +static int +xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, + const char *encoding); +#ifdef LIBXML_LEGACY_ENABLED +static void +xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, + xmlNodePtr lastNode); +#endif /* LIBXML_LEGACY_ENABLED */ + +static xmlParserErrors +xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, + const xmlChar *string, void *user_data, xmlNodePtr *lst); + +static int +xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); + +/************************************************************************ + * * + * Some factorized error routines * + * * + ************************************************************************/ + +/** + * xmlErrAttributeDup: + * @ctxt: an XML parser context + * @prefix: the attribute prefix + * @localname: the attribute localname + * + * Handle a redefinition of attribute error + */ +static void +xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, + const xmlChar * localname) +{ + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + if (ctxt != NULL) + ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; + + if (prefix == NULL) + __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, + XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, + (const char *) localname, NULL, NULL, 0, 0, + "Attribute %s redefined\n", localname); + else + __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, + XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, + (const char *) prefix, (const char *) localname, + NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, + localname); + if (ctxt != NULL) { + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) + ctxt->disableSAX = 1; + } +} + +/** + * xmlFatalErr: + * @ctxt: an XML parser context + * @error: the error number + * @extra: extra information string + * + * Handle a fatal parser error, i.e. violating Well-Formedness constraints + */ +static void +xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) +{ + const char *errmsg; + + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + switch (error) { + case XML_ERR_INVALID_HEX_CHARREF: + errmsg = "CharRef: invalid hexadecimal value"; + break; + case XML_ERR_INVALID_DEC_CHARREF: + errmsg = "CharRef: invalid decimal value"; + break; + case XML_ERR_INVALID_CHARREF: + errmsg = "CharRef: invalid value"; + break; + case XML_ERR_INTERNAL_ERROR: + errmsg = "internal error"; + break; + case XML_ERR_PEREF_AT_EOF: + errmsg = "PEReference at end of document"; + break; + case XML_ERR_PEREF_IN_PROLOG: + errmsg = "PEReference in prolog"; + break; + case XML_ERR_PEREF_IN_EPILOG: + errmsg = "PEReference in epilog"; + break; + case XML_ERR_PEREF_NO_NAME: + errmsg = "PEReference: no name"; + break; + case XML_ERR_PEREF_SEMICOL_MISSING: + errmsg = "PEReference: expecting ';'"; + break; + case XML_ERR_ENTITY_LOOP: + errmsg = "Detected an entity reference loop"; + break; + case XML_ERR_ENTITY_NOT_STARTED: + errmsg = "EntityValue: \" or ' expected"; + break; + case XML_ERR_ENTITY_PE_INTERNAL: + errmsg = "PEReferences forbidden in internal subset"; + break; + case XML_ERR_ENTITY_NOT_FINISHED: + errmsg = "EntityValue: \" or ' expected"; + break; + case XML_ERR_ATTRIBUTE_NOT_STARTED: + errmsg = "AttValue: \" or ' expected"; + break; + case XML_ERR_LT_IN_ATTRIBUTE: + errmsg = "Unescaped '<' not allowed in attributes values"; + break; + case XML_ERR_LITERAL_NOT_STARTED: + errmsg = "SystemLiteral \" or ' expected"; + break; + case XML_ERR_LITERAL_NOT_FINISHED: + errmsg = "Unfinished System or Public ID \" or ' expected"; + break; + case XML_ERR_MISPLACED_CDATA_END: + errmsg = "Sequence ']]>' not allowed in content"; + break; + case XML_ERR_URI_REQUIRED: + errmsg = "SYSTEM or PUBLIC, the URI is missing"; + break; + case XML_ERR_PUBID_REQUIRED: + errmsg = "PUBLIC, the Public Identifier is missing"; + break; + case XML_ERR_HYPHEN_IN_COMMENT: + errmsg = "Comment must not contain '--' (double-hyphen)"; + break; + case XML_ERR_PI_NOT_STARTED: + errmsg = "xmlParsePI : no target name"; + break; + case XML_ERR_RESERVED_XML_NAME: + errmsg = "Invalid PI name"; + break; + case XML_ERR_NOTATION_NOT_STARTED: + errmsg = "NOTATION: Name expected here"; + break; + case XML_ERR_NOTATION_NOT_FINISHED: + errmsg = "'>' required to close NOTATION declaration"; + break; + case XML_ERR_VALUE_REQUIRED: + errmsg = "Entity value required"; + break; + case XML_ERR_URI_FRAGMENT: + errmsg = "Fragment not allowed"; + break; + case XML_ERR_ATTLIST_NOT_STARTED: + errmsg = "'(' required to start ATTLIST enumeration"; + break; + case XML_ERR_NMTOKEN_REQUIRED: + errmsg = "NmToken expected in ATTLIST enumeration"; + break; + case XML_ERR_ATTLIST_NOT_FINISHED: + errmsg = "')' required to finish ATTLIST enumeration"; + break; + case XML_ERR_MIXED_NOT_STARTED: + errmsg = "MixedContentDecl : '|' or ')*' expected"; + break; + case XML_ERR_PCDATA_REQUIRED: + errmsg = "MixedContentDecl : '#PCDATA' expected"; + break; + case XML_ERR_ELEMCONTENT_NOT_STARTED: + errmsg = "ContentDecl : Name or '(' expected"; + break; + case XML_ERR_ELEMCONTENT_NOT_FINISHED: + errmsg = "ContentDecl : ',' '|' or ')' expected"; + break; + case XML_ERR_PEREF_IN_INT_SUBSET: + errmsg = + "PEReference: forbidden within markup decl in internal subset"; + break; + case XML_ERR_GT_REQUIRED: + errmsg = "expected '>'"; + break; + case XML_ERR_CONDSEC_INVALID: + errmsg = "XML conditional section '[' expected"; + break; + case XML_ERR_EXT_SUBSET_NOT_FINISHED: + errmsg = "Content error in the external subset"; + break; + case XML_ERR_CONDSEC_INVALID_KEYWORD: + errmsg = + "conditional section INCLUDE or IGNORE keyword expected"; + break; + case XML_ERR_CONDSEC_NOT_FINISHED: + errmsg = "XML conditional section not closed"; + break; + case XML_ERR_XMLDECL_NOT_STARTED: + errmsg = "Text declaration '' expected"; + break; + case XML_ERR_EXT_ENTITY_STANDALONE: + errmsg = "external parsed entities cannot be standalone"; + break; + case XML_ERR_ENTITYREF_SEMICOL_MISSING: + errmsg = "EntityRef: expecting ';'"; + break; + case XML_ERR_DOCTYPE_NOT_FINISHED: + errmsg = "DOCTYPE improperly terminated"; + break; + case XML_ERR_LTSLASH_REQUIRED: + errmsg = "EndTag: 'errNo = error; + if (info == NULL) { + __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, + XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n", + errmsg); + } else { + __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, + XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n", + errmsg, info); + } + if (ctxt != NULL) { + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) + ctxt->disableSAX = 1; + } +} + +/** + * xmlFatalErrMsg: + * @ctxt: an XML parser context + * @error: the error number + * @msg: the error message + * + * Handle a fatal parser error, i.e. violating Well-Formedness constraints + */ +static void LIBXML_ATTR_FORMAT(3,0) +xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, + const char *msg) +{ + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + if (ctxt != NULL) + ctxt->errNo = error; + __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, + XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); + if (ctxt != NULL) { + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) + ctxt->disableSAX = 1; + } +} + +/** + * xmlWarningMsg: + * @ctxt: an XML parser context + * @error: the error number + * @msg: the error message + * @str1: extra data + * @str2: extra data + * + * Handle a warning. + */ +static void LIBXML_ATTR_FORMAT(3,0) +xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, + const char *msg, const xmlChar *str1, const xmlChar *str2) +{ + xmlStructuredErrorFunc schannel = NULL; + + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + if ((ctxt != NULL) && (ctxt->sax != NULL) && + (ctxt->sax->initialized == XML_SAX2_MAGIC)) + schannel = ctxt->sax->serror; + if (ctxt != NULL) { + __xmlRaiseError(schannel, + (ctxt->sax) ? ctxt->sax->warning : NULL, + ctxt->userData, + ctxt, NULL, XML_FROM_PARSER, error, + XML_ERR_WARNING, NULL, 0, + (const char *) str1, (const char *) str2, NULL, 0, 0, + msg, (const char *) str1, (const char *) str2); + } else { + __xmlRaiseError(schannel, NULL, NULL, + ctxt, NULL, XML_FROM_PARSER, error, + XML_ERR_WARNING, NULL, 0, + (const char *) str1, (const char *) str2, NULL, 0, 0, + msg, (const char *) str1, (const char *) str2); + } +} + +/** + * xmlValidityError: + * @ctxt: an XML parser context + * @error: the error number + * @msg: the error message + * @str1: extra data + * + * Handle a validity error. + */ +static void LIBXML_ATTR_FORMAT(3,0) +xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, + const char *msg, const xmlChar *str1, const xmlChar *str2) +{ + xmlStructuredErrorFunc schannel = NULL; + + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + if (ctxt != NULL) { + ctxt->errNo = error; + if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) + schannel = ctxt->sax->serror; + } + if (ctxt != NULL) { + __xmlRaiseError(schannel, + ctxt->vctxt.error, ctxt->vctxt.userData, + ctxt, NULL, XML_FROM_DTD, error, + XML_ERR_ERROR, NULL, 0, (const char *) str1, + (const char *) str2, NULL, 0, 0, + msg, (const char *) str1, (const char *) str2); + ctxt->valid = 0; + } else { + __xmlRaiseError(schannel, NULL, NULL, + ctxt, NULL, XML_FROM_DTD, error, + XML_ERR_ERROR, NULL, 0, (const char *) str1, + (const char *) str2, NULL, 0, 0, + msg, (const char *) str1, (const char *) str2); + } +} + +/** + * xmlFatalErrMsgInt: + * @ctxt: an XML parser context + * @error: the error number + * @msg: the error message + * @val: an integer value + * + * Handle a fatal parser error, i.e. violating Well-Formedness constraints + */ +static void LIBXML_ATTR_FORMAT(3,0) +xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, + const char *msg, int val) +{ + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + if (ctxt != NULL) + ctxt->errNo = error; + __xmlRaiseError(NULL, NULL, NULL, + ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, + NULL, 0, NULL, NULL, NULL, val, 0, msg, val); + if (ctxt != NULL) { + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) + ctxt->disableSAX = 1; + } +} + +/** + * xmlFatalErrMsgStrIntStr: + * @ctxt: an XML parser context + * @error: the error number + * @msg: the error message + * @str1: an string info + * @val: an integer value + * @str2: an string info + * + * Handle a fatal parser error, i.e. violating Well-Formedness constraints + */ +static void LIBXML_ATTR_FORMAT(3,0) +xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, + const char *msg, const xmlChar *str1, int val, + const xmlChar *str2) +{ + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + if (ctxt != NULL) + ctxt->errNo = error; + __xmlRaiseError(NULL, NULL, NULL, + ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, + NULL, 0, (const char *) str1, (const char *) str2, + NULL, val, 0, msg, str1, val, str2); + if (ctxt != NULL) { + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) + ctxt->disableSAX = 1; + } +} + +/** + * xmlFatalErrMsgStr: + * @ctxt: an XML parser context + * @error: the error number + * @msg: the error message + * @val: a string value + * + * Handle a fatal parser error, i.e. violating Well-Formedness constraints + */ +static void LIBXML_ATTR_FORMAT(3,0) +xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, + const char *msg, const xmlChar * val) +{ + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + if (ctxt != NULL) + ctxt->errNo = error; + __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, + XML_FROM_PARSER, error, XML_ERR_FATAL, + NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, + val); + if (ctxt != NULL) { + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) + ctxt->disableSAX = 1; + } +} + +/** + * xmlErrMsgStr: + * @ctxt: an XML parser context + * @error: the error number + * @msg: the error message + * @val: a string value + * + * Handle a non fatal parser error + */ +static void LIBXML_ATTR_FORMAT(3,0) +xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, + const char *msg, const xmlChar * val) +{ + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + if (ctxt != NULL) + ctxt->errNo = error; + __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, + XML_FROM_PARSER, error, XML_ERR_ERROR, + NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, + val); +} + +/** + * xmlNsErr: + * @ctxt: an XML parser context + * @error: the error number + * @msg: the message + * @info1: extra information string + * @info2: extra information string + * + * Handle a fatal parser error, i.e. violating Well-Formedness constraints + */ +static void LIBXML_ATTR_FORMAT(3,0) +xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, + const char *msg, + const xmlChar * info1, const xmlChar * info2, + const xmlChar * info3) +{ + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + if (ctxt != NULL) + ctxt->errNo = error; + __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, + XML_ERR_ERROR, NULL, 0, (const char *) info1, + (const char *) info2, (const char *) info3, 0, 0, msg, + info1, info2, info3); + if (ctxt != NULL) + ctxt->nsWellFormed = 0; +} + +/** + * xmlNsWarn + * @ctxt: an XML parser context + * @error: the error number + * @msg: the message + * @info1: extra information string + * @info2: extra information string + * + * Handle a namespace warning error + */ +static void LIBXML_ATTR_FORMAT(3,0) +xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, + const char *msg, + const xmlChar * info1, const xmlChar * info2, + const xmlChar * info3) +{ + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, + XML_ERR_WARNING, NULL, 0, (const char *) info1, + (const char *) info2, (const char *) info3, 0, 0, msg, + info1, info2, info3); +} + +/************************************************************************ + * * + * Library wide options * + * * + ************************************************************************/ + +/** + * xmlHasFeature: + * @feature: the feature to be examined + * + * Examines if the library has been compiled with a given feature. + * + * Returns a non-zero value if the feature exist, otherwise zero. + * Returns zero (0) if the feature does not exist or an unknown + * unknown feature is requested, non-zero otherwise. + */ +int +xmlHasFeature(xmlFeature feature) +{ + switch (feature) { + case XML_WITH_THREAD: +#ifdef LIBXML_THREAD_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_TREE: +#ifdef LIBXML_TREE_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_OUTPUT: +#ifdef LIBXML_OUTPUT_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_PUSH: +#ifdef LIBXML_PUSH_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_READER: +#ifdef LIBXML_READER_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_PATTERN: +#ifdef LIBXML_PATTERN_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_WRITER: +#ifdef LIBXML_WRITER_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_SAX1: +#ifdef LIBXML_SAX1_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_FTP: +#ifdef LIBXML_FTP_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_HTTP: +#ifdef LIBXML_HTTP_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_VALID: +#ifdef LIBXML_VALID_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_HTML: +#ifdef LIBXML_HTML_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_LEGACY: +#ifdef LIBXML_LEGACY_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_C14N: +#ifdef LIBXML_C14N_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_CATALOG: +#ifdef LIBXML_CATALOG_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_XPATH: +#ifdef LIBXML_XPATH_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_XPTR: +#ifdef LIBXML_XPTR_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_XINCLUDE: +#ifdef LIBXML_XINCLUDE_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_ICONV: +#ifdef LIBXML_ICONV_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_ISO8859X: +#ifdef LIBXML_ISO8859X_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_UNICODE: +#ifdef LIBXML_UNICODE_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_REGEXP: +#ifdef LIBXML_REGEXP_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_AUTOMATA: +#ifdef LIBXML_AUTOMATA_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_EXPR: +#ifdef LIBXML_EXPR_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_SCHEMAS: +#ifdef LIBXML_SCHEMAS_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_SCHEMATRON: +#ifdef LIBXML_SCHEMATRON_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_MODULES: +#ifdef LIBXML_MODULES_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_DEBUG: +#ifdef LIBXML_DEBUG_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_DEBUG_MEM: +#ifdef DEBUG_MEMORY_LOCATION + return(1); +#else + return(0); +#endif + case XML_WITH_DEBUG_RUN: +#ifdef LIBXML_DEBUG_RUNTIME + return(1); +#else + return(0); +#endif + case XML_WITH_ZLIB: +#ifdef LIBXML_ZLIB_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_LZMA: +#ifdef LIBXML_LZMA_ENABLED + return(1); +#else + return(0); +#endif + case XML_WITH_ICU: +#ifdef LIBXML_ICU_ENABLED + return(1); +#else + return(0); +#endif + default: + break; + } + return(0); +} + +/************************************************************************ + * * + * SAX2 defaulted attributes handling * + * * + ************************************************************************/ + +/** + * xmlDetectSAX2: + * @ctxt: an XML parser context + * + * Do the SAX2 detection and specific initialization + */ +static void +xmlDetectSAX2(xmlParserCtxtPtr ctxt) { + if (ctxt == NULL) return; +#ifdef LIBXML_SAX1_ENABLED + if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && + ((ctxt->sax->startElementNs != NULL) || + (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; +#else + ctxt->sax2 = 1; +#endif /* LIBXML_SAX1_ENABLED */ + + ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); + ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); + ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); + if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || + (ctxt->str_xml_ns == NULL)) { + xmlErrMemory(ctxt, NULL); + } +} + +typedef struct _xmlDefAttrs xmlDefAttrs; +typedef xmlDefAttrs *xmlDefAttrsPtr; +struct _xmlDefAttrs { + int nbAttrs; /* number of defaulted attributes on that element */ + int maxAttrs; /* the size of the array */ +#if __STDC_VERSION__ >= 199901L + /* Using a C99 flexible array member avoids UBSan errors. */ + const xmlChar *values[]; /* array of localname/prefix/values/external */ +#else + const xmlChar *values[5]; +#endif +}; + +/** + * xmlAttrNormalizeSpace: + * @src: the source string + * @dst: the target string + * + * Normalize the space in non CDATA attribute values: + * If the attribute type is not CDATA, then the XML processor MUST further + * process the normalized attribute value by discarding any leading and + * trailing space (#x20) characters, and by replacing sequences of space + * (#x20) characters by a single space (#x20) character. + * Note that the size of dst need to be at least src, and if one doesn't need + * to preserve dst (and it doesn't come from a dictionary or read-only) then + * passing src as dst is just fine. + * + * Returns a pointer to the normalized value (dst) or NULL if no conversion + * is needed. + */ +static xmlChar * +xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) +{ + if ((src == NULL) || (dst == NULL)) + return(NULL); + + while (*src == 0x20) src++; + while (*src != 0) { + if (*src == 0x20) { + while (*src == 0x20) src++; + if (*src != 0) + *dst++ = 0x20; + } else { + *dst++ = *src++; + } + } + *dst = 0; + if (dst == src) + return(NULL); + return(dst); +} + +/** + * xmlAttrNormalizeSpace2: + * @src: the source string + * + * Normalize the space in non CDATA attribute values, a slightly more complex + * front end to avoid allocation problems when running on attribute values + * coming from the input. + * + * Returns a pointer to the normalized value (dst) or NULL if no conversion + * is needed. + */ +static const xmlChar * +xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) +{ + int i; + int remove_head = 0; + int need_realloc = 0; + const xmlChar *cur; + + if ((ctxt == NULL) || (src == NULL) || (len == NULL)) + return(NULL); + i = *len; + if (i <= 0) + return(NULL); + + cur = src; + while (*cur == 0x20) { + cur++; + remove_head++; + } + while (*cur != 0) { + if (*cur == 0x20) { + cur++; + if ((*cur == 0x20) || (*cur == 0)) { + need_realloc = 1; + break; + } + } else + cur++; + } + if (need_realloc) { + xmlChar *ret; + + ret = xmlStrndup(src + remove_head, i - remove_head + 1); + if (ret == NULL) { + xmlErrMemory(ctxt, NULL); + return(NULL); + } + xmlAttrNormalizeSpace(ret, ret); + *len = (int) strlen((const char *)ret); + return(ret); + } else if (remove_head) { + *len -= remove_head; + memmove(src, src + remove_head, 1 + *len); + return(src); + } + return(NULL); +} + +/** + * xmlAddDefAttrs: + * @ctxt: an XML parser context + * @fullname: the element fullname + * @fullattr: the attribute fullname + * @value: the attribute value + * + * Add a defaulted attribute for an element + */ +static void +xmlAddDefAttrs(xmlParserCtxtPtr ctxt, + const xmlChar *fullname, + const xmlChar *fullattr, + const xmlChar *value) { + xmlDefAttrsPtr defaults; + int len; + const xmlChar *name; + const xmlChar *prefix; + + /* + * Allows to detect attribute redefinitions + */ + if (ctxt->attsSpecial != NULL) { + if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) + return; + } + + if (ctxt->attsDefault == NULL) { + ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); + if (ctxt->attsDefault == NULL) + goto mem_error; + } + + /* + * split the element name into prefix:localname , the string found + * are within the DTD and then not associated to namespace names. + */ + name = xmlSplitQName3(fullname, &len); + if (name == NULL) { + name = xmlDictLookup(ctxt->dict, fullname, -1); + prefix = NULL; + } else { + name = xmlDictLookup(ctxt->dict, name, -1); + prefix = xmlDictLookup(ctxt->dict, fullname, len); + } + + /* + * make sure there is some storage + */ + defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); + if (defaults == NULL) { + defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + + (4 * 5) * sizeof(const xmlChar *)); + if (defaults == NULL) + goto mem_error; + defaults->nbAttrs = 0; + defaults->maxAttrs = 4; + if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, + defaults, NULL) < 0) { + xmlFree(defaults); + goto mem_error; + } + } else if (defaults->nbAttrs >= defaults->maxAttrs) { + xmlDefAttrsPtr temp; + + temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + + (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); + if (temp == NULL) + goto mem_error; + defaults = temp; + defaults->maxAttrs *= 2; + if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, + defaults, NULL) < 0) { + xmlFree(defaults); + goto mem_error; + } + } + + /* + * Split the element name into prefix:localname , the string found + * are within the DTD and hen not associated to namespace names. + */ + name = xmlSplitQName3(fullattr, &len); + if (name == NULL) { + name = xmlDictLookup(ctxt->dict, fullattr, -1); + prefix = NULL; + } else { + name = xmlDictLookup(ctxt->dict, name, -1); + prefix = xmlDictLookup(ctxt->dict, fullattr, len); + } + + defaults->values[5 * defaults->nbAttrs] = name; + defaults->values[5 * defaults->nbAttrs + 1] = prefix; + /* intern the string and precompute the end */ + len = xmlStrlen(value); + value = xmlDictLookup(ctxt->dict, value, len); + defaults->values[5 * defaults->nbAttrs + 2] = value; + defaults->values[5 * defaults->nbAttrs + 3] = value + len; + if (ctxt->external) + defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; + else + defaults->values[5 * defaults->nbAttrs + 4] = NULL; + defaults->nbAttrs++; + + return; + +mem_error: + xmlErrMemory(ctxt, NULL); + return; +} + +/** + * xmlAddSpecialAttr: + * @ctxt: an XML parser context + * @fullname: the element fullname + * @fullattr: the attribute fullname + * @type: the attribute type + * + * Register this attribute type + */ +static void +xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, + const xmlChar *fullname, + const xmlChar *fullattr, + int type) +{ + if (ctxt->attsSpecial == NULL) { + ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); + if (ctxt->attsSpecial == NULL) + goto mem_error; + } + + if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) + return; + + xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, + (void *) (ptrdiff_t) type); + return; + +mem_error: + xmlErrMemory(ctxt, NULL); + return; +} + +/** + * xmlCleanSpecialAttrCallback: + * + * Removes CDATA attributes from the special attribute table + */ +static void +xmlCleanSpecialAttrCallback(void *payload, void *data, + const xmlChar *fullname, const xmlChar *fullattr, + const xmlChar *unused ATTRIBUTE_UNUSED) { + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; + + if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) { + xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); + } +} + +/** + * xmlCleanSpecialAttr: + * @ctxt: an XML parser context + * + * Trim the list of attributes defined to remove all those of type + * CDATA as they are not special. This call should be done when finishing + * to parse the DTD and before starting to parse the document root. + */ +static void +xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) +{ + if (ctxt->attsSpecial == NULL) + return; + + xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); + + if (xmlHashSize(ctxt->attsSpecial) == 0) { + xmlHashFree(ctxt->attsSpecial, NULL); + ctxt->attsSpecial = NULL; + } + return; +} + +/** + * xmlCheckLanguageID: + * @lang: pointer to the string value + * + * Checks that the value conforms to the LanguageID production: + * + * NOTE: this is somewhat deprecated, those productions were removed from + * the XML Second edition. + * + * [33] LanguageID ::= Langcode ('-' Subcode)* + * [34] Langcode ::= ISO639Code | IanaCode | UserCode + * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) + * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ + * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ + * [38] Subcode ::= ([a-z] | [A-Z])+ + * + * The current REC reference the successors of RFC 1766, currently 5646 + * + * http://www.rfc-editor.org/rfc/rfc5646.txt + * langtag = language + * ["-" script] + * ["-" region] + * *("-" variant) + * *("-" extension) + * ["-" privateuse] + * language = 2*3ALPHA ; shortest ISO 639 code + * ["-" extlang] ; sometimes followed by + * ; extended language subtags + * / 4ALPHA ; or reserved for future use + * / 5*8ALPHA ; or registered language subtag + * + * extlang = 3ALPHA ; selected ISO 639 codes + * *2("-" 3ALPHA) ; permanently reserved + * + * script = 4ALPHA ; ISO 15924 code + * + * region = 2ALPHA ; ISO 3166-1 code + * / 3DIGIT ; UN M.49 code + * + * variant = 5*8alphanum ; registered variants + * / (DIGIT 3alphanum) + * + * extension = singleton 1*("-" (2*8alphanum)) + * + * ; Single alphanumerics + * ; "x" reserved for private use + * singleton = DIGIT ; 0 - 9 + * / %x41-57 ; A - W + * / %x59-5A ; Y - Z + * / %x61-77 ; a - w + * / %x79-7A ; y - z + * + * it sounds right to still allow Irregular i-xxx IANA and user codes too + * The parser below doesn't try to cope with extension or privateuse + * that could be added but that's not interoperable anyway + * + * Returns 1 if correct 0 otherwise + **/ +int +xmlCheckLanguageID(const xmlChar * lang) +{ + const xmlChar *cur = lang, *nxt; + + if (cur == NULL) + return (0); + if (((cur[0] == 'i') && (cur[1] == '-')) || + ((cur[0] == 'I') && (cur[1] == '-')) || + ((cur[0] == 'x') && (cur[1] == '-')) || + ((cur[0] == 'X') && (cur[1] == '-'))) { + /* + * Still allow IANA code and user code which were coming + * from the previous version of the XML-1.0 specification + * it's deprecated but we should not fail + */ + cur += 2; + while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || + ((cur[0] >= 'a') && (cur[0] <= 'z'))) + cur++; + return(cur[0] == 0); + } + nxt = cur; + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || + ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) + nxt++; + if (nxt - cur >= 4) { + /* + * Reserved + */ + if ((nxt - cur > 8) || (nxt[0] != 0)) + return(0); + return(1); + } + if (nxt - cur < 2) + return(0); + /* we got an ISO 639 code */ + if (nxt[0] == 0) + return(1); + if (nxt[0] != '-') + return(0); + + nxt++; + cur = nxt; + /* now we can have extlang or script or region or variant */ + if ((nxt[0] >= '0') && (nxt[0] <= '9')) + goto region_m49; + + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || + ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) + nxt++; + if (nxt - cur == 4) + goto script; + if (nxt - cur == 2) + goto region; + if ((nxt - cur >= 5) && (nxt - cur <= 8)) + goto variant; + if (nxt - cur != 3) + return(0); + /* we parsed an extlang */ + if (nxt[0] == 0) + return(1); + if (nxt[0] != '-') + return(0); + + nxt++; + cur = nxt; + /* now we can have script or region or variant */ + if ((nxt[0] >= '0') && (nxt[0] <= '9')) + goto region_m49; + + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || + ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) + nxt++; + if (nxt - cur == 2) + goto region; + if ((nxt - cur >= 5) && (nxt - cur <= 8)) + goto variant; + if (nxt - cur != 4) + return(0); + /* we parsed a script */ +script: + if (nxt[0] == 0) + return(1); + if (nxt[0] != '-') + return(0); + + nxt++; + cur = nxt; + /* now we can have region or variant */ + if ((nxt[0] >= '0') && (nxt[0] <= '9')) + goto region_m49; + + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || + ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) + nxt++; + + if ((nxt - cur >= 5) && (nxt - cur <= 8)) + goto variant; + if (nxt - cur != 2) + return(0); + /* we parsed a region */ +region: + if (nxt[0] == 0) + return(1); + if (nxt[0] != '-') + return(0); + + nxt++; + cur = nxt; + /* now we can just have a variant */ + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || + ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) + nxt++; + + if ((nxt - cur < 5) || (nxt - cur > 8)) + return(0); + + /* we parsed a variant */ +variant: + if (nxt[0] == 0) + return(1); + if (nxt[0] != '-') + return(0); + /* extensions and private use subtags not checked */ + return (1); + +region_m49: + if (((nxt[1] >= '0') && (nxt[1] <= '9')) && + ((nxt[2] >= '0') && (nxt[2] <= '9'))) { + nxt += 3; + goto region; + } + return(0); +} + +/************************************************************************ + * * + * Parser stacks related functions and macros * + * * + ************************************************************************/ + +static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, + const xmlChar ** str); + +#ifdef SAX2 +/** + * nsPush: + * @ctxt: an XML parser context + * @prefix: the namespace prefix or NULL + * @URL: the namespace name + * + * Pushes a new parser namespace on top of the ns stack + * + * Returns -1 in case of error, -2 if the namespace should be discarded + * and the index in the stack otherwise. + */ +static int +nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) +{ + if (ctxt->options & XML_PARSE_NSCLEAN) { + int i; + for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { + if (ctxt->nsTab[i] == prefix) { + /* in scope */ + if (ctxt->nsTab[i + 1] == URL) + return(-2); + /* out of scope keep it */ + break; + } + } + } + if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { + ctxt->nsMax = 10; + ctxt->nsNr = 0; + ctxt->nsTab = (const xmlChar **) + xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); + if (ctxt->nsTab == NULL) { + xmlErrMemory(ctxt, NULL); + ctxt->nsMax = 0; + return (-1); + } + } else if (ctxt->nsNr >= ctxt->nsMax) { + const xmlChar ** tmp; + ctxt->nsMax *= 2; + tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, + ctxt->nsMax * sizeof(ctxt->nsTab[0])); + if (tmp == NULL) { + xmlErrMemory(ctxt, NULL); + ctxt->nsMax /= 2; + return (-1); + } + ctxt->nsTab = tmp; + } + ctxt->nsTab[ctxt->nsNr++] = prefix; + ctxt->nsTab[ctxt->nsNr++] = URL; + return (ctxt->nsNr); +} +/** + * nsPop: + * @ctxt: an XML parser context + * @nr: the number to pop + * + * Pops the top @nr parser prefix/namespace from the ns stack + * + * Returns the number of namespaces removed + */ +static int +nsPop(xmlParserCtxtPtr ctxt, int nr) +{ + int i; + + if (ctxt->nsTab == NULL) return(0); + if (ctxt->nsNr < nr) { + xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); + nr = ctxt->nsNr; + } + if (ctxt->nsNr <= 0) + return (0); + + for (i = 0;i < nr;i++) { + ctxt->nsNr--; + ctxt->nsTab[ctxt->nsNr] = NULL; + } + return(nr); +} +#endif + +static int +xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { + const xmlChar **atts; + int *attallocs; + int maxatts; + + if (ctxt->atts == NULL) { + maxatts = 55; /* allow for 10 attrs by default */ + atts = (const xmlChar **) + xmlMalloc(maxatts * sizeof(xmlChar *)); + if (atts == NULL) goto mem_error; + ctxt->atts = atts; + attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); + if (attallocs == NULL) goto mem_error; + ctxt->attallocs = attallocs; + ctxt->maxatts = maxatts; + } else if (nr + 5 > ctxt->maxatts) { + maxatts = (nr + 5) * 2; + atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, + maxatts * sizeof(const xmlChar *)); + if (atts == NULL) goto mem_error; + ctxt->atts = atts; + attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, + (maxatts / 5) * sizeof(int)); + if (attallocs == NULL) goto mem_error; + ctxt->attallocs = attallocs; + ctxt->maxatts = maxatts; + } + return(ctxt->maxatts); +mem_error: + xmlErrMemory(ctxt, NULL); + return(-1); +} + +/** + * inputPush: + * @ctxt: an XML parser context + * @value: the parser input + * + * Pushes a new parser input on top of the input stack + * + * Returns -1 in case of error, the index in the stack otherwise + */ +int +inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) +{ + if ((ctxt == NULL) || (value == NULL)) + return(-1); + if (ctxt->inputNr >= ctxt->inputMax) { + ctxt->inputMax *= 2; + ctxt->inputTab = + (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, + ctxt->inputMax * + sizeof(ctxt->inputTab[0])); + if (ctxt->inputTab == NULL) { + xmlErrMemory(ctxt, NULL); + xmlFreeInputStream(value); + ctxt->inputMax /= 2; + value = NULL; + return (-1); + } + } + ctxt->inputTab[ctxt->inputNr] = value; + ctxt->input = value; + return (ctxt->inputNr++); +} +/** + * inputPop: + * @ctxt: an XML parser context + * + * Pops the top parser input from the input stack + * + * Returns the input just removed + */ +xmlParserInputPtr +inputPop(xmlParserCtxtPtr ctxt) +{ + xmlParserInputPtr ret; + + if (ctxt == NULL) + return(NULL); + if (ctxt->inputNr <= 0) + return (NULL); + ctxt->inputNr--; + if (ctxt->inputNr > 0) + ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; + else + ctxt->input = NULL; + ret = ctxt->inputTab[ctxt->inputNr]; + ctxt->inputTab[ctxt->inputNr] = NULL; + return (ret); +} +/** + * nodePush: + * @ctxt: an XML parser context + * @value: the element node + * + * Pushes a new element node on top of the node stack + * + * Returns -1 in case of error, the index in the stack otherwise + */ +int +nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) +{ + if (ctxt == NULL) return(0); + if (ctxt->nodeNr >= ctxt->nodeMax) { + xmlNodePtr *tmp; + + tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, + ctxt->nodeMax * 2 * + sizeof(ctxt->nodeTab[0])); + if (tmp == NULL) { + xmlErrMemory(ctxt, NULL); + return (-1); + } + ctxt->nodeTab = tmp; + ctxt->nodeMax *= 2; + } + if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, + "Excessive depth in document: %d use XML_PARSE_HUGE option\n", + xmlParserMaxDepth); + xmlHaltParser(ctxt); + return(-1); + } + ctxt->nodeTab[ctxt->nodeNr] = value; + ctxt->node = value; + return (ctxt->nodeNr++); +} + +/** + * nodePop: + * @ctxt: an XML parser context + * + * Pops the top element node from the node stack + * + * Returns the node just removed + */ +xmlNodePtr +nodePop(xmlParserCtxtPtr ctxt) +{ + xmlNodePtr ret; + + if (ctxt == NULL) return(NULL); + if (ctxt->nodeNr <= 0) + return (NULL); + ctxt->nodeNr--; + if (ctxt->nodeNr > 0) + ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; + else + ctxt->node = NULL; + ret = ctxt->nodeTab[ctxt->nodeNr]; + ctxt->nodeTab[ctxt->nodeNr] = NULL; + return (ret); +} + +/** + * nameNsPush: + * @ctxt: an XML parser context + * @value: the element name + * @prefix: the element prefix + * @URI: the element namespace name + * + * Pushes a new element name/prefix/URL on top of the name stack + * + * Returns -1 in case of error, the index in the stack otherwise + */ +static int +nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, + const xmlChar *prefix, const xmlChar *URI, int nsNr) +{ + if (ctxt->nameNr >= ctxt->nameMax) { + const xmlChar * *tmp; + void **tmp2; + ctxt->nameMax *= 2; + tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, + ctxt->nameMax * + sizeof(ctxt->nameTab[0])); + if (tmp == NULL) { + ctxt->nameMax /= 2; + goto mem_error; + } + ctxt->nameTab = tmp; + tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, + ctxt->nameMax * 3 * + sizeof(ctxt->pushTab[0])); + if (tmp2 == NULL) { + ctxt->nameMax /= 2; + goto mem_error; + } + ctxt->pushTab = tmp2; + } else if (ctxt->pushTab == NULL) { + ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * + sizeof(ctxt->pushTab[0])); + if (ctxt->pushTab == NULL) + goto mem_error; + } + ctxt->nameTab[ctxt->nameNr] = value; + ctxt->name = value; + ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; + ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; + ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr; + return (ctxt->nameNr++); +mem_error: + xmlErrMemory(ctxt, NULL); + return (-1); +} +#ifdef LIBXML_PUSH_ENABLED +/** + * nameNsPop: + * @ctxt: an XML parser context + * + * Pops the top element/prefix/URI name from the name stack + * + * Returns the name just removed + */ +static const xmlChar * +nameNsPop(xmlParserCtxtPtr ctxt) +{ + const xmlChar *ret; + + if (ctxt->nameNr <= 0) + return (NULL); + ctxt->nameNr--; + if (ctxt->nameNr > 0) + ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; + else + ctxt->name = NULL; + ret = ctxt->nameTab[ctxt->nameNr]; + ctxt->nameTab[ctxt->nameNr] = NULL; + return (ret); +} +#endif /* LIBXML_PUSH_ENABLED */ + +/** + * namePush: + * @ctxt: an XML parser context + * @value: the element name + * + * Pushes a new element name on top of the name stack + * + * Returns -1 in case of error, the index in the stack otherwise + */ +int +namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) +{ + if (ctxt == NULL) return (-1); + + if (ctxt->nameNr >= ctxt->nameMax) { + const xmlChar * *tmp; + tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, + ctxt->nameMax * 2 * + sizeof(ctxt->nameTab[0])); + if (tmp == NULL) { + goto mem_error; + } + ctxt->nameTab = tmp; + ctxt->nameMax *= 2; + } + ctxt->nameTab[ctxt->nameNr] = value; + ctxt->name = value; + return (ctxt->nameNr++); +mem_error: + xmlErrMemory(ctxt, NULL); + return (-1); +} +/** + * namePop: + * @ctxt: an XML parser context + * + * Pops the top element name from the name stack + * + * Returns the name just removed + */ +const xmlChar * +namePop(xmlParserCtxtPtr ctxt) +{ + const xmlChar *ret; + + if ((ctxt == NULL) || (ctxt->nameNr <= 0)) + return (NULL); + ctxt->nameNr--; + if (ctxt->nameNr > 0) + ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; + else + ctxt->name = NULL; + ret = ctxt->nameTab[ctxt->nameNr]; + ctxt->nameTab[ctxt->nameNr] = NULL; + return (ret); +} + +static int spacePush(xmlParserCtxtPtr ctxt, int val) { + if (ctxt->spaceNr >= ctxt->spaceMax) { + int *tmp; + + ctxt->spaceMax *= 2; + tmp = (int *) xmlRealloc(ctxt->spaceTab, + ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); + if (tmp == NULL) { + xmlErrMemory(ctxt, NULL); + ctxt->spaceMax /=2; + return(-1); + } + ctxt->spaceTab = tmp; + } + ctxt->spaceTab[ctxt->spaceNr] = val; + ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; + return(ctxt->spaceNr++); +} + +static int spacePop(xmlParserCtxtPtr ctxt) { + int ret; + if (ctxt->spaceNr <= 0) return(0); + ctxt->spaceNr--; + if (ctxt->spaceNr > 0) + ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; + else + ctxt->space = &ctxt->spaceTab[0]; + ret = ctxt->spaceTab[ctxt->spaceNr]; + ctxt->spaceTab[ctxt->spaceNr] = -1; + return(ret); +} + +/* + * Macros for accessing the content. Those should be used only by the parser, + * and not exported. + * + * Dirty macros, i.e. one often need to make assumption on the context to + * use them + * + * CUR_PTR return the current pointer to the xmlChar to be parsed. + * To be used with extreme caution since operations consuming + * characters may move the input buffer to a different location ! + * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled + * This should be used internally by the parser + * only to compare to ASCII values otherwise it would break when + * running with UTF-8 encoding. + * RAW same as CUR but in the input buffer, bypass any token + * extraction that may have been done + * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only + * to compare on ASCII based substring. + * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined + * strings without newlines within the parser. + * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII + * defined char within the parser. + * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding + * + * NEXT Skip to the next character, this does the proper decoding + * in UTF-8 mode. It also pop-up unfinished entities on the fly. + * NEXTL(l) Skip the current unicode character of l xmlChars long. + * CUR_CHAR(l) returns the current unicode character (int), set l + * to the number of xmlChars used for the encoding [0-5]. + * CUR_SCHAR same but operate on a string instead of the context + * COPY_BUF copy the current unicode char to the target buffer, increment + * the index + * GROW, SHRINK handling of input buffers + */ + +#define RAW (*ctxt->input->cur) +#define CUR (*ctxt->input->cur) +#define NXT(val) ctxt->input->cur[(val)] +#define CUR_PTR ctxt->input->cur +#define BASE_PTR ctxt->input->base + +#define CMP4( s, c1, c2, c3, c4 ) \ + ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ + ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) +#define CMP5( s, c1, c2, c3, c4, c5 ) \ + ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) +#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ + ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) +#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ + ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) +#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ + ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) +#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ + ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ + ((unsigned char *) s)[ 8 ] == c9 ) +#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ + ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ + ((unsigned char *) s)[ 9 ] == c10 ) + +#define SKIP(val) do { \ + ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ + if (*ctxt->input->cur == 0) \ + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ + } while (0) + +#define SKIPL(val) do { \ + int skipl; \ + for(skipl=0; skiplinput->cur) == '\n') { \ + ctxt->input->line++; ctxt->input->col = 1; \ + } else ctxt->input->col++; \ + ctxt->nbChars++; \ + ctxt->input->cur++; \ + } \ + if (*ctxt->input->cur == 0) \ + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ + } while (0) + +#define SHRINK if ((ctxt->progressive == 0) && \ + (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ + (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ + xmlSHRINK (ctxt); + +static void xmlSHRINK (xmlParserCtxtPtr ctxt) { + xmlParserInputShrink(ctxt->input); + if (*ctxt->input->cur == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); +} + +#define GROW if ((ctxt->progressive == 0) && \ + (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ + xmlGROW (ctxt); + +static void xmlGROW (xmlParserCtxtPtr ctxt) { + ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur; + ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base; + + if (((curEnd > XML_MAX_LOOKUP_LIMIT) || + (curBase > XML_MAX_LOOKUP_LIMIT)) && + ((ctxt->input->buf) && + (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); + xmlHaltParser(ctxt); + return; + } + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + if ((ctxt->input->cur > ctxt->input->end) || + (ctxt->input->cur < ctxt->input->base)) { + xmlHaltParser(ctxt); + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound"); + return; + } + if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0)) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); +} + +#define SKIP_BLANKS xmlSkipBlankChars(ctxt) + +#define NEXT xmlNextChar(ctxt) + +#define NEXT1 { \ + ctxt->input->col++; \ + ctxt->input->cur++; \ + ctxt->nbChars++; \ + if (*ctxt->input->cur == 0) \ + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ + } + +#define NEXTL(l) do { \ + if (*(ctxt->input->cur) == '\n') { \ + ctxt->input->line++; ctxt->input->col = 1; \ + } else ctxt->input->col++; \ + ctxt->input->cur += l; \ + } while (0) + +#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) +#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) + +#define COPY_BUF(l,b,i,v) \ + if (l == 1) b[i++] = (xmlChar) v; \ + else i += xmlCopyCharMultiByte(&b[i],v) + +/** + * xmlSkipBlankChars: + * @ctxt: the XML parser context + * + * skip all blanks character found at that point in the input streams. + * It pops up finished entities in the process if allowable at that point. + * + * Returns the number of space chars skipped + */ + +int +xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { + int res = 0; + + /* + * It's Okay to use CUR/NEXT here since all the blanks are on + * the ASCII range. + */ + if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { + const xmlChar *cur; + /* + * if we are in the document content, go really fast + */ + cur = ctxt->input->cur; + while (IS_BLANK_CH(*cur)) { + if (*cur == '\n') { + ctxt->input->line++; ctxt->input->col = 1; + } else { + ctxt->input->col++; + } + cur++; + res++; + if (*cur == 0) { + ctxt->input->cur = cur; + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } + } + ctxt->input->cur = cur; + } else { + int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1)); + + while (1) { + if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */ + NEXT; + } else if (CUR == '%') { + /* + * Need to handle support of entities branching here + */ + if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0)) + break; + xmlParsePEReference(ctxt); + } else if (CUR == 0) { + if (ctxt->inputNr <= 1) + break; + xmlPopInput(ctxt); + } else { + break; + } + + /* + * Also increase the counter when entering or exiting a PERef. + * The spec says: "When a parameter-entity reference is recognized + * in the DTD and included, its replacement text MUST be enlarged + * by the attachment of one leading and one following space (#x20) + * character." + */ + res++; + } + } + return(res); +} + +/************************************************************************ + * * + * Commodity functions to handle entities * + * * + ************************************************************************/ + +/** + * xmlPopInput: + * @ctxt: an XML parser context + * + * xmlPopInput: the current input pointed by ctxt->input came to an end + * pop it and return the next char. + * + * Returns the current xmlChar in the parser context + */ +xmlChar +xmlPopInput(xmlParserCtxtPtr ctxt) { + if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "Popping input %d\n", ctxt->inputNr); + if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) && + (ctxt->instate != XML_PARSER_EOF)) + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, + "Unfinished entity outside the DTD"); + xmlFreeInputStream(inputPop(ctxt)); + if (*ctxt->input->cur == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + return(CUR); +} + +/** + * xmlPushInput: + * @ctxt: an XML parser context + * @input: an XML parser input fragment (entity, XML fragment ...). + * + * xmlPushInput: switch to a new input stream which is stacked on top + * of the previous one(s). + * Returns -1 in case of error or the index in the input stack + */ +int +xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { + int ret; + if (input == NULL) return(-1); + + if (xmlParserDebugEntities) { + if ((ctxt->input != NULL) && (ctxt->input->filename)) + xmlGenericError(xmlGenericErrorContext, + "%s(%d): ", ctxt->input->filename, + ctxt->input->line); + xmlGenericError(xmlGenericErrorContext, + "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); + } + if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || + (ctxt->inputNr > 1024)) { + xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); + while (ctxt->inputNr > 1) + xmlFreeInputStream(inputPop(ctxt)); + return(-1); + } + ret = inputPush(ctxt, input); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); + GROW; + return(ret); +} + +/** + * xmlParseCharRef: + * @ctxt: an XML parser context + * + * parse Reference declarations + * + * [66] CharRef ::= '&#' [0-9]+ ';' | + * '&#x' [0-9a-fA-F]+ ';' + * + * [ WFC: Legal Character ] + * Characters referred to using character references must match the + * production for Char. + * + * Returns the value parsed (as an int), 0 in case of error + */ +int +xmlParseCharRef(xmlParserCtxtPtr ctxt) { + int val = 0; + int count = 0; + + /* + * Using RAW/CUR/NEXT is okay since we are working on ASCII range here + */ + if ((RAW == '&') && (NXT(1) == '#') && + (NXT(2) == 'x')) { + SKIP(3); + GROW; + while (RAW != ';') { /* loop blocked by count */ + if (count++ > 20) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(0); + } + if ((RAW >= '0') && (RAW <= '9')) + val = val * 16 + (CUR - '0'); + else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) + val = val * 16 + (CUR - 'a') + 10; + else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) + val = val * 16 + (CUR - 'A') + 10; + else { + xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); + val = 0; + break; + } + if (val > 0x110000) + val = 0x110000; + + NEXT; + count++; + } + if (RAW == ';') { + /* on purpose to avoid reentrancy problems with NEXT and SKIP */ + ctxt->input->col++; + ctxt->nbChars ++; + ctxt->input->cur++; + } + } else if ((RAW == '&') && (NXT(1) == '#')) { + SKIP(2); + GROW; + while (RAW != ';') { /* loop blocked by count */ + if (count++ > 20) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(0); + } + if ((RAW >= '0') && (RAW <= '9')) + val = val * 10 + (CUR - '0'); + else { + xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); + val = 0; + break; + } + if (val > 0x110000) + val = 0x110000; + + NEXT; + count++; + } + if (RAW == ';') { + /* on purpose to avoid reentrancy problems with NEXT and SKIP */ + ctxt->input->col++; + ctxt->nbChars ++; + ctxt->input->cur++; + } + } else { + xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); + } + + /* + * [ WFC: Legal Character ] + * Characters referred to using character references must match the + * production for Char. + */ + if (val >= 0x110000) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "xmlParseCharRef: character reference out of bounds\n", + val); + } else if (IS_CHAR(val)) { + return(val); + } else { + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "xmlParseCharRef: invalid xmlChar value %d\n", + val); + } + return(0); +} + +/** + * xmlParseStringCharRef: + * @ctxt: an XML parser context + * @str: a pointer to an index in the string + * + * parse Reference declarations, variant parsing from a string rather + * than an an input flow. + * + * [66] CharRef ::= '&#' [0-9]+ ';' | + * '&#x' [0-9a-fA-F]+ ';' + * + * [ WFC: Legal Character ] + * Characters referred to using character references must match the + * production for Char. + * + * Returns the value parsed (as an int), 0 in case of error, str will be + * updated to the current value of the index + */ +static int +xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { + const xmlChar *ptr; + xmlChar cur; + int val = 0; + + if ((str == NULL) || (*str == NULL)) return(0); + ptr = *str; + cur = *ptr; + if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { + ptr += 3; + cur = *ptr; + while (cur != ';') { /* Non input consuming loop */ + if ((cur >= '0') && (cur <= '9')) + val = val * 16 + (cur - '0'); + else if ((cur >= 'a') && (cur <= 'f')) + val = val * 16 + (cur - 'a') + 10; + else if ((cur >= 'A') && (cur <= 'F')) + val = val * 16 + (cur - 'A') + 10; + else { + xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); + val = 0; + break; + } + if (val > 0x110000) + val = 0x110000; + + ptr++; + cur = *ptr; + } + if (cur == ';') + ptr++; + } else if ((cur == '&') && (ptr[1] == '#')){ + ptr += 2; + cur = *ptr; + while (cur != ';') { /* Non input consuming loops */ + if ((cur >= '0') && (cur <= '9')) + val = val * 10 + (cur - '0'); + else { + xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); + val = 0; + break; + } + if (val > 0x110000) + val = 0x110000; + + ptr++; + cur = *ptr; + } + if (cur == ';') + ptr++; + } else { + xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); + return(0); + } + *str = ptr; + + /* + * [ WFC: Legal Character ] + * Characters referred to using character references must match the + * production for Char. + */ + if (val >= 0x110000) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "xmlParseStringCharRef: character reference out of bounds\n", + val); + } else if (IS_CHAR(val)) { + return(val); + } else { + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "xmlParseStringCharRef: invalid xmlChar value %d\n", + val); + } + return(0); +} + +/** + * xmlParserHandlePEReference: + * @ctxt: the parser context + * + * [69] PEReference ::= '%' Name ';' + * + * [ WFC: No Recursion ] + * A parsed entity must not contain a recursive + * reference to itself, either directly or indirectly. + * + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an internal DTD + * subset which contains no parameter entity references, or a document + * with "standalone='yes'", ... ... The declaration of a parameter + * entity must precede any reference to it... + * + * [ VC: Entity Declared ] + * In a document with an external subset or external parameter entities + * with "standalone='no'", ... ... The declaration of a parameter entity + * must precede any reference to it... + * + * [ WFC: In DTD ] + * Parameter-entity references may only appear in the DTD. + * NOTE: misleading but this is handled. + * + * A PEReference may have been detected in the current input stream + * the handling is done accordingly to + * http://www.w3.org/TR/REC-xml#entproc + * i.e. + * - Included in literal in entity values + * - Included as Parameter Entity reference within DTDs + */ +void +xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { + switch(ctxt->instate) { + case XML_PARSER_CDATA_SECTION: + return; + case XML_PARSER_COMMENT: + return; + case XML_PARSER_START_TAG: + return; + case XML_PARSER_END_TAG: + return; + case XML_PARSER_EOF: + xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); + return; + case XML_PARSER_PROLOG: + case XML_PARSER_START: + case XML_PARSER_MISC: + xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); + return; + case XML_PARSER_ENTITY_DECL: + case XML_PARSER_CONTENT: + case XML_PARSER_ATTRIBUTE_VALUE: + case XML_PARSER_PI: + case XML_PARSER_SYSTEM_LITERAL: + case XML_PARSER_PUBLIC_LITERAL: + /* we just ignore it there */ + return; + case XML_PARSER_EPILOG: + xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); + return; + case XML_PARSER_ENTITY_VALUE: + /* + * NOTE: in the case of entity values, we don't do the + * substitution here since we need the literal + * entity value to be able to save the internal + * subset of the document. + * This will be handled by xmlStringDecodeEntities + */ + return; + case XML_PARSER_DTD: + /* + * [WFC: Well-Formedness Constraint: PEs in Internal Subset] + * In the internal DTD subset, parameter-entity references + * can occur only where markup declarations can occur, not + * within markup declarations. + * In that case this is handled in xmlParseMarkupDecl + */ + if ((ctxt->external == 0) && (ctxt->inputNr == 1)) + return; + if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) + return; + break; + case XML_PARSER_IGNORE: + return; + } + + xmlParsePEReference(ctxt); +} + +/* + * Macro used to grow the current buffer. + * buffer##_size is expected to be a size_t + * mem_error: is expected to handle memory allocation failures + */ +#define growBuffer(buffer, n) { \ + xmlChar *tmp; \ + size_t new_size = buffer##_size * 2 + n; \ + if (new_size < buffer##_size) goto mem_error; \ + tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ + if (tmp == NULL) goto mem_error; \ + buffer = tmp; \ + buffer##_size = new_size; \ +} + +/** + * xmlStringLenDecodeEntities: + * @ctxt: the parser context + * @str: the input string + * @len: the string length + * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF + * @end: an end marker xmlChar, 0 if none + * @end2: an end marker xmlChar, 0 if none + * @end3: an end marker xmlChar, 0 if none + * + * Takes a entity string content and process to do the adequate substitutions. + * + * [67] Reference ::= EntityRef | CharRef + * + * [69] PEReference ::= '%' Name ';' + * + * Returns A newly allocated string with the substitution done. The caller + * must deallocate it ! + */ +xmlChar * +xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, + int what, xmlChar end, xmlChar end2, xmlChar end3) { + xmlChar *buffer = NULL; + size_t buffer_size = 0; + size_t nbchars = 0; + + xmlChar *current = NULL; + xmlChar *rep = NULL; + const xmlChar *last; + xmlEntityPtr ent; + int c,l; + + if ((ctxt == NULL) || (str == NULL) || (len < 0)) + return(NULL); + last = str + len; + + if (((ctxt->depth > 40) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) || + (ctxt->depth > 1024)) { + xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); + return(NULL); + } + + /* + * allocate a translation buffer. + */ + buffer_size = XML_PARSER_BIG_BUFFER_SIZE; + buffer = (xmlChar *) xmlMallocAtomic(buffer_size); + if (buffer == NULL) goto mem_error; + + /* + * OK loop until we reach one of the ending char or a size limit. + * we are operating on already parsed values. + */ + if (str < last) + c = CUR_SCHAR(str, l); + else + c = 0; + while ((c != 0) && (c != end) && /* non input consuming loop */ + (c != end2) && (c != end3)) { + + if (c == 0) break; + if ((c == '&') && (str[1] == '#')) { + int val = xmlParseStringCharRef(ctxt, &str); + if (val == 0) + goto int_error; + COPY_BUF(0,buffer,nbchars,val); + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + } + } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "String decoding Entity Reference: %.30s\n", + str); + ent = xmlParseStringEntityRef(ctxt, &str); + xmlParserEntityCheck(ctxt, 0, ent, 0); + if (ent != NULL) + ctxt->nbentities += ent->checked / 2; + if ((ent != NULL) && + (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { + if (ent->content != NULL) { + COPY_BUF(0,buffer,nbchars,ent->content[0]); + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + } + } else { + xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, + "predefined entity has no content\n"); + goto int_error; + } + } else if ((ent != NULL) && (ent->content != NULL)) { + ctxt->depth++; + rep = xmlStringDecodeEntities(ctxt, ent->content, what, + 0, 0, 0); + ctxt->depth--; + if (rep == NULL) + goto int_error; + + current = rep; + while (*current != 0) { /* non input consuming loop */ + buffer[nbchars++] = *current++; + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) + goto int_error; + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + } + } + xmlFree(rep); + rep = NULL; + } else if (ent != NULL) { + int i = xmlStrlen(ent->name); + const xmlChar *cur = ent->name; + + buffer[nbchars++] = '&'; + if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); + } + for (;i > 0;i--) + buffer[nbchars++] = *cur++; + buffer[nbchars++] = ';'; + } + } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "String decoding PE Reference: %.30s\n", str); + ent = xmlParseStringPEReference(ctxt, &str); + xmlParserEntityCheck(ctxt, 0, ent, 0); + if (ent != NULL) + ctxt->nbentities += ent->checked / 2; + if (ent != NULL) { + if (ent->content == NULL) { + /* + * Note: external parsed entities will not be loaded, + * it is not required for a non-validating parser to + * complete external PEReferences coming from the + * internal subset + */ + if (((ctxt->options & XML_PARSE_NOENT) != 0) || + ((ctxt->options & XML_PARSE_DTDVALID) != 0) || + (ctxt->validate != 0)) { + xmlLoadEntityContent(ctxt, ent); + } else { + xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING, + "not validating will not read content for PE entity %s\n", + ent->name, NULL); + } + } + ctxt->depth++; + rep = xmlStringDecodeEntities(ctxt, ent->content, what, + 0, 0, 0); + ctxt->depth--; + if (rep == NULL) + goto int_error; + current = rep; + while (*current != 0) { /* non input consuming loop */ + buffer[nbchars++] = *current++; + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) + goto int_error; + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + } + } + xmlFree(rep); + rep = NULL; + } + } else { + COPY_BUF(l,buffer,nbchars,c); + str += l; + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + } + } + if (str < last) + c = CUR_SCHAR(str, l); + else + c = 0; + } + buffer[nbchars] = 0; + return(buffer); + +mem_error: + xmlErrMemory(ctxt, NULL); +int_error: + if (rep != NULL) + xmlFree(rep); + if (buffer != NULL) + xmlFree(buffer); + return(NULL); +} + +/** + * xmlStringDecodeEntities: + * @ctxt: the parser context + * @str: the input string + * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF + * @end: an end marker xmlChar, 0 if none + * @end2: an end marker xmlChar, 0 if none + * @end3: an end marker xmlChar, 0 if none + * + * Takes a entity string content and process to do the adequate substitutions. + * + * [67] Reference ::= EntityRef | CharRef + * + * [69] PEReference ::= '%' Name ';' + * + * Returns A newly allocated string with the substitution done. The caller + * must deallocate it ! + */ +xmlChar * +xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, + xmlChar end, xmlChar end2, xmlChar end3) { + if ((ctxt == NULL) || (str == NULL)) return(NULL); + return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, + end, end2, end3)); +} + +/************************************************************************ + * * + * Commodity functions, cleanup needed ? * + * * + ************************************************************************/ + +/** + * areBlanks: + * @ctxt: an XML parser context + * @str: a xmlChar * + * @len: the size of @str + * @blank_chars: we know the chars are blanks + * + * Is this a sequence of blank chars that one can ignore ? + * + * Returns 1 if ignorable 0 otherwise. + */ + +static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, + int blank_chars) { + int i, ret; + xmlNodePtr lastChild; + + /* + * Don't spend time trying to differentiate them, the same callback is + * used ! + */ + if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) + return(0); + + /* + * Check for xml:space value. + */ + if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || + (*(ctxt->space) == -2)) + return(0); + + /* + * Check that the string is made of blanks + */ + if (blank_chars == 0) { + for (i = 0;i < len;i++) + if (!(IS_BLANK_CH(str[i]))) return(0); + } + + /* + * Look if the element is mixed content in the DTD if available + */ + if (ctxt->node == NULL) return(0); + if (ctxt->myDoc != NULL) { + ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); + if (ret == 0) return(1); + if (ret == 1) return(0); + } + + /* + * Otherwise, heuristic :-\ + */ + if ((RAW != '<') && (RAW != 0xD)) return(0); + if ((ctxt->node->children == NULL) && + (RAW == '<') && (NXT(1) == '/')) return(0); + + lastChild = xmlGetLastChild(ctxt->node); + if (lastChild == NULL) { + if ((ctxt->node->type != XML_ELEMENT_NODE) && + (ctxt->node->content != NULL)) return(0); + } else if (xmlNodeIsText(lastChild)) + return(0); + else if ((ctxt->node->children != NULL) && + (xmlNodeIsText(ctxt->node->children))) + return(0); + return(1); +} + +/************************************************************************ + * * + * Extra stuff for namespace support * + * Relates to http://www.w3.org/TR/WD-xml-names * + * * + ************************************************************************/ + +/** + * xmlSplitQName: + * @ctxt: an XML parser context + * @name: an XML parser context + * @prefix: a xmlChar ** + * + * parse an UTF8 encoded XML qualified name string + * + * [NS 5] QName ::= (Prefix ':')? LocalPart + * + * [NS 6] Prefix ::= NCName + * + * [NS 7] LocalPart ::= NCName + * + * Returns the local part, and prefix is updated + * to get the Prefix if any. + */ + +xmlChar * +xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + xmlChar *buffer = NULL; + int len = 0; + int max = XML_MAX_NAMELEN; + xmlChar *ret = NULL; + const xmlChar *cur = name; + int c; + + if (prefix == NULL) return(NULL); + *prefix = NULL; + + if (cur == NULL) return(NULL); + +#ifndef XML_XML_NAMESPACE + /* xml: prefix is not really a namespace */ + if ((cur[0] == 'x') && (cur[1] == 'm') && + (cur[2] == 'l') && (cur[3] == ':')) + return(xmlStrdup(name)); +#endif + + /* nasty but well=formed */ + if (cur[0] == ':') + return(xmlStrdup(name)); + + c = *cur++; + while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ + buf[len++] = c; + c = *cur++; + } + if (len >= max) { + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. + */ + max = len * 2; + + buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); + if (buffer == NULL) { + xmlErrMemory(ctxt, NULL); + return(NULL); + } + memcpy(buffer, buf, len); + while ((c != 0) && (c != ':')) { /* tested bigname.xml */ + if (len + 10 > max) { + xmlChar *tmp; + + max *= 2; + tmp = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (tmp == NULL) { + xmlFree(buffer); + xmlErrMemory(ctxt, NULL); + return(NULL); + } + buffer = tmp; + } + buffer[len++] = c; + c = *cur++; + } + buffer[len] = 0; + } + + if ((c == ':') && (*cur == 0)) { + if (buffer != NULL) + xmlFree(buffer); + *prefix = NULL; + return(xmlStrdup(name)); + } + + if (buffer == NULL) + ret = xmlStrndup(buf, len); + else { + ret = buffer; + buffer = NULL; + max = XML_MAX_NAMELEN; + } + + + if (c == ':') { + c = *cur; + *prefix = ret; + if (c == 0) { + return(xmlStrndup(BAD_CAST "", 0)); + } + len = 0; + + /* + * Check that the first character is proper to start + * a new name + */ + if (!(((c >= 0x61) && (c <= 0x7A)) || + ((c >= 0x41) && (c <= 0x5A)) || + (c == '_') || (c == ':'))) { + int l; + int first = CUR_SCHAR(cur, l); + + if (!IS_LETTER(first) && (first != '_')) { + xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, + "Name %s is not XML Namespace compliant\n", + name); + } + } + cur++; + + while ((c != 0) && (len < max)) { /* tested bigname2.xml */ + buf[len++] = c; + c = *cur++; + } + if (len >= max) { + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. + */ + max = len * 2; + + buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); + if (buffer == NULL) { + xmlErrMemory(ctxt, NULL); + return(NULL); + } + memcpy(buffer, buf, len); + while (c != 0) { /* tested bigname2.xml */ + if (len + 10 > max) { + xmlChar *tmp; + + max *= 2; + tmp = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (tmp == NULL) { + xmlErrMemory(ctxt, NULL); + xmlFree(buffer); + return(NULL); + } + buffer = tmp; + } + buffer[len++] = c; + c = *cur++; + } + buffer[len] = 0; + } + + if (buffer == NULL) + ret = xmlStrndup(buf, len); + else { + ret = buffer; + } + } + + return(ret); +} + +/************************************************************************ + * * + * The parser itself * + * Relates to http://www.w3.org/TR/REC-xml * + * * + ************************************************************************/ + +/************************************************************************ + * * + * Routines to parse Name, NCName and NmToken * + * * + ************************************************************************/ +#ifdef DEBUG +static unsigned long nbParseName = 0; +static unsigned long nbParseNmToken = 0; +static unsigned long nbParseNCName = 0; +static unsigned long nbParseNCNameComplex = 0; +static unsigned long nbParseNameComplex = 0; +static unsigned long nbParseStringName = 0; +#endif + +/* + * The two following functions are related to the change of accepted + * characters for Name and NmToken in the Revision 5 of XML-1.0 + * They correspond to the modified production [4] and the new production [4a] + * changes in that revision. Also note that the macros used for the + * productions Letter, Digit, CombiningChar and Extender are not needed + * anymore. + * We still keep compatibility to pre-revision5 parsing semantic if the + * new XML_PARSE_OLD10 option is given to the parser. + */ +static int +xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { + if ((ctxt->options & XML_PARSE_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] amd [5] of the + * Update 5 of XML-1.0 + */ + if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + (c == '_') || (c == ':') || + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF)))) + return(1); + } else { + if (IS_LETTER(c) || (c == '_') || (c == ':')) + return(1); + } + return(0); +} + +static int +xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { + if ((ctxt->options & XML_PARSE_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] amd [5] of the + * Update 5 of XML-1.0 + */ + if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + ((c >= '0') && (c <= '9')) || /* !start */ + (c == '_') || (c == ':') || + (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x300) && (c <= 0x36F)) || /* !start */ + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF)))) + return(1); + } else { + if ((IS_LETTER(c)) || (IS_DIGIT(c)) || + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) + return(1); + } + return(0); +} + +static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, + int *len, int *alloc, int normalize); + +static const xmlChar * +xmlParseNameComplex(xmlParserCtxtPtr ctxt) { + int len = 0, l; + int c; + int count = 0; + +#ifdef DEBUG + nbParseNameComplex++; +#endif + + /* + * Handler for more complex cases + */ + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + if ((ctxt->options & XML_PARSE_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] amd [5] of the + * Update 5 of XML-1.0 + */ + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!(((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + (c == '_') || (c == ':') || + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF))))) { + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + ((c >= '0') && (c <= '9')) || /* !start */ + (c == '_') || (c == ':') || + (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x300) && (c <= 0x36F)) || /* !start */ + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF)) + )) { + if (count++ > XML_PARSER_CHUNK_SIZE) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + } + } else { + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!IS_LETTER(c) && (c != '_') && + (c != ':'))) { + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + + while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ + ((IS_LETTER(c)) || (IS_DIGIT(c)) || + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c)))) { + if (count++ > XML_PARSER_CHUNK_SIZE) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + } + } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); + return(NULL); + } + if (ctxt->input->cur - ctxt->input->base < len) { + /* + * There were a couple of bugs where PERefs lead to to a change + * of the buffer. Check the buffer size to avoid passing an invalid + * pointer to xmlDictLookup. + */ + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, + "unexpected change of input buffer"); + return (NULL); + } + if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) + return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); + return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); +} + +/** + * xmlParseName: + * @ctxt: an XML parser context + * + * parse an XML name. + * + * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | + * CombiningChar | Extender + * + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * + * [6] Names ::= Name (#x20 Name)* + * + * Returns the Name parsed or NULL + */ + +const xmlChar * +xmlParseName(xmlParserCtxtPtr ctxt) { + const xmlChar *in; + const xmlChar *ret; + int count = 0; + + GROW; + +#ifdef DEBUG + nbParseName++; +#endif + + /* + * Accelerator for simple ASCII names + */ + in = ctxt->input->cur; + if (((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + (*in == '_') || (*in == ':')) { + in++; + while (((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + ((*in >= 0x30) && (*in <= 0x39)) || + (*in == '_') || (*in == '-') || + (*in == ':') || (*in == '.')) + in++; + if ((*in > 0) && (*in < 0x80)) { + count = in - ctxt->input->cur; + if ((count > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); + return(NULL); + } + ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); + ctxt->input->cur = in; + ctxt->nbChars += count; + ctxt->input->col += count; + if (ret == NULL) + xmlErrMemory(ctxt, NULL); + return(ret); + } + } + /* accelerator for special cases */ + return(xmlParseNameComplex(ctxt)); +} + +static const xmlChar * +xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { + int len = 0, l; + int c; + int count = 0; + size_t startPosition = 0; + +#ifdef DEBUG + nbParseNCNameComplex++; +#endif + + /* + * Handler for more complex cases + */ + GROW; + startPosition = CUR_PTR - BASE_PTR; + c = CUR_CHAR(l); + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { + return(NULL); + } + + while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ + (xmlIsNameChar(ctxt, c) && (c != ':'))) { + if (count++ > XML_PARSER_CHUNK_SIZE) { + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + if (c == 0) { + count = 0; + /* + * when shrinking to extend the buffer we really need to preserve + * the part of the name we already parsed. Hence rolling back + * by current length. + */ + ctxt->input->cur -= l; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + ctxt->input->cur += l; + c = CUR_CHAR(l); + } + } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } + return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len)); +} + +/** + * xmlParseNCName: + * @ctxt: an XML parser context + * @len: length of the string parsed + * + * parse an XML name. + * + * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | + * CombiningChar | Extender + * + * [5NS] NCName ::= (Letter | '_') (NCNameChar)* + * + * Returns the Name parsed or NULL + */ + +static const xmlChar * +xmlParseNCName(xmlParserCtxtPtr ctxt) { + const xmlChar *in, *e; + const xmlChar *ret; + int count = 0; + +#ifdef DEBUG + nbParseNCName++; +#endif + + /* + * Accelerator for simple ASCII names + */ + in = ctxt->input->cur; + e = ctxt->input->end; + if ((((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + (*in == '_')) && (in < e)) { + in++; + while ((((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + ((*in >= 0x30) && (*in <= 0x39)) || + (*in == '_') || (*in == '-') || + (*in == '.')) && (in < e)) + in++; + if (in >= e) + goto complex; + if ((*in > 0) && (*in < 0x80)) { + count = in - ctxt->input->cur; + if ((count > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } + ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); + ctxt->input->cur = in; + ctxt->nbChars += count; + ctxt->input->col += count; + if (ret == NULL) { + xmlErrMemory(ctxt, NULL); + } + return(ret); + } + } +complex: + return(xmlParseNCNameComplex(ctxt)); +} + +/** + * xmlParseNameAndCompare: + * @ctxt: an XML parser context + * + * parse an XML name and compares for match + * (specialized for endtag parsing) + * + * Returns NULL for an illegal name, (xmlChar*) 1 for success + * and the name for mismatch + */ + +static const xmlChar * +xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { + register const xmlChar *cmp = other; + register const xmlChar *in; + const xmlChar *ret; + + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + + in = ctxt->input->cur; + while (*in != 0 && *in == *cmp) { + ++in; + ++cmp; + ctxt->input->col++; + } + if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { + /* success */ + ctxt->input->cur = in; + return (const xmlChar*) 1; + } + /* failure (or end of input buffer), check with full function */ + ret = xmlParseName (ctxt); + /* strings coming from the dictionary direct compare possible */ + if (ret == other) { + return (const xmlChar*) 1; + } + return ret; +} + +/** + * xmlParseStringName: + * @ctxt: an XML parser context + * @str: a pointer to the string pointer (IN/OUT) + * + * parse an XML name. + * + * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | + * CombiningChar | Extender + * + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * + * [6] Names ::= Name (#x20 Name)* + * + * Returns the Name parsed or NULL. The @str pointer + * is updated to the current location in the string. + */ + +static xmlChar * +xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + const xmlChar *cur = *str; + int len = 0, l; + int c; + +#ifdef DEBUG + nbParseStringName++; +#endif + + c = CUR_SCHAR(cur, l); + if (!xmlIsNameStartChar(ctxt, c)) { + return(NULL); + } + + COPY_BUF(l,buf,len,c); + cur += l; + c = CUR_SCHAR(cur, l); + while (xmlIsNameChar(ctxt, c)) { + COPY_BUF(l,buf,len,c); + cur += l; + c = CUR_SCHAR(cur, l); + if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. + */ + xmlChar *buffer; + int max = len * 2; + + buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); + if (buffer == NULL) { + xmlErrMemory(ctxt, NULL); + return(NULL); + } + memcpy(buffer, buf, len); + while (xmlIsNameChar(ctxt, c)) { + if (len + 10 > max) { + xmlChar *tmp; + + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + xmlFree(buffer); + return(NULL); + } + max *= 2; + tmp = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (tmp == NULL) { + xmlErrMemory(ctxt, NULL); + xmlFree(buffer); + return(NULL); + } + buffer = tmp; + } + COPY_BUF(l,buffer,len,c); + cur += l; + c = CUR_SCHAR(cur, l); + } + buffer[len] = 0; + *str = cur; + return(buffer); + } + } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } + *str = cur; + return(xmlStrndup(buf, len)); +} + +/** + * xmlParseNmtoken: + * @ctxt: an XML parser context + * + * parse an XML Nmtoken. + * + * [7] Nmtoken ::= (NameChar)+ + * + * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* + * + * Returns the Nmtoken parsed or NULL + */ + +xmlChar * +xmlParseNmtoken(xmlParserCtxtPtr ctxt) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + int len = 0, l; + int c; + int count = 0; + +#ifdef DEBUG + nbParseNmToken++; +#endif + + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + + while (xmlIsNameChar(ctxt, c)) { + if (count++ > XML_PARSER_CHUNK_SIZE) { + count = 0; + GROW; + } + COPY_BUF(l,buf,len,c); + NEXTL(l); + c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + } + if (len >= XML_MAX_NAMELEN) { + /* + * Okay someone managed to make a huge token, so he's ready to pay + * for the processing speed. + */ + xmlChar *buffer; + int max = len * 2; + + buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); + if (buffer == NULL) { + xmlErrMemory(ctxt, NULL); + return(NULL); + } + memcpy(buffer, buf, len); + while (xmlIsNameChar(ctxt, c)) { + if (count++ > XML_PARSER_CHUNK_SIZE) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buffer); + return(NULL); + } + } + if (len + 10 > max) { + xmlChar *tmp; + + if ((max > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); + xmlFree(buffer); + return(NULL); + } + max *= 2; + tmp = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (tmp == NULL) { + xmlErrMemory(ctxt, NULL); + xmlFree(buffer); + return(NULL); + } + buffer = tmp; + } + COPY_BUF(l,buffer,len,c); + NEXTL(l); + c = CUR_CHAR(l); + } + buffer[len] = 0; + return(buffer); + } + } + if (len == 0) + return(NULL); + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); + return(NULL); + } + return(xmlStrndup(buf, len)); +} + +/** + * xmlParseEntityValue: + * @ctxt: an XML parser context + * @orig: if non-NULL store a copy of the original entity value + * + * parse a value for ENTITY declarations + * + * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | + * "'" ([^%&'] | PEReference | Reference)* "'" + * + * Returns the EntityValue parsed with reference substituted or NULL + */ + +xmlChar * +xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { + xmlChar *buf = NULL; + int len = 0; + int size = XML_PARSER_BUFFER_SIZE; + int c, l; + xmlChar stop; + xmlChar *ret = NULL; + const xmlChar *cur = NULL; + xmlParserInputPtr input; + + if (RAW == '"') stop = '"'; + else if (RAW == '\'') stop = '\''; + else { + xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); + return(NULL); + } + buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlErrMemory(ctxt, NULL); + return(NULL); + } + + /* + * The content of the entity definition is copied in a buffer. + */ + + ctxt->instate = XML_PARSER_ENTITY_VALUE; + input = ctxt->input; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + goto error; + NEXT; + c = CUR_CHAR(l); + /* + * NOTE: 4.4.5 Included in Literal + * When a parameter entity reference appears in a literal entity + * value, ... a single or double quote character in the replacement + * text is always treated as a normal data character and will not + * terminate the literal. + * In practice it means we stop the loop only when back at parsing + * the initial entity and the quote is found + */ + while (((IS_CHAR(c)) && ((c != stop) || /* checked */ + (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { + if (len + 5 >= size) { + xmlChar *tmp; + + size *= 2; + tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (tmp == NULL) { + xmlErrMemory(ctxt, NULL); + goto error; + } + buf = tmp; + } + COPY_BUF(l,buf,len,c); + NEXTL(l); + + GROW; + c = CUR_CHAR(l); + if (c == 0) { + GROW; + c = CUR_CHAR(l); + } + } + buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) + goto error; + if (c != stop) { + xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); + goto error; + } + NEXT; + + /* + * Raise problem w.r.t. '&' and '%' being used in non-entities + * reference constructs. Note Charref will be handled in + * xmlStringDecodeEntities() + */ + cur = buf; + while (*cur != 0) { /* non input consuming */ + if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { + xmlChar *name; + xmlChar tmp = *cur; + int nameOk = 0; + + cur++; + name = xmlParseStringName(ctxt, &cur); + if (name != NULL) { + nameOk = 1; + xmlFree(name); + } + if ((nameOk == 0) || (*cur != ';')) { + xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, + "EntityValue: '%c' forbidden except for entities references\n", + tmp); + goto error; + } + if ((tmp == '%') && (ctxt->inSubset == 1) && + (ctxt->inputNr == 1)) { + xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); + goto error; + } + if (*cur == 0) + break; + } + cur++; + } + + /* + * Then PEReference entities are substituted. + * + * NOTE: 4.4.7 Bypassed + * When a general entity reference appears in the EntityValue in + * an entity declaration, it is bypassed and left as is. + * so XML_SUBSTITUTE_REF is not set here. + */ + ++ctxt->depth; + ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, + 0, 0, 0); + --ctxt->depth; + if (orig != NULL) { + *orig = buf; + buf = NULL; + } + +error: + if (buf != NULL) + xmlFree(buf); + return(ret); +} + +/** + * xmlParseAttValueComplex: + * @ctxt: an XML parser context + * @len: the resulting attribute len + * @normalize: whether to apply the inner normalization + * + * parse a value for an attribute, this is the fallback function + * of xmlParseAttValue() when the attribute parsing requires handling + * of non-ASCII characters, or normalization compaction. + * + * Returns the AttValue parsed or NULL. The value has to be freed by the caller. + */ +static xmlChar * +xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { + xmlChar limit = 0; + xmlChar *buf = NULL; + xmlChar *rep = NULL; + size_t len = 0; + size_t buf_size = 0; + int c, l, in_space = 0; + xmlChar *current = NULL; + xmlEntityPtr ent; + + if (NXT(0) == '"') { + ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; + limit = '"'; + NEXT; + } else if (NXT(0) == '\'') { + limit = '\''; + ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; + NEXT; + } else { + xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); + return(NULL); + } + + /* + * allocate a translation buffer. + */ + buf_size = XML_PARSER_BUFFER_SIZE; + buf = (xmlChar *) xmlMallocAtomic(buf_size); + if (buf == NULL) goto mem_error; + + /* + * OK loop until we reach one of the ending char or a size limit. + */ + c = CUR_CHAR(l); + while (((NXT(0) != limit) && /* checked */ + (IS_CHAR(c)) && (c != '<')) && + (ctxt->instate != XML_PARSER_EOF)) { + /* + * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE + * special option is given + */ + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + goto mem_error; + } + if (c == 0) break; + if (c == '&') { + in_space = 0; + if (NXT(1) == '#') { + int val = xmlParseCharRef(ctxt); + + if (val == '&') { + if (ctxt->replaceEntities) { + if (len + 10 > buf_size) { + growBuffer(buf, 10); + } + buf[len++] = '&'; + } else { + /* + * The reparsing will be done in xmlStringGetNodeList() + * called by the attribute() function in SAX.c + */ + if (len + 10 > buf_size) { + growBuffer(buf, 10); + } + buf[len++] = '&'; + buf[len++] = '#'; + buf[len++] = '3'; + buf[len++] = '8'; + buf[len++] = ';'; + } + } else if (val != 0) { + if (len + 10 > buf_size) { + growBuffer(buf, 10); + } + len += xmlCopyChar(0, &buf[len], val); + } + } else { + ent = xmlParseEntityRef(ctxt); + ctxt->nbentities++; + if (ent != NULL) + ctxt->nbentities += ent->owner; + if ((ent != NULL) && + (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { + if (len + 10 > buf_size) { + growBuffer(buf, 10); + } + if ((ctxt->replaceEntities == 0) && + (ent->content[0] == '&')) { + buf[len++] = '&'; + buf[len++] = '#'; + buf[len++] = '3'; + buf[len++] = '8'; + buf[len++] = ';'; + } else { + buf[len++] = ent->content[0]; + } + } else if ((ent != NULL) && + (ctxt->replaceEntities != 0)) { + if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { + ++ctxt->depth; + rep = xmlStringDecodeEntities(ctxt, ent->content, + XML_SUBSTITUTE_REF, + 0, 0, 0); + --ctxt->depth; + if (rep != NULL) { + current = rep; + while (*current != 0) { /* non input consuming */ + if ((*current == 0xD) || (*current == 0xA) || + (*current == 0x9)) { + buf[len++] = 0x20; + current++; + } else + buf[len++] = *current++; + if (len + 10 > buf_size) { + growBuffer(buf, 10); + } + } + xmlFree(rep); + rep = NULL; + } + } else { + if (len + 10 > buf_size) { + growBuffer(buf, 10); + } + if (ent->content != NULL) + buf[len++] = ent->content[0]; + } + } else if (ent != NULL) { + int i = xmlStrlen(ent->name); + const xmlChar *cur = ent->name; + + /* + * This may look absurd but is needed to detect + * entities problems + */ + if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && + (ent->content != NULL) && (ent->checked == 0)) { + unsigned long oldnbent = ctxt->nbentities, diff; + + ++ctxt->depth; + rep = xmlStringDecodeEntities(ctxt, ent->content, + XML_SUBSTITUTE_REF, 0, 0, 0); + --ctxt->depth; + + diff = ctxt->nbentities - oldnbent + 1; + if (diff > INT_MAX / 2) + diff = INT_MAX / 2; + ent->checked = diff * 2; + if (rep != NULL) { + if (xmlStrchr(rep, '<')) + ent->checked |= 1; + xmlFree(rep); + rep = NULL; + } else { + ent->content[0] = 0; + } + } + + /* + * Just output the reference + */ + buf[len++] = '&'; + while (len + i + 10 > buf_size) { + growBuffer(buf, i + 10); + } + for (;i > 0;i--) + buf[len++] = *cur++; + buf[len++] = ';'; + } + } + } else { + if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { + if ((len != 0) || (!normalize)) { + if ((!normalize) || (!in_space)) { + COPY_BUF(l,buf,len,0x20); + while (len + 10 > buf_size) { + growBuffer(buf, 10); + } + } + in_space = 1; + } + } else { + in_space = 0; + COPY_BUF(l,buf,len,c); + if (len + 10 > buf_size) { + growBuffer(buf, 10); + } + } + NEXTL(l); + } + GROW; + c = CUR_CHAR(l); + } + if (ctxt->instate == XML_PARSER_EOF) + goto error; + + if ((in_space) && (normalize)) { + while ((len > 0) && (buf[len - 1] == 0x20)) len--; + } + buf[len] = 0; + if (RAW == '<') { + xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); + } else if (RAW != limit) { + if ((c != 0) && (!IS_CHAR(c))) { + xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, + "invalid character in attribute value\n"); + } else { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue: ' expected\n"); + } + } else + NEXT; + + /* + * There we potentially risk an overflow, don't allow attribute value of + * length more than INT_MAX it is a very reasonable assumption ! + */ + if (len >= INT_MAX) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + goto mem_error; + } + + if (attlen != NULL) *attlen = (int) len; + return(buf); + +mem_error: + xmlErrMemory(ctxt, NULL); +error: + if (buf != NULL) + xmlFree(buf); + if (rep != NULL) + xmlFree(rep); + return(NULL); +} + +/** + * xmlParseAttValue: + * @ctxt: an XML parser context + * + * parse a value for an attribute + * Note: the parser won't do substitution of entities here, this + * will be handled later in xmlStringGetNodeList + * + * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | + * "'" ([^<&'] | Reference)* "'" + * + * 3.3.3 Attribute-Value Normalization: + * Before the value of an attribute is passed to the application or + * checked for validity, the XML processor must normalize it as follows: + * - a character reference is processed by appending the referenced + * character to the attribute value + * - an entity reference is processed by recursively processing the + * replacement text of the entity + * - a whitespace character (#x20, #xD, #xA, #x9) is processed by + * appending #x20 to the normalized value, except that only a single + * #x20 is appended for a "#xD#xA" sequence that is part of an external + * parsed entity or the literal entity value of an internal parsed entity + * - other characters are processed by appending them to the normalized value + * If the declared value is not CDATA, then the XML processor must further + * process the normalized attribute value by discarding any leading and + * trailing space (#x20) characters, and by replacing sequences of space + * (#x20) characters by a single space (#x20) character. + * All attributes for which no declaration has been read should be treated + * by a non-validating parser as if declared CDATA. + * + * Returns the AttValue parsed or NULL. The value has to be freed by the caller. + */ + + +xmlChar * +xmlParseAttValue(xmlParserCtxtPtr ctxt) { + if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); + return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); +} + +/** + * xmlParseSystemLiteral: + * @ctxt: an XML parser context + * + * parse an XML Literal + * + * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") + * + * Returns the SystemLiteral parsed or NULL + */ + +xmlChar * +xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len = 0; + int size = XML_PARSER_BUFFER_SIZE; + int cur, l; + xmlChar stop; + int state = ctxt->instate; + int count = 0; + + SHRINK; + if (RAW == '"') { + NEXT; + stop = '"'; + } else if (RAW == '\'') { + NEXT; + stop = '\''; + } else { + xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); + return(NULL); + } + + buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlErrMemory(ctxt, NULL); + return(NULL); + } + ctxt->instate = XML_PARSER_SYSTEM_LITERAL; + cur = CUR_CHAR(l); + while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ + if (len + 5 >= size) { + xmlChar *tmp; + + if ((size > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); + xmlFree(buf); + ctxt->instate = (xmlParserInputState) state; + return(NULL); + } + size *= 2; + tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (tmp == NULL) { + xmlFree(buf); + xmlErrMemory(ctxt, NULL); + ctxt->instate = (xmlParserInputState) state; + return(NULL); + } + buf = tmp; + } + count++; + if (count > 50) { + GROW; + count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } + } + COPY_BUF(l,buf,len,cur); + NEXTL(l); + cur = CUR_CHAR(l); + if (cur == 0) { + GROW; + SHRINK; + cur = CUR_CHAR(l); + } + } + buf[len] = 0; + ctxt->instate = (xmlParserInputState) state; + if (!IS_CHAR(cur)) { + xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); + } else { + NEXT; + } + return(buf); +} + +/** + * xmlParsePubidLiteral: + * @ctxt: an XML parser context + * + * parse an XML public literal + * + * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" + * + * Returns the PubidLiteral parsed or NULL. + */ + +xmlChar * +xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len = 0; + int size = XML_PARSER_BUFFER_SIZE; + xmlChar cur; + xmlChar stop; + int count = 0; + xmlParserInputState oldstate = ctxt->instate; + + SHRINK; + if (RAW == '"') { + NEXT; + stop = '"'; + } else if (RAW == '\'') { + NEXT; + stop = '\''; + } else { + xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); + return(NULL); + } + buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlErrMemory(ctxt, NULL); + return(NULL); + } + ctxt->instate = XML_PARSER_PUBLIC_LITERAL; + cur = CUR; + while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ + if (len + 1 >= size) { + xmlChar *tmp; + + if ((size > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); + xmlFree(buf); + return(NULL); + } + size *= 2; + tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (tmp == NULL) { + xmlErrMemory(ctxt, NULL); + xmlFree(buf); + return(NULL); + } + buf = tmp; + } + buf[len++] = cur; + count++; + if (count > 50) { + GROW; + count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } + } + NEXT; + cur = CUR; + if (cur == 0) { + GROW; + SHRINK; + cur = CUR; + } + } + buf[len] = 0; + if (cur != stop) { + xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); + } else { + NEXT; + } + ctxt->instate = oldstate; + return(buf); +} + +static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); + +/* + * used for the test in the inner loop of the char data testing + */ +static const unsigned char test_char_data[256] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/** + * xmlParseCharData: + * @ctxt: an XML parser context + * @cdata: int indicating whether we are within a CDATA section + * + * parse a CharData section. + * if we are within a CDATA section ']]>' marks an end of section. + * + * The right angle bracket (>) may be represented using the string ">", + * and must, for compatibility, be escaped using ">" or a character + * reference when it appears in the string "]]>" in content, when that + * string is not marking the end of a CDATA section. + * + * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) + */ + +void +xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { + const xmlChar *in; + int nbchar = 0; + int line = ctxt->input->line; + int col = ctxt->input->col; + int ccol; + + SHRINK; + GROW; + /* + * Accelerated common case where input don't need to be + * modified before passing it to the handler. + */ + if (!cdata) { + in = ctxt->input->cur; + do { +get_more_space: + while (*in == 0x20) { in++; ctxt->input->col++; } + if (*in == 0xA) { + do { + ctxt->input->line++; ctxt->input->col = 1; + in++; + } while (*in == 0xA); + goto get_more_space; + } + if (*in == '<') { + nbchar = in - ctxt->input->cur; + if (nbchar > 0) { + const xmlChar *tmp = ctxt->input->cur; + ctxt->input->cur = in; + + if ((ctxt->sax != NULL) && + (ctxt->sax->ignorableWhitespace != + ctxt->sax->characters)) { + if (areBlanks(ctxt, tmp, nbchar, 1)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, + tmp, nbchar); + } else { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, + tmp, nbchar); + if (*ctxt->space == -1) + *ctxt->space = -2; + } + } else if ((ctxt->sax != NULL) && + (ctxt->sax->characters != NULL)) { + ctxt->sax->characters(ctxt->userData, + tmp, nbchar); + } + } + return; + } + +get_more: + ccol = ctxt->input->col; + while (test_char_data[*in]) { + in++; + ccol++; + } + ctxt->input->col = ccol; + if (*in == 0xA) { + do { + ctxt->input->line++; ctxt->input->col = 1; + in++; + } while (*in == 0xA); + goto get_more; + } + if (*in == ']') { + if ((in[1] == ']') && (in[2] == '>')) { + xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); + ctxt->input->cur = in + 1; + return; + } + in++; + ctxt->input->col++; + goto get_more; + } + nbchar = in - ctxt->input->cur; + if (nbchar > 0) { + if ((ctxt->sax != NULL) && + (ctxt->sax->ignorableWhitespace != + ctxt->sax->characters) && + (IS_BLANK_CH(*ctxt->input->cur))) { + const xmlChar *tmp = ctxt->input->cur; + ctxt->input->cur = in; + + if (areBlanks(ctxt, tmp, nbchar, 0)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, + tmp, nbchar); + } else { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, + tmp, nbchar); + if (*ctxt->space == -1) + *ctxt->space = -2; + } + line = ctxt->input->line; + col = ctxt->input->col; + } else if (ctxt->sax != NULL) { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, + ctxt->input->cur, nbchar); + line = ctxt->input->line; + col = ctxt->input->col; + } + /* something really bad happened in the SAX callback */ + if (ctxt->instate != XML_PARSER_CONTENT) + return; + } + ctxt->input->cur = in; + if (*in == 0xD) { + in++; + if (*in == 0xA) { + ctxt->input->cur = in; + in++; + ctxt->input->line++; ctxt->input->col = 1; + continue; /* while */ + } + in--; + } + if (*in == '<') { + return; + } + if (*in == '&') { + return; + } + SHRINK; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return; + in = ctxt->input->cur; + } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); + nbchar = 0; + } + ctxt->input->line = line; + ctxt->input->col = col; + xmlParseCharDataComplex(ctxt, cdata); +} + +/** + * xmlParseCharDataComplex: + * @ctxt: an XML parser context + * @cdata: int indicating whether we are within a CDATA section + * + * parse a CharData section.this is the fallback function + * of xmlParseCharData() when the parsing requires handling + * of non-ASCII characters. + */ +static void +xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { + xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; + int nbchar = 0; + int cur, l; + int count = 0; + + SHRINK; + GROW; + cur = CUR_CHAR(l); + while ((cur != '<') && /* checked */ + (cur != '&') && + (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { + if ((cur == ']') && (NXT(1) == ']') && + (NXT(2) == '>')) { + if (cdata) break; + else { + xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); + } + } + COPY_BUF(l,buf,nbchar,cur); + if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { + buf[nbchar] = 0; + + /* + * OK the segment is to be consumed as chars. + */ + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (areBlanks(ctxt, buf, nbchar, 0)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, + buf, nbchar); + } else { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, buf, nbchar); + if ((ctxt->sax->characters != + ctxt->sax->ignorableWhitespace) && + (*ctxt->space == -1)) + *ctxt->space = -2; + } + } + nbchar = 0; + /* something really bad happened in the SAX callback */ + if (ctxt->instate != XML_PARSER_CONTENT) + return; + } + count++; + if (count > 50) { + GROW; + count = 0; + if (ctxt->instate == XML_PARSER_EOF) + return; + } + NEXTL(l); + cur = CUR_CHAR(l); + } + if (nbchar != 0) { + buf[nbchar] = 0; + /* + * OK the segment is to be consumed as chars. + */ + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (areBlanks(ctxt, buf, nbchar, 0)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); + } else { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, buf, nbchar); + if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && + (*ctxt->space == -1)) + *ctxt->space = -2; + } + } + } + if ((cur != 0) && (!IS_CHAR(cur))) { + /* Generate the error and skip the offending character */ + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "PCDATA invalid Char value %d\n", + cur); + NEXTL(l); + } +} + +/** + * xmlParseExternalID: + * @ctxt: an XML parser context + * @publicID: a xmlChar** receiving PubidLiteral + * @strict: indicate whether we should restrict parsing to only + * production [75], see NOTE below + * + * Parse an External ID or a Public ID + * + * NOTE: Productions [75] and [83] interact badly since [75] can generate + * 'PUBLIC' S PubidLiteral S SystemLiteral + * + * [75] ExternalID ::= 'SYSTEM' S SystemLiteral + * | 'PUBLIC' S PubidLiteral S SystemLiteral + * + * [83] PublicID ::= 'PUBLIC' S PubidLiteral + * + * Returns the function returns SystemLiteral and in the second + * case publicID receives PubidLiteral, is strict is off + * it is possible to return NULL and have publicID set. + */ + +xmlChar * +xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { + xmlChar *URI = NULL; + + SHRINK; + + *publicID = NULL; + if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { + SKIP(6); + if (SKIP_BLANKS == 0) { + xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, + "Space required after 'SYSTEM'\n"); + } + URI = xmlParseSystemLiteral(ctxt); + if (URI == NULL) { + xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); + } + } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { + SKIP(6); + if (SKIP_BLANKS == 0) { + xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, + "Space required after 'PUBLIC'\n"); + } + *publicID = xmlParsePubidLiteral(ctxt); + if (*publicID == NULL) { + xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); + } + if (strict) { + /* + * We don't handle [83] so "S SystemLiteral" is required. + */ + if (SKIP_BLANKS == 0) { + xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, + "Space required after the Public Identifier\n"); + } + } else { + /* + * We handle [83] so we return immediately, if + * "S SystemLiteral" is not detected. We skip blanks if no + * system literal was found, but this is harmless since we must + * be at the end of a NotationDecl. + */ + if (SKIP_BLANKS == 0) return(NULL); + if ((CUR != '\'') && (CUR != '"')) return(NULL); + } + URI = xmlParseSystemLiteral(ctxt); + if (URI == NULL) { + xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); + } + } + return(URI); +} + +/** + * xmlParseCommentComplex: + * @ctxt: an XML parser context + * @buf: the already parsed part of the buffer + * @len: number of bytes in the buffer + * @size: allocated size of the buffer + * + * Skip an XML (SGML) comment + * The spec says that "For compatibility, the string "--" (double-hyphen) + * must not occur within comments. " + * This is the slow routine in case the accelerator for ascii didn't work + * + * [15] Comment ::= '' + */ +static void +xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, + size_t len, size_t size) { + int q, ql; + int r, rl; + int cur, l; + size_t count = 0; + int inputid; + + inputid = ctxt->input->id; + + if (buf == NULL) { + len = 0; + size = XML_PARSER_BUFFER_SIZE; + buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlErrMemory(ctxt, NULL); + return; + } + } + GROW; /* Assure there's enough input data */ + q = CUR_CHAR(ql); + if (q == 0) + goto not_terminated; + if (!IS_CHAR(q)) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "xmlParseComment: invalid xmlChar value %d\n", + q); + xmlFree (buf); + return; + } + NEXTL(ql); + r = CUR_CHAR(rl); + if (r == 0) + goto not_terminated; + if (!IS_CHAR(r)) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "xmlParseComment: invalid xmlChar value %d\n", + q); + xmlFree (buf); + return; + } + NEXTL(rl); + cur = CUR_CHAR(l); + if (cur == 0) + goto not_terminated; + while (IS_CHAR(cur) && /* checked */ + ((cur != '>') || + (r != '-') || (q != '-'))) { + if ((r == '-') && (q == '-')) { + xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); + } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment too big found", NULL); + xmlFree (buf); + return; + } + if (len + 5 >= size) { + xmlChar *new_buf; + size_t new_size; + + new_size = size * 2; + new_buf = (xmlChar *) xmlRealloc(buf, new_size); + if (new_buf == NULL) { + xmlFree (buf); + xmlErrMemory(ctxt, NULL); + return; + } + buf = new_buf; + size = new_size; + } + COPY_BUF(ql,buf,len,q); + q = r; + ql = rl; + r = cur; + rl = l; + + count++; + if (count > 50) { + GROW; + count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } + } + NEXTL(l); + cur = CUR_CHAR(l); + if (cur == 0) { + SHRINK; + GROW; + cur = CUR_CHAR(l); + } + } + buf[len] = 0; + if (cur == 0) { + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment not terminated \n + * The spec says that "For compatibility, the string "--" (double-hyphen) + * must not occur within comments. " + * + * [15] Comment ::= '' + */ +void +xmlParseComment(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + size_t size = XML_PARSER_BUFFER_SIZE; + size_t len = 0; + xmlParserInputState state; + const xmlChar *in; + size_t nbchar = 0; + int ccol; + int inputid; + + /* + * Check that there is a comment right here. + */ + if ((RAW != '<') || (NXT(1) != '!') || + (NXT(2) != '-') || (NXT(3) != '-')) return; + state = ctxt->instate; + ctxt->instate = XML_PARSER_COMMENT; + inputid = ctxt->input->id; + SKIP(4); + SHRINK; + GROW; + + /* + * Accelerated common case where input don't need to be + * modified before passing it to the handler. + */ + in = ctxt->input->cur; + do { + if (*in == 0xA) { + do { + ctxt->input->line++; ctxt->input->col = 1; + in++; + } while (*in == 0xA); + } +get_more: + ccol = ctxt->input->col; + while (((*in > '-') && (*in <= 0x7F)) || + ((*in >= 0x20) && (*in < '-')) || + (*in == 0x09)) { + in++; + ccol++; + } + ctxt->input->col = ccol; + if (*in == 0xA) { + do { + ctxt->input->line++; ctxt->input->col = 1; + in++; + } while (*in == 0xA); + goto get_more; + } + nbchar = in - ctxt->input->cur; + /* + * save current set of data + */ + if (nbchar > 0) { + if ((ctxt->sax != NULL) && + (ctxt->sax->comment != NULL)) { + if (buf == NULL) { + if ((*in == '-') && (in[1] == '-')) + size = nbchar + 1; + else + size = XML_PARSER_BUFFER_SIZE + nbchar; + buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlErrMemory(ctxt, NULL); + ctxt->instate = state; + return; + } + len = 0; + } else if (len + nbchar + 1 >= size) { + xmlChar *new_buf; + size += len + nbchar + XML_PARSER_BUFFER_SIZE; + new_buf = (xmlChar *) xmlRealloc(buf, + size * sizeof(xmlChar)); + if (new_buf == NULL) { + xmlFree (buf); + xmlErrMemory(ctxt, NULL); + ctxt->instate = state; + return; + } + buf = new_buf; + } + memcpy(&buf[len], ctxt->input->cur, nbchar); + len += nbchar; + buf[len] = 0; + } + } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment too big found", NULL); + xmlFree (buf); + return; + } + ctxt->input->cur = in; + if (*in == 0xA) { + in++; + ctxt->input->line++; ctxt->input->col = 1; + } + if (*in == 0xD) { + in++; + if (*in == 0xA) { + ctxt->input->cur = in; + in++; + ctxt->input->line++; ctxt->input->col = 1; + continue; /* while */ + } + in--; + } + SHRINK; + GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } + in = ctxt->input->cur; + if (*in == '-') { + if (in[1] == '-') { + if (in[2] == '>') { + if (ctxt->input->id != inputid) { + xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, + "comment doesn't start and stop in the" + " same entity\n"); + } + SKIP(3); + if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && + (!ctxt->disableSAX)) { + if (buf != NULL) + ctxt->sax->comment(ctxt->userData, buf); + else + ctxt->sax->comment(ctxt->userData, BAD_CAST ""); + } + if (buf != NULL) + xmlFree(buf); + if (ctxt->instate != XML_PARSER_EOF) + ctxt->instate = state; + return; + } + if (buf != NULL) { + xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, + "Double hyphen within comment: " + "