From 35a201cc8ef0c3f5b2df88d2e528aabee1048348 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 30 Apr 2021 18:47:09 +0200 Subject: Initial/Final commit --- libxml2-2.9.10/os400/libxmlrpg/HTMLparser.rpgle | 419 ++++++++++++++++++++++++ 1 file changed, 419 insertions(+) create mode 100644 libxml2-2.9.10/os400/libxmlrpg/HTMLparser.rpgle (limited to 'libxml2-2.9.10/os400/libxmlrpg/HTMLparser.rpgle') diff --git a/libxml2-2.9.10/os400/libxmlrpg/HTMLparser.rpgle b/libxml2-2.9.10/os400/libxmlrpg/HTMLparser.rpgle new file mode 100644 index 0000000..c27d8e5 --- /dev/null +++ b/libxml2-2.9.10/os400/libxmlrpg/HTMLparser.rpgle @@ -0,0 +1,419 @@ + * Summary: interface for an HTML 4.0 non-verifying parser + * Description: this module implements an HTML 4.0 non-verifying parser + * with API compatible with the XML parser ones. It should + * be able to parse "real world" HTML, even if severely + * broken from a specification point of view. + * + * Copy: See Copyright for the status of this software. + * + * Author: Patrick Monnerat , DATASPHERE S.A. + + /if not defined(HTML_PARSER_H__) + /define HTML_PARSER_H__ + + /include "libxmlrpg/xmlversion" + + /if defined(LIBXML_HTML_ENABLED) + + /include "libxmlrpg/xmlTypesC" + /include "libxmlrpg/parser" + + * Most of the back-end structures from XML and HTML are shared. + + d htmlParserCtxtPtr... + d s based(######typedef######) + d like(xmlParserCtxtPtr) + + d htmlParserCtxt ds based(htmlParserCtxtPtr) + d likeds(xmlParserCtxt) + + d htmlParserNodeInfoPtr... + d s based(######typedef######) + d like(xmlParserNodeInfoPtr) + + d htmlParserNodeInfo... + d ds based(htmlParserNodeInfoPtr) + d likeds(xmlParserNodeInfo) + + d htmlSAXHandlerPtr... + d s based(######typedef######) + d like(xmlSAXHandlerPtr) + + d htmlSAXHandler ds based(htmlSAXHandlerPtr) + d likeds(xmlSAXHandler) + + d htmlParserInputPtr... + d s based(######typedef######) + d like(xmlParserInputPtr) + + d htmlParserInput... 
+ d ds based(htmlParserInputPtr) + d likeds(xmlParserInput) + + d htmlDocPtr s based(######typedef######) + d like(xmlDocPtr) + + d htmlNodePtr s based(######typedef######) + d like(xmlNodePtr) + + * Internal description of an HTML element, representing HTML 4.01 + * and XHTML 1.0 (which share the same structure). + + d htmlElemDescPtr... + d s * based(######typedef######) + + d htmlElemDesc ds based(htmlElemDescPtr) + d align qualified + d name * const char * + d startTag like(xmlCchar) Start tag implied ? + d endTag like(xmlCchar) End tag implied ? + d saveEndTag like(xmlCchar) Save end tag ? + d empty like(xmlCchar) Empty element ? + d depr like(xmlCchar) Deprecated element ? + d dtd like(xmlCchar) Loose DTD/Frameset + d isinline like(xmlCchar) Block 0/inline elem? + d desc * const char * + * + * New fields encapsulating HTML structure + * + * Bugs: + * This is a very limited representation. It fails to tell us when + * an element *requires* subelements (we only have whether they're + * allowed or not), and it doesn't tell us where CDATA and PCDATA + * are allowed. Some element relationships are not fully represented: + * these are flagged with the word MODIFIER + * + d subelts * const char * * + d defaultsubelt * const char * + d attrs_opt * const char * * + d attrs_depr * const char * * + d attrs_req * const char * * + + * Internal description of an HTML entity. + + d htmlEntityDescPtr... + d s * based(######typedef######) + + d htmlEntityDesc... + d ds based(htmlEntityDescPtr) + d align qualified + d value like(xmlCuint) + d name * const char * + d desc * const char * + + * There are only a few public functions. + + d htmlTagLookup pr extproc('htmlTagLookup') + d like(htmlElemDescPtr) const + d tag * value options(*string) const xmlChar * + + d htmlEntityLookup... + d pr extproc('htmlEntityLookup') + d like(htmlEntityDescPtr) const + d name * value options(*string) const xmlChar * + + d htmlEntityValueLookup... 
+ d pr extproc('htmlEntityValueLookup') + d like(htmlEntityDescPtr) const + d value value like(xmlCuint) + + d htmlIsAutoClosed... + d pr extproc('htmlIsAutoClosed') + d like(xmlCint) + d doc value like(htmlDocPtr) + d elem value like(htmlNodePtr) + + d htmlAutoCloseTag... + d pr extproc('htmlAutoCloseTag') + d like(xmlCint) + d doc value like(htmlDocPtr) + d name * value options(*string) const xmlChar * + d elem value like(htmlNodePtr) + + d htmlParseEntityRef... + d pr extproc('htmlParseEntityRef') + d like(htmlEntityDescPtr) const + d ctxt value like(htmlParserCtxtPtr) + d str * const xmlChar *(*) + + d htmlParseCharRef... + d pr extproc('htmlParseCharRef') + d like(xmlCint) + d ctxt value like(htmlParserCtxtPtr) + + d htmlParseElement... + d pr extproc('htmlParseElement') + d ctxt value like(htmlParserCtxtPtr) + + d htmlNewParserCtxt... + d pr extproc('htmlNewParserCtxt') + d like(htmlParserCtxtPtr) + + d htmlCreateMemoryParserCtxt... + d pr extproc('htmlCreateMemoryParserCtxt') + d like(htmlParserCtxtPtr) + d buffer * value options(*string) const char * + d size value like(xmlCint) + + d htmlParseDocument... + d pr extproc('htmlParseDocument') + d like(xmlCint) + d ctxt value like(htmlParserCtxtPtr) + + d htmlSAXParseDoc... + d pr extproc('htmlSAXParseDoc') + d like(htmlDocPtr) + d cur * value options(*string) xmlChar * + d encoding * value options(*string) const char * + d sax value like(htmlSAXHandlerPtr) + d userData * value void * + + d htmlParseDoc pr extproc('htmlParseDoc') + d like(htmlDocPtr) + d cur * value options(*string) xmlChar * + d encoding * value options(*string) const char * + + d htmlSAXParseFile... 
+ d pr extproc('htmlSAXParseFile') + d like(htmlDocPtr) + d filename * value options(*string) const char * + d encoding * value options(*string) const char * + d sax value like(htmlSAXHandlerPtr) + d userData * value void * + + d htmlParseFile pr extproc('htmlParseFile') + d like(htmlDocPtr) + d filename * value options(*string) const char * + d encoding * value options(*string) const char * + + d UTF8ToHtml pr extproc('UTF8ToHtml') + d like(xmlCint) + d out 65535 options(*varsize) unsigned char [] + d outlen like(xmlCint) + d in * value options(*string) const unsigned char* + d inlen like(xmlCint) + + d htmlEncodeEntities... + d pr extproc('htmlEncodeEntities') + d like(xmlCint) + d out 65535 options(*varsize) unsigned char [] + d outlen like(xmlCint) + d in * value options(*string) const unsigned char* + d inlen like(xmlCint) + d quoteChar value like(xmlCint) + + d htmlIsScriptAttribute... + d pr extproc('htmlIsScriptAttribute') + d like(xmlCint) + d name * value options(*string) const xmlChar * + + d htmlHandleOmittedElem... + d pr extproc('htmlHandleOmittedElem') + d like(xmlCint) + d val value like(xmlCint) + + /if defined(LIBXML_PUSH_ENABLED) + + * Interfaces for the Push mode. + + d htmlCreatePushParserCtxt... + d pr extproc('htmlCreatePushParserCtxt') + d like(htmlParserCtxtPtr) + d sax value like(htmlSAXHandlerPtr) + d user_data * value void * + d chunk * value options(*string) const char * + d size value like(xmlCint) + d filename * value options(*string) const char * + d enc value like(xmlCharEncoding) + + d htmlParseChunk pr extproc('htmlParseChunk') + d like(xmlCint) + d ctxt value like(htmlParserCtxtPtr) + d chunk * value options(*string) const char * + d size value like(xmlCint) + d terminate value like(xmlCint) + /endif LIBXML_PUSH_ENABLED + + d htmlFreeParserCtxt... 
+ d pr extproc('htmlFreeParserCtxt') + d ctxt value like(htmlParserCtxtPtr) + + * New set of simpler/more flexible APIs + + * htmlParserOption: + * + * This is the set of HTML parser options that can be passed down + * to the htmlReadDoc() and similar calls. + + d htmlParserOption... + d s based(######typedef######) + d like(xmlCenum) + d HTML_PARSE_RECOVER... Relaxed parsing + d c X'00000001' + d HTML_PARSE_NODEFDTD... No default doctype + d c X'00000004' + d HTML_PARSE_NOERROR... No error reports + d c X'00000020' + d HTML_PARSE_NOWARNING... No warning reports + d c X'00000040' + d HTML_PARSE_PEDANTIC... Pedantic err reports + d c X'00000080' + d HTML_PARSE_NOBLANKS... Remove blank nodes + d c X'00000100' + d HTML_PARSE_NONET... Forbid net access + d c X'00000800' + d HTML_PARSE_NOIMPLIED... No implied html/body + d c X'00002000' + d HTML_PARSE_COMPACT... compact small txtnod + d c X'00010000' + d HTML_PARSE_IGNORE_ENC... Ignore encoding hint + d c X'00200000' + + d htmlCtxtReset pr extproc('htmlCtxtReset') + d ctxt value like(htmlParserCtxtPtr) + + d htmlCtxtUseOptions... 
+ d pr extproc('htmlCtxtUseOptions') + d like(xmlCint) + d ctxt value like(htmlParserCtxtPtr) + d options value like(xmlCint) + + d htmlReadDoc pr extproc('htmlReadDoc') + d like(htmlDocPtr) + d cur * value options(*string) const xmlChar * + d URL * value options(*string) const char * + d encoding * value options(*string) const char * + d options value like(xmlCint) + + d htmlReadFile pr extproc('htmlReadFile') + d like(htmlDocPtr) + d URL * value options(*string) const char * + d encoding * value options(*string) const char * + d options value like(xmlCint) + + d htmlReadMemory pr extproc('htmlReadMemory') + d like(htmlDocPtr) + d buffer * value options(*string) const char * + d size value like(xmlCint) + d URL * value options(*string) const char * + d encoding * value options(*string) const char * + d options value like(xmlCint) + + d htmlReadFd pr extproc('htmlReadFd') + d like(htmlDocPtr) + d fd value like(xmlCint) + d URL * value options(*string) const char * + d encoding * value options(*string) const char * + d options value like(xmlCint) + + d htmlReadIO pr extproc('htmlReadIO') + d like(htmlDocPtr) + d ioread value like(xmlInputReadCallback) + d ioclose value like(xmlInputCloseCallback) + d ioctx * value void * + d URL * value options(*string) const char * + d encoding * value options(*string) const char * + d options value like(xmlCint) + + d htmlCtxtReadDoc... + d pr extproc('htmlCtxtReadDoc') + d like(htmlDocPtr) + d ctxt value like(xmlParserCtxtPtr) + d cur * value options(*string) const xmlChar * + d URL * value options(*string) const char * + d encoding * value options(*string) const char * + d options value like(xmlCint) + + d htmlCtxtReadFile... + d pr extproc('htmlCtxtReadFile') + d like(htmlDocPtr) + d ctxt value like(xmlParserCtxtPtr) + d filename * value options(*string) const char * + d encoding * value options(*string) const char * + d options value like(xmlCint) + + d htmlCtxtReadMemory... 
+ d pr extproc('htmlCtxtReadMemory') + d like(htmlDocPtr) + d ctxt value like(xmlParserCtxtPtr) + d buffer * value options(*string) const char * + d size value like(xmlCint) + d URL * value options(*string) const char * + d encoding * value options(*string) const char * + d options value like(xmlCint) + + d htmlCtxtReadFd pr extproc('htmlCtxtReadFd') + d like(htmlDocPtr) + d ctxt value like(xmlParserCtxtPtr) + d fd value like(xmlCint) + d URL * value options(*string) const char * + d encoding * value options(*string) const char * + d options value like(xmlCint) + + d htmlCtxtReadIO pr extproc('htmlCtxtReadIO') + d like(htmlDocPtr) + d ctxt value like(xmlParserCtxtPtr) + d ioread value like(xmlInputReadCallback) + d ioclose value like(xmlInputCloseCallback) + d ioctx * value void * + d URL * value options(*string) const char * + d encoding * value options(*string) const char * + d options value like(xmlCint) + + * Further knowledge of HTML structure + + d htmlStatus s based(######typedef######) + d like(xmlCenum) + d HTML_NA c X'0000' No check at all + d HTML_INVALID c X'0001' + d HTML_DEPRECATED... + d c X'0002' + d HTML_VALID c X'0004' + d HTML_REQUIRED c X'000C' HTML_VALID ored-in + + * Using htmlElemDesc rather than name here, to emphasise the fact + * that otherwise there's a lookup overhead + + d htmlAttrAllowed... + d pr extproc('htmlAttrAllowed') + d like(htmlStatus) + d #param1 value like(htmlElemDescPtr) const + d #param2 * value options(*string) const xmlChar * + d #param3 value like(xmlCint) + + d htmlElementAllowedHere... + d pr extproc('htmlElementAllowedHere') + d like(xmlCint) + d #param1 value like(htmlElemDescPtr) const + d #param2 * value options(*string) const xmlChar * + + d htmlElementStatusHere... 
+ d pr extproc('htmlElementStatusHere') + d like(htmlStatus) + d #param1 value like(htmlElemDescPtr) const + d #param2 value like(htmlElemDescPtr) const + + d htmlNodeStatus pr extproc('htmlNodeStatus') + d like(htmlStatus) + d #param1 value like(htmlNodePtr) + d #param2 value like(xmlCint) + + * C macros implemented as procedures for ILE/RPG support. + + d htmlDefaultSubelement... + d pr * extproc('__htmlDefaultSubelement') const char * + d elt * value const htmlElemDesc * + + d htmlElementAllowedHereDesc... + d pr extproc( + d '__htmlElementAllowedHereDesc') + d like(xmlCint) + d parent * value const htmlElemDesc * + d elt * value const htmlElemDesc * + + d htmlRequiredAttrs... + d pr * extproc('__htmlRequiredAttrs') const char * * + d elt * value const htmlElemDesc * + + /endif LIBXML_HTML_ENABLED + /endif HTML_PARSER_H__ -- cgit v1.2.3