path: root/libxml2-2.9.10/python/libxml.py
diff options
authorWojtek Kosior <wk@koszkonutek-tmp.pl.eu.org>2021-04-30 18:47:09 +0200
committerWojtek Kosior <wk@koszkonutek-tmp.pl.eu.org>2021-04-30 18:47:09 +0200
commit35a201cc8ef0c3f5b2df88d2e528aabee1048348 (patch)
tree902dae955480e19f4498dbe4964619fc91d09b06 /libxml2-2.9.10/python/libxml.py
Initial/Final commitHEADmaster
Diffstat (limited to 'libxml2-2.9.10/python/libxml.py')
1 files changed, 788 insertions, 0 deletions
diff --git a/libxml2-2.9.10/python/libxml.py b/libxml2-2.9.10/python/libxml.py
new file mode 100644
index 0000000..2466cc9
--- /dev/null
+++ b/libxml2-2.9.10/python/libxml.py
@@ -0,0 +1,788 @@
+import libxml2mod
+import types
+import sys
+# The root of all libxml2 errors.
class libxmlError(Exception):
    """Common base class of the libxml2 binding exceptions.

    It adds nothing to ``Exception``; it only gives callers a single type
    to catch.
    """
+# Type of the wrapper class for the C objects wrappers
def checkWrapper(obj):
    """Tell whether ``obj`` is something other than a raw C-object wrapper.

    Returns 1 when the type name of ``obj`` is neither ``PyCObject`` nor
    ``PyCapsule``, and 0 when it is one of those (or when the type name
    cannot be obtained).

    The previous implementation inspected an undefined name ``_obj``; the
    resulting NameError was swallowed by a bare ``except`` and the function
    always returned 0.
    """
    try:
        type_name = type(obj).__name__
    except Exception:
        return 0
    if type_name not in ('PyCObject', 'PyCapsule'):
        return 1
    return 0
+# id() is sometimes negative ...
def pos_id(o):
    """Return ``id(o)``, remapped to a positive number when it is negative.

    A negative identity ``i`` is returned as ``sys.maxsize - i``.
    """
    ident = id(o)
    return ident if ident >= 0 else sys.maxsize - ident
+# Errors raised by the wrappers when some tree handling failed.
class treeError(libxmlError):
    """Raised by the wrappers when some tree handling failed.

    The message is stored in ``msg`` and is what ``str()`` returns.
    """

    def __init__(self, msg):
        # Hand the message to the base class too, so that ``args`` and
        # ``repr()`` carry it instead of being empty.
        libxmlError.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
class parserError(libxmlError):
    """libxmlError subclass carrying a message in ``msg``.

    ``SAXCallback.error`` and ``SAXCallback.fatalError`` raise it.
    ``str()`` returns the stored message.
    """

    def __init__(self, msg):
        # Hand the message to the base class too, so that ``args`` and
        # ``repr()`` carry it instead of being empty.
        libxmlError.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
class uriError(libxmlError):
    """libxmlError subclass carrying a message in ``msg``.

    ``str()`` returns the stored message.
    """

    def __init__(self, msg):
        # Hand the message to the base class too, so that ``args`` and
        # ``repr()`` carry it instead of being empty.
        libxmlError.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
class xpathError(libxmlError):
    """libxmlError subclass carrying a message in ``msg``.

    ``str()`` returns the stored message.
    """

    def __init__(self, msg):
        # Hand the message to the base class too, so that ``args`` and
        # ``repr()`` carry it instead of being empty.
        libxmlError.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
class ioWrapper:
    """Adapter exposing a Python file-like object through ``io_*`` methods.

    The wrapped object is kept in a private attribute. Every method returns
    -1 once no object is wrapped any more (after ``io_close()`` or after a
    failed read). ``_o`` is initialised to None here and filled in by
    subclasses.
    """

    def __init__(self, _obj):
        self.__io = _obj
        self._o = None

    def io_close(self):
        """Close and forget the wrapped object.

        Returns 0 on success, -1 when nothing is wrapped.
        """
        if self.__io is None:
            return -1
        self.__io.close()
        self.__io = None
        return 0

    def io_flush(self):
        """Flush the wrapped object.

        Returns 0 on success, -1 when nothing is wrapped.
        """
        if self.__io is None:
            return -1
        self.__io.flush()
        return 0

    def io_read(self, len=-1):
        """Read from the wrapped object.

        A negative ``len`` reads everything the object returns from a bare
        ``read()``; otherwise ``read(len)`` is used. Returns the data read,
        or -1 when nothing is wrapped or the read raised an exception.
        """
        if self.__io is None:
            return -1
        try:
            if len < 0:
                ret = self.__io.read()
            else:
                ret = self.__io.read(len)
        except Exception as exc:
            print("failed to read from Python:", type(exc))
            print("on IO:", self.__io)
            # Drop the broken source so later calls fail fast. This used to
            # be written ``self.__io == None``, a comparison with no effect.
            self.__io = None
            return -1
        return ret

    def io_write(self, str, len=-1):
        """Write ``str`` to the wrapped object.

        A negative ``len`` calls ``write(str)``; otherwise ``len`` is passed
        as a second argument to ``write``. Returns whatever ``write``
        returns, or -1 when nothing is wrapped.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.write(str)
        return self.__io.write(str, len)
class ioReadWrapper(ioWrapper):
    """Input side of ioWrapper, backed by a libxml2 parser input buffer.

    ``_o`` holds the buffer created by ``libxml2mod.xmlCreateInputBuffer``
    for this wrapper and the encoding ``enc``.
    """

    def __init__(self, _obj, enc=""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        # Same teardown as close(). The debug print("__del__") that used to
        # run here wrote to stdout on every finalisation and was removed.
        self.close()

    def close(self):
        """Close the Python source and free the libxml2 input buffer."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None
class ioWriteWrapper(ioWrapper):
    """Output side of ioWrapper, wrapping a libxml2 output buffer.

    For a non-string ``_obj`` the Python file behind the buffer is looked up
    with ``libxml2mod.outputBufferGetPythonFile``. If one is found it is the
    object the ``io_*`` methods act on; otherwise ``_obj`` itself is. In
    both cases ``_o`` is set to ``_obj``. The ``enc`` argument is accepted
    but not used.
    """

    def __init__(self, _obj, enc=""):
        if type(_obj) is str:
            print("write io from a string")
            # Initialise the base class so that __del__(), flush() and
            # close() find their attributes. Previously only a mistyped
            # ``self.o`` was set and those methods raised AttributeError.
            ioWrapper.__init__(self, None)
            self.o = None  # historical attribute, kept for compatibility
        else:
            # The PyCapsule case and the general case were handled by two
            # identical branches; they are merged here.
            file = libxml2mod.outputBufferGetPythonFile(_obj)
            if file is not None:
                ioWrapper.__init__(self, file)
            else:
                ioWrapper.__init__(self, _obj)
            self._o = _obj

    def __del__(self):
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def flush(self):
        """Flush the Python object, then close the libxml2 output buffer.

        NOTE(review): this closes the buffer exactly as close() does --
        confirm that is intended for a flush.
        """
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def close(self):
        """Flush the Python object, then close the libxml2 output buffer."""
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None
+# Example of a class to handle SAX events
class SAXCallback:
    """Base class for SAX handlers.

    Every callback is a no-op except error() and fatalError(), which raise
    parserError. Subclasses override the events they care about.
    """

    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callbacks
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when a CDATA section has been read, data is the string
           containing the data, multiple consecutive cdataBlock() callbacks
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content=None):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        # content defaults to None so that four-argument calls, which
        # matched the definition that used to shadow this one, still work.
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicates the associated NOTATION"""
        # This method used to be a second definition of entityDecl, which
        # silently replaced the five-argument one above.
        pass

    def warning(self, msg):
        """called with a warning message; ignored by default"""
        pass

    def error(self, msg):
        """called with an error message; raises parserError(msg)"""
        raise parserError(msg)

    def fatalError(self, msg):
        """called with a fatal error message; raises parserError(msg)"""
        raise parserError(msg)
+# This class is the ancestor of all the Node classes. It provides
+# the basic functionality shared by all nodes (and gracefully handles
+# the exceptions), such as name, navigation in the tree,
+# doc reference, content access and serializing to a string or URI
class xmlCore:
    """Ancestor of all the node classes.

    Wraps a libxml2 node handle in ``_o`` and provides what all nodes
    share: name, navigation in the tree, document reference, content
    access, serialization, canonicalization, XPath evaluation and
    iteration.
    """

    def __init__(self, _obj=None):
        self._o = _obj

    def __eq__(self, other):
        """Compare the wrapped nodes with libxml2mod.compareNodesEqual."""
        if other is None:
            return False
        if not hasattr(other, "_o"):
            # Not a wrapper: let Python fall back to its default comparison
            # instead of raising AttributeError.
            return NotImplemented
        ret = libxml2mod.compareNodesEqual(self._o, other._o)
        if ret is None:
            return False
        return ret == True

    def __ne__(self, other):
        """Negation of the libxml2mod.compareNodesEqual result."""
        if other is None:
            return True
        if not hasattr(other, "_o"):
            return NotImplemented
        ret = libxml2mod.compareNodesEqual(self._o, other._o)
        return not ret

    def __hash__(self):
        return libxml2mod.nodeHash(self._o)

    def __str__(self):
        return self.serialize()

    def get_parent(self):
        """Return the wrapped parent node, or None."""
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_children(self):
        """Return the wrapped first child node, or None."""
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_last(self):
        """Return the wrapped result of libxml2mod.last(), or None."""
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_next(self):
        """Return the wrapped next sibling node, or None."""
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_properties(self):
        """Return the first property as an xmlAttr, or None."""
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)

    def get_prev(self):
        """Return the wrapped previous sibling node, or None."""
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_content(self):
        """Return the node content as given by xmlNodeGetContent."""
        return libxml2mod.xmlNodeGetContent(self._o)

    getContent = get_content  # why is this duplicate naming needed ?

    def get_name(self):
        """Return the node name."""
        return libxml2mod.name(self._o)

    def get_type(self):
        """Return the node type as reported by libxml2mod.type()."""
        return libxml2mod.type(self._o)

    def get_doc(self):
        """Return the xmlDoc this node belongs to.

        When the node has no document and is itself of type
        "document_xml" or "document_html", it is wrapped as the xmlDoc;
        otherwise None is returned.
        """
        ret = libxml2mod.doc(self._o)
        if ret is None:
            if self.type in ["document_xml", "document_html"]:
                return xmlDoc(_obj=self._o)
            return None
        return xmlDoc(_obj=ret)

    #
    # Those are common attributes to nearly all types of nodes,
    # defined as properties. The former __getattr__ fallback for
    # interpreters older than Python 2.2 was unreachable (the module needs
    # a far newer Python elsewhere) and has been dropped together with its
    # fragile float(sys.version[0:3]) check.
    #
    parent = property(get_parent, None, None, "Parent node")
    children = property(get_children, None, None, "First child node")
    last = property(get_last, None, None, "Last sibling node")
    next = property(get_next, None, None, "Next sibling node")
    prev = property(get_prev, None, None, "Previous sibling node")
    properties = property(get_properties, None, None, "List of properies")
    content = property(get_content, None, None, "Content of this node")
    name = property(get_name, None, None, "Node name")
    type = property(get_type, None, None, "Node type")
    doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding=None, format=0):
        """Return the node serialized by libxml2mod.serializeNode."""
        return libxml2mod.serializeNode(self._o, encoding, format)

    def saveTo(self, file, encoding=None, format=0):
        """Save the node to ``file`` through libxml2mod.saveNodeTo."""
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Canonicalization routines:
    #
    #   nodes: the node set (tuple or list) to be included in the
    #     canonized image or None if all document nodes should be
    #     included.
    #   exclusive: the exclusive flag (0 - non-exclusive
    #     canonicalization; otherwise - exclusive canonicalization)
    #   prefixes: the list of inclusive namespace prefixes (strings),
    #     or None if there is no inclusive namespaces (only for
    #     exclusive canonicalization, ignored otherwise)
    #   with_comments: include comments in the result (!=0) or not
    #     (==0)
    def c14nMemory(self,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        """Canonicalize this node's document and return the result of
        libxml2mod.xmlC14NDocDumpMemory."""
        if nodes:
            # The C layer expects raw node handles, not wrappers.
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocDumpMemory(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0)

    def c14nSaveTo(self,
                   file,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        """Canonicalize this node's document into ``file`` through
        libxml2mod.xmlC14NDocSaveTo."""
        if nodes:
            # The C layer expects raw node handles, not wrappers.
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocSaveTo(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0,
            file)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate the XPath expression ``expr`` with this node as the
        context node. Returns None when the node has no document."""
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            return ctxt.xpathEval(expr)
        finally:
            # Free the context even when the evaluation raises; it used to
            # leak in that case.
            ctxt.xpathFreeContext()

    # A variant caching one context per xmlDoc used to live here; it was
    # removed because of memory leaks (c.f. #126735) and now simply
    # forwards to xpathEval().
    def xpathEval2(self, expr):
        """Same as xpathEval()."""
        return self.xpathEval(expr)

    # Remove namespaces
    def removeNsDef(self, href):
        """
        Remove a namespace definition from a node.  If href is None,
        remove all of the ns definitions on that node.  The removed
        namespaces are returned as a linked list.

        Note: If any child nodes referred to the removed namespaces,
        they will be left with dangling links.  You should call
        reconciliateNs() to fix those pointers.

        Note: This method does not free memory taken by the ns
        definitions.  You will need to free it manually with the
        freeNsList() method on the returned xmlNs object.
        """
        ret = libxml2mod.xmlNodeRemoveNsDef(self._o, href)
        if ret is None:
            return None
        return xmlNs(_obj=ret)

    # support for iterators
    def walk_depth_first(self):
        """Return a depth-first iterator starting at this node."""
        return xmlCoreDepthFirstItertor(self)

    def walk_breadth_first(self):
        """Return a breadth-first iterator starting at this node."""
        return xmlCoreBreadthFirstItertor(self)

    __iter__ = walk_depth_first

    def free(self):
        """Free the wrapped handle with libxml2mod.xmlFreeDoc.

        A cached XPath context on the document, if any, is freed first on a
        best-effort basis.
        """
        try:
            self.doc._ctxt.xpathFreeContext()
        except Exception:
            # Deliberately best effort: usually there is no cached context.
            pass
        libxml2mod.xmlFreeDoc(self._o)
+# implements the depth-first iterator for libxml2 DOM tree
class xmlCoreDepthFirstItertor:
    """Depth-first iterator over a libxml2 DOM tree.

    Each node is yielded before its children; once a subtree is exhausted
    the walk resumes at the ``next`` sibling of the most recently yielded
    pending node.
    """

    def __init__(self, node):
        self.node = node
        self.parents = []

    def __iter__(self):
        return self

    def __next__(self):
        # No current node: back up through the pending nodes until one of
        # them has a following sibling, or stop when none is left.
        while not self.node:
            if not self.parents:
                raise StopIteration
            self.node = self.parents.pop().next
        current = self.node
        self.parents.append(current)
        self.node = current.children
        return current

    next = __next__
+# implements the breadth-first iterator for libxml2 DOM tree
class xmlCoreBreadthFirstItertor:
    """Breadth-first iterator over a libxml2 DOM tree.

    A run of siblings is yielded first; their children are then visited in
    the order the parents were yielded. Pending parents are kept in a FIFO
    queue. The earlier implementation popped them from the end of a list,
    which visited deeper levels of the last sibling before the children of
    the earlier ones.
    """

    def __init__(self, node):
        from collections import deque
        self.node = node
        self.parents = deque()

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            if self.node:
                ret = self.node
                self.parents.append(ret)
                self.node = ret.next
                return ret
            try:
                # Oldest pending parent first: this is what makes the walk
                # level by level.
                parent = self.parents.popleft()
            except IndexError:
                raise StopIteration
            self.node = parent.children

    next = __next__
+# converters to present a nicer view of the XPath returns
def nodeWrap(o):
    """Wrap the raw node ``o`` in the class matching its libxml2 type.

    The type string comes from ``libxml2mod.type``; any type without a
    dedicated class is wrapped as a plain xmlNode.
    """
    # TODO try to cast to the most appropriate node class
    kind = libxml2mod.type(o)
    if kind in ("element", "text"):
        return xmlNode(_obj=o)
    if kind == "attribute":
        return xmlAttr(_obj=o)
    if kind[:8] == "document":
        return xmlDoc(_obj=o)
    if kind == "namespace":
        return xmlNs(_obj=o)
    if kind == "elem_decl":
        return xmlElement(_obj=o)
    if kind == "attribute_decl":
        return xmlAttribute(_obj=o)
    if kind == "entity_decl":
        return xmlEntity(_obj=o)
    if kind == "dtd":
        return xmlDtd(_obj=o)
    return xmlNode(_obj=o)
def xpathObjectRet(o):
    """Convert an XPath result into plain Python values and node wrappers.

    Lists and tuples are converted element by element, keeping their
    container type. Values whose type is exactly str, int or float are
    returned unchanged. Anything else is passed to nodeWrap().
    """
    kind = type(o)
    if kind is list:
        return [xpathObjectRet(item) for item in o]
    if kind is tuple:
        return tuple([xpathObjectRet(item) for item in o])
    if kind in (str, int, float):
        return o
    return nodeWrap(o)
+# register an XPath function
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register the Python callable ``f`` as an XPath function.

    ``ctxt``, ``name`` and ``ns_uri`` are handed unchanged to
    ``libxml2mod.xmlRegisterXPathFunction``. Its result, which used to be
    computed and thrown away, is now returned so that callers can inspect
    it.
    """
    return libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
+# For the xmlTextReader parser configuration
+# For the error callback severities
+# register the libxml2 error handler
def registerErrorHandler(f, ctx):
    """Register a Python function for error reporting.

    The function is called back as f(ctx, error). Returns the result of
    the registration call that was used.
    """
    import sys
    if 'libxslt' in sys.modules:
        # libxslt is already imported: its error handler has to be used
        # instead of libxml2's.
        import libxslt
        return libxslt.registerErrorHandler(f, ctx)
    # Normal behaviour when libxslt is not imported.
    return libxml2mod.xmlRegisterErrorHandler(f, ctx)
class parserCtxtCore:
    """Base wrapper around a libxml2 parser context handle kept in ``_o``.

    The handle is freed with ``libxml2mod.xmlFreeParserCtxt`` when the
    wrapper is finalised.
    """

    def __init__(self, _obj=None):
        self._o = _obj

    def __del__(self):
        # getattr() keeps finalisation safe for an instance whose __init__
        # never ran.
        if getattr(self, "_o", None) is not None:
            libxml2mod.xmlFreeParserCtxt(self._o)
        self._o = None

    def setErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o, f, arg)

    def getErrorHandler(self):
        """Return (f,arg) as previously registered with setErrorHandler
           or (None,None)."""
        return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)

    def addLocalCatalog(self, uri):
        """Register a local catalog with the parser"""
        return libxml2mod.addLocalCatalog(self._o, uri)
class ValidCtxtCore:
    """Base class providing handler registration for DTD validation.

    The constructor accepts and ignores any arguments. ``self._o`` is not
    set here; it must be provided elsewhere before
    setValidityErrorHandler() is called.
    """

    def __init__(self, *args, **kw):
        """Accept any arguments and do nothing with them."""

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for DTD validation.
        These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlSetValidErrors(handle, err_func, warn_func, arg)
class SchemaValidCtxtCore:
    """Base class providing handler registration for Schema validation.

    The constructor accepts and ignores any arguments. ``self._o`` is not
    set here; it must be provided elsewhere before
    setValidityErrorHandler() is called.
    """

    def __init__(self, *args, **kw):
        """Accept any arguments and do nothing with them."""

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for Schema validation.
        These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlSchemaSetValidErrors(handle, err_func, warn_func, arg)
class relaxNgValidCtxtCore:
    """Base class providing handler registration for RelaxNG validation.

    The constructor accepts and ignores any arguments. ``self._o`` is not
    set here; it must be provided elsewhere before
    setValidityErrorHandler() is called.
    """

    def __init__(self, *args, **kw):
        """Accept any arguments and do nothing with them."""

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for RelaxNG validation.
        These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlRelaxNGSetValidErrors(handle, err_func, warn_func, arg)
def _xmlTextReaderErrorFunc(xxx_todo_changeme, msg, severity, locator):
    """Intermediate callback to wrap the locator.

    ``xxx_todo_changeme`` is the ``(f, arg)`` pair stored by
    xmlTextReaderCore.SetErrorHandler. The user callback ``f`` is invoked
    as ``f(arg, msg, severity, xmlTextReaderLocator(locator))`` and its
    result is returned.
    """
    handler, user_arg = xxx_todo_changeme
    wrapped_locator = xmlTextReaderLocator(locator)
    return handler(user_arg, msg, severity, wrapped_locator)
class xmlTextReaderCore:
    """Base wrapper around a libxml2 text reader handle kept in ``_o``.

    ``input`` is initialised to None. The handle is freed with
    ``libxml2mod.xmlFreeTextReader`` when the wrapper is finalised.
    """

    def __init__(self, _obj=None):
        self.input = None
        self._o = _obj

    def __del__(self):
        # getattr() keeps finalisation safe for an instance whose __init__
        # never ran.
        if getattr(self, "_o", None) is not None:
            libxml2mod.xmlFreeTextReader(self._o)
        self._o = None

    def SetErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,locator)."""
        if f is None:
            libxml2mod.xmlTextReaderSetErrorHandler(self._o, None, None)
        else:
            # The C layer calls _xmlTextReaderErrorFunc, which unpacks the
            # (f, arg) pair and wraps the locator before calling f.
            libxml2mod.xmlTextReaderSetErrorHandler(
                self._o, _xmlTextReaderErrorFunc, (f, arg))

    def GetErrorHandler(self):
        """Return (f,arg) as previously registered with SetErrorHandler
           or (None,None)."""
        f, arg = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if f is None:
            return None, None
        # The registered function is the intermediate callback; the user's
        # (f, arg) pair is what was stored as its argument.
        return arg
+# The cleanup now goes through a wrapper in libxml.c
def cleanupParser():
    """Run the parser cleanup by calling libxml2mod.xmlPythonCleanupParser."""
    libxml2mod.xmlPythonCleanupParser()
+# The interface to xmlRegisterInputCallbacks.
+# Since this API does not allow to pass a data object along with
+# match/open callbacks, it is necessary to maintain a list of all
+# Python callbacks.
__input_callbacks = []


def registerInputCallback(func):
    """Add ``func`` to the Python-level input callbacks.

    A dispatcher is handed to ``libxml2mod.xmlRegisterInputCallback``. For
    a given URI it tries the registered callbacks from the most recently
    added to the oldest and returns the first result that is not None (or
    None when every callback declines).
    """
    def findOpenCallback(URI):
        for opener in reversed(__input_callbacks):
            handle = opener(URI)
            if handle is not None:
                return handle
        return None

    libxml2mod.xmlRegisterInputCallback(findOpenCallback)
    __input_callbacks.append(func)
def popInputCallbacks():
    """Remove the most recently registered input callback.

    Python-level callbacks are popped first. Once none is left (including
    when none was registered to begin with),
    ``libxml2mod.xmlUnregisterInputCallback`` is called, which starts
    popping the built-in ones.
    """
    if __input_callbacks:
        __input_callbacks.pop()
    if not __input_callbacks:
        libxml2mod.xmlUnregisterInputCallback()
+# Everything before this line comes from libxml.py
+# Everything after this line is automatically generated