aboutsummaryrefslogtreecommitdiff
path: root/libxml2-2.9.10/doc/apibuild.py
diff options
context:
space:
mode:
Diffstat (limited to 'libxml2-2.9.10/doc/apibuild.py')
-rwxr-xr-xlibxml2-2.9.10/doc/apibuild.py2151
1 files changed, 2151 insertions, 0 deletions
diff --git a/libxml2-2.9.10/doc/apibuild.py b/libxml2-2.9.10/doc/apibuild.py
new file mode 100755
index 0000000..95d7d04
--- /dev/null
+++ b/libxml2-2.9.10/doc/apibuild.py
@@ -0,0 +1,2151 @@
+#!/usr/bin/python -u
+#
+# This is the API builder, it parses the C sources and build the
+# API formal description in XML.
+#
+# See Copyright for the status of this software.
+#
+# daniel@veillard.com
+#
+import os, sys
+import string
+import glob
+
# Debugging switches: a true ``debug`` traces the token stream; setting
# ``debugsym`` to a symbol name traces how that symbol gets indexed.
debug = 0
#debugsym='ignorableWhitespaceSAXFunc'
debugsym = None

#
# C parser analysis code
#

# File names mapped to the reason they are left out of the analysis.
# NOTE(review): presumably consulted by the file scanner further down in
# this script -- confirm against the caller.
ignored_files = {
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
    "testOOM.c": "out of memory tester",
    "testOOMlib.h": "out of memory tester",
    "testOOMlib.c": "out of memory tester",
    "rngparser.c": "not yet integrated",
    "rngparser.h": "not yet integrated",
    "elfgcchack.h": "not a normal header",
    "testHTML.c": "test tool",
    "testReader.c": "test tool",
    "testSchemas.c": "test tool",
    "testXPath.c": "test tool",
    "testAutomata.c": "test tool",
    "testModule.c": "test tool",
    "testRegexp.c": "test tool",
    "testThreads.c": "test tool",
    "testC14N.c": "test tool",
    "testRelax.c": "test tool",
    "testSAX.c": "test tool",
    "testURI.c": "test tool",
    "testapi.c": "generated regression tests",
    "runtest.c": "regression tests program",
    "runsuite.c": "regression tests program",
    "tst.c": "not part of the library",
    "test.c": "not part of the library",
    "testdso.c": "test for dynamid shared libraries",
    "testrecurse.c": "test for entities recursions",
    "xzlib.h": "Internal API only 2.8.0",
    "buf.h": "Internal API only 2.9.0",
    "enc.h": "Internal API only 2.9.0",
    "/save.h": "Internal API only 2.9.0",
    "timsort.h": "Internal header only for xpath.c 2.9.0",
}

# Identifiers the parser drops from the token stream.  Each value is a
# (count, reason) pair: ``count`` is the number of tokens following the
# word that are discarded together with it.
ignored_words = {
    "WINAPI": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
    "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XMLCALL": (0, "Special macro for win32 calls"),
    "XSLTCALL": (0, "Special macro for win32 calls"),
    "XMLCDECL": (0, "Special macro for win32 calls"),
    "EXSLTCALL": (0, "Special macro for win32 calls"),
    "__declspec": (3, "Windows keyword"),
    "__stdcall": (0, "Windows keyword"),
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    "X_IN_Y": (5, "macro function builder"),
    "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
    "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
    "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
    "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
    "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"),
}
+
def escape(raw):
    """Return *raw* with the five XML special characters escaped.

    The ampersand is replaced first so that the '&' introduced by the
    other entity references is not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
+
def uniq(items):
    """Return the distinct elements of *items* as a list.

    Elements must be hashable; each one is kept once, at the position of
    its first occurrence.
    """
    return list(dict.fromkeys(items, 1))
+
class identifier:
    """Description of one symbol collected while parsing the C sources.

    Attributes: name, header, module, type, info, extra, lineno, static
    (0 or 1) and conditionals (a private copy of the list given, or None
    when no condition applies).
    """

    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r

    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        """Store a copy of *conditionals*; None or empty is stored as None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            # copy so later changes to the caller's list do not leak in
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # used to return self.module, which made the header unreachable
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge new information into this identifier.

        header and type are only filled in when still unset; module is
        also replaced while it merely mirrors the header.  info, extra
        and conditionals are overwritten whenever a value is supplied.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # used to store `module` in the header slot
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
+
class index:
    """Symbol tables built while parsing one file or merging several.

    ``identifiers`` maps every known name to its identifier object; the
    per-kind dictionaries (functions, variables, includes, structs,
    enums, typedefs, macros) and ``references`` point at the same
    objects.  ``info`` holds the fields parsed from the top comment.
    """

    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _lookup_or_create(self, name, header, module, type, lineno, info,
                          extra, conditionals):
        """Return the identifier for *name*, updating or creating it."""
        d = self.identifiers.get(name)
        if d is None:
            d = identifier(name, header, module, type, lineno, info, extra,
                           conditionals)
            self.identifiers[name] = d
        else:
            # update() takes no lineno; passing one used to raise a
            # TypeError that a bare except swallowed, so the existing
            # entry was silently thrown away and rebuilt.
            d.update(header, module, type, info, extra, conditionals)
            d.set_lineno(lineno)
        return d

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to *name*; names starting with '__' are skipped.

        Returns the identifier, or None for a skipped name.
        """
        if name[0:2] == '__':
            return None
        d = self._lookup_or_create(name, header, module, type, lineno, info,
                                   extra, conditionals)

        if static == 1:
            d.set_static(1)

        if type is not None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d))

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a definition of *name* in the table matching *type*.

        Names starting with '__' are skipped (None is returned).  An
        unknown type is reported on stdout but the identifier is kept.
        """
        if name[0:2] == '__':
            return None
        d = self._lookup_or_create(name, header, module, type, lineno, info,
                                   extra, conditionals)

        if static == 1:
            d.set_static(1)

        if type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            table = tables.get(type)
            if table is None:
                print("Unable to register type ", type)
            else:
                table[name] = d

        if name == debugsym:
            print("New symbol: %s" % (d))

        return d

    def _adopt(self, kind, mine, theirs, key):
        """Copy theirs[key] into mine, or report the redeclaration."""
        if key in mine:
            print("%s %s from %s redeclared in %s" % (
                kind, key, mine[key].header, theirs[key].header))
        else:
            mine[key] = theirs[key]
            self.identifiers[key] = theirs[key]

    def merge(self, idx):
        """Import the symbols of *idx*; first declaration wins."""
        for key in list(idx.functions.keys()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            self.macros.pop(key, None)
            self._adopt("function", self.functions, idx.functions, key)
        for key in list(idx.variables.keys()):
            # same macro override rule as for functions
            self.macros.pop(key, None)
            self._adopt("variable", self.variables, idx.variables, key)
        for key in list(idx.structs.keys()):
            self._adopt("struct", self.structs, idx.structs, key)
        for key in list(idx.typedefs.keys()):
            self._adopt("typedef", self.typedefs, idx.typedefs, key)
        for key in list(idx.macros.keys()):
            # a macro never hides a variable, function or enum already known
            if key in self.variables or key in self.functions or \
               key in self.enums:
                continue
            self._adopt("macro", self.macros, idx.macros, key)
        for key in list(idx.enums.keys()):
            self._adopt("enum", self.enums, idx.enums, key)

    def merge_public(self, idx):
        """Fold the function data of *idx* into functions already known here.

        Functions only present in *idx* are ignored.
        """
        for key in list(idx.functions.keys()):
            if key in self.functions:
                # check that function condition agrees with header
                if idx.functions[key].conditionals != \
                   self.functions[key].conditionals:
                    print("Header condition differs from Function for %s:" \
                          % key)
                    print(" H: %s" % self.functions[key].conditionals)
                    print(" C: %s" % idx.functions[key].conditionals)
                up = idx.functions[key]
                self.functions[key].update(None, up.module, up.type, up.info, up.extra)
            # else:
            #     print "Function %s from %s is not declared in headers" % (
            #            key, idx.functions[key].module)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries of *dict* exist and how many are public."""
        count = len(dict)
        public = sum(1 for entry in dict.values() if entry.static == 0)
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))

    def analyze(self):
        """Print the statistics of the main symbol tables."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
+
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

    Tokens are (kind, text) tuples; kind is one of 'preproc', 'string',
    'comment', 'name', 'sep' or 'op'.
    """

    # characters ending a 'name' token
    _NAME_STOP = " \t(){}:;,+-*/%&!|[]=><"
    _SEPARATORS = "(){}:;,[]"
    _OPERATORS = "+-*><=/%&!|."
    # characters which may follow an operator to form a two-char operator
    _OPERATOR_TAIL = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # last token handed out; initialised so that debug() also works
        # before the first call to token()
        self.last = None

    def getline(self):
        """Return the next non-blank line, stripped, or None at end of input.

        Lines ending with a backslash are joined with the following ones.
        """
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put *token* back so it is returned by the next token() call."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token: ", self.last)
        print("Token queue: ", self.tokens)
        print("Line %d end: " % (self.lineno), self.line)

    def _read_string(self, line):
        """Read a string or char literal starting at line[0]."""
        quote = line[0]
        line = line[1:]
        text = ""
        while True:
            closed = False
            pos = 0
            size = len(line)
            while pos < size:
                ch = line[pos]
                if ch == quote:
                    self.line = line[pos + 1:]
                    line = line[:pos]
                    closed = True
                    break
                if ch == '\\':
                    # skip the escaped character
                    pos = pos + 1
                pos = pos + 1
            text = text + line
            if closed:
                break
            line = self.getline()
            if line == None:
                return None
        self.last = ('string', text)
        return self.last

    def _read_block_comment(self, line):
        """Read a /* ... */ comment whose opener is at the start of *line*."""
        line = line[2:]
        text = ""
        while True:
            pos = line.find('*/')
            closed = pos != -1
            if closed:
                self.line = line[pos + 2:]
                # NOTE: historical quirk kept on purpose -- the character
                # before the '*/' is dropped, and a line starting with
                # '*/' contributes a lone '*'.  The comment parsers rely
                # on the resulting text layout.
                line = line[:pos - 1]
            if text != "":
                text = text + "\n"
            text = text + line
            if closed:
                break
            line = self.getline()
            if line == None:
                return None
        self.last = ('comment', text)
        return self.last

    def _split_code(self, line):
        """Cut *line* before the first comment or literal it contains.

        The remainder is kept in self.line for the next round.
        """
        for pos, ch in enumerate(line):
            if ch == '"' or ch == "'" or \
               (ch == '/' and line[pos + 1:pos + 2] in ('/', '*')):
                self.line = line[pos:]
                return line[:pos]
        return line

    def _tokenize(self, line):
        """Append the name, separator and operator tokens of *line*."""
        pos = 0
        size = len(line)
        while pos < size:
            ch = line[pos]
            if ch == ' ' or ch == '\t':
                pos = pos + 1
                continue
            if ch in self._SEPARATORS:
                self.tokens.append(('sep', ch))
                pos = pos + 1
                continue
            if ch in self._OPERATORS:
                if ch == '.' and pos + 2 < size and \
                   line[pos + 1] == '.' and line[pos + 2] == '.':
                    self.tokens.append(('name', '...'))
                    pos = pos + 3
                    continue
                nxt = pos + 1
                if nxt < size and line[nxt] in self._OPERATOR_TAIL:
                    self.tokens.append(('op', line[pos:nxt + 1]))
                    pos = nxt + 1
                else:
                    self.tokens.append(('op', ch))
                    pos = pos + 1
                continue
            # anything else starts a name running up to the next stop char
            start = pos
            while pos < size and line[pos] not in self._NAME_STOP:
                pos = pos + 1
            self.tokens.append(('name', line[start:pos]))

    def token(self):
        """Return the next token, or None when the input is exhausted."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line == None:
                return None

            if line[0] == '#':
                # a preprocessor line is split on whitespace only
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._read_string(line)
            if line[0:2] == '/*':
                return self._read_block_comment(line)
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenize(self._split_code(line))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
+
+class CParser:
+ """The C module parser"""
def __init__(self, filename, idx = None):
    """Open *filename* and prepare a lexer and parsing state for it.

    A name ending in '.h' (and longer than two characters) is flagged as
    a header.  Symbols go to *idx* when given, else to a fresh index.
    """
    self.filename = filename
    self.is_header = 1 if (len(filename) > 2 and filename.endswith('.h')) else 0
    # the lexer reads lazily, so the file stays open for the parser's life
    self.input = open(filename)
    self.lexer = CLexer(self.input)
    self.index = index() if idx == None else idx
    self.top_comment = ""
    self.last_comment = ""
    self.comment = None
    self.collect_ref = 0
    self.no_error = 0
    self.conditionals = []
    self.defines = []
+
def collect_references(self):
    """Turn on the recording of symbol references inside code blocks."""
    self.collect_ref = 1
+
def stop_error(self):
    """Silence warning() and error() until start_error() is called."""
    self.no_error = 1
+
def start_error(self):
    """Re-enable the reporting done by warning() and error()."""
    self.no_error = 0
+
def lineno(self):
    """Return the line number the lexer has reached."""
    current = self.lexer.getlineno()
    return current
+
def index_add(self, name, module, static, type, info=None, extra = None):
    """Register a symbol definition in the index.

    The module doubles as the header when the parsed file is a header;
    otherwise no header is recorded.  The current line number and the
    active conditionals are attached.
    """
    header = module if self.is_header == 1 else None
    self.index.add(name, header, module, static, type, self.lineno(),
                   info, extra, self.conditionals)
+
def index_add_ref(self, name, module, static, type, info=None,
                  extra = None):
    """Register a symbol reference in the index.

    The module doubles as the header when the parsed file is a header;
    otherwise no header is recorded.  The current line number and the
    active conditionals are attached.
    """
    header = module if self.is_header == 1 else None
    self.index.add_ref(name, header, module, static, type, self.lineno(),
                       info, extra, self.conditionals)
+
def warning(self, msg):
    """Print *msg* unless reporting is currently disabled."""
    if not self.no_error:
        print(msg)
+
def error(self, msg, token=-1):
    """Report a parse error, dump the lexer state and exit with status 1.

    Nothing happens while reporting is disabled.  *token* is printed
    unless left at its -1 default.
    """
    if self.no_error:
        return

    print("Parse Error: " + msg)
    if token != -1:
        print("Got token ", token)
    self.lexer.debug()
    sys.exit(1)
+
def debug(self, msg, token=-1):
    """Print a debug message, the optional *token* and the lexer state."""
    print("Debug: " + msg)
    if not token == -1:
        print("Got token ", token)
    self.lexer.debug()
+
def parseTopComment(self, comment):
    """Parse the 'Key: value' fields of a file's leading comment.

    Leading blanks and '*' decoration are removed from every line.  A
    line containing ':' starts (or extends) the field named before the
    colon; a line without one continues the current field.  The
    resulting dictionary is stored in self.index.info.
    """
    fields = {}
    current = None
    for raw in comment.split("\n"):
        text = raw.lstrip(' \t').lstrip('*').lstrip(' \t')
        key, colon, rest = text.partition(":")
        if colon:
            current = key
            text = rest.lstrip(' \t')
        elif current == None:
            # continuation line before any field was seen: ignored
            continue
        if current in fields:
            fields[current] = fields[current] + " " + text
        else:
            fields[current] = text
    self.index.info = fields
+
def parseComment(self, token):
    """Record a comment token and return the token following it.

    The first comment seen becomes the top comment.  A comment starting
    with '*' (or the first one after a reset) replaces the pending
    comment; any other text is appended to it.  DOC_DISABLE and
    DOC_ENABLE found in the pending comment switch reporting off and on.
    """
    text = token[1]
    if self.top_comment == "":
        self.top_comment = text
    # startswith() rather than text[0]: an empty comment ('//' alone or
    # '/* */') must not raise IndexError
    if self.comment == None or text.startswith('*'):
        self.comment = text
    else:
        self.comment = self.comment + text
    token = self.lexer.token()

    if self.comment.find("DOC_DISABLE") != -1:
        self.stop_error()

    if self.comment.find("DOC_ENABLE") != -1:
        self.start_error()

    return token
+
+ #
+ # Parse a comment block associate to a typedef
+ #
def parseTypeComment(self, name, quiet = 0):
    """Extract the description of type *name* from the pending comment.

    The comment must start with '*' and carry a '* name:' header line.
    Returns the description string on success.  When the comment is
    missing or malformed the historical ([], "") pair is returned (and a
    warning is issued unless *quiet*); callers must cope with both.
    Names starting with '__' are always handled quietly.
    """
    if name[0:2] == '__':
        quiet = 1

    args = []
    desc = ""

    if self.comment == None:
        if not quiet:
            self.warning("Missing comment for type %s" % (name))
        return((args, desc))
    # startswith() also copes with an empty comment
    if not self.comment.startswith('*'):
        if not quiet:
            self.warning("Missing * in type comment for %s" % (name))
        return((args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    # the comment may consist of the lone '*' line just removed
    if len(lines) == 0 or lines[0] != "* %s:" % (name):
        if not quiet:
            got = lines[0] if lines else ""
            self.warning("Misformatted type comment for %s" % (name))
            self.warning(" Expecting '* %s:' got '%s'" % (name, got))
        return((args, desc))
    del lines[0]
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    for raw in lines:
        desc = desc + " " + raw.lstrip('*').strip()
    desc = desc.strip()

    if quiet == 0:
        if desc == "":
            self.warning("Type comment for %s lack description of the macro" % (name))

    return(desc)
+ #
+ # Parse a comment block associate to a macro
+ #
def parseMacroComment(self, name, quiet = 0):
    """Extract argument and general descriptions of macro *name*.

    The pending comment must start with '*' and carry a '* name:' header
    line, optionally followed by '* @arg: text' entries and then the
    free-form description.  Returns (args, desc) where args is a list of
    (arg, description) pairs; ([], "") when the comment is missing or
    malformed.  Warnings are issued unless *quiet*; names starting with
    '__' are always handled quietly.
    """
    if name[0:2] == '__':
        quiet = 1

    args = []
    desc = ""

    if self.comment == None:
        if not quiet:
            self.warning("Missing comment for macro %s" % (name))
        return((args, desc))
    # startswith() also copes with an empty comment
    if not self.comment.startswith('*'):
        if not quiet:
            self.warning("Missing * in macro comment for %s" % (name))
        return((args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if len(lines) == 0 or lines[0] != "* %s:" % (name):
        if not quiet:
            got = lines[0] if lines else ""
            self.warning("Misformatted macro comment for %s" % (name))
            self.warning(" Expecting '* %s:' got '%s'" % (name, got))
        return((args, desc))
    del lines[0]
    # guarded: a comment holding only the header used to run off the list
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    while len(lines) > 0 and lines[0][0:3] == '* @':
        head, colon, tail = lines[0][3:].partition(':')
        if colon == "":
            if not quiet:
                self.warning("Misformatted macro comment for %s" % (name))
                self.warning(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        arg = head.strip()
        desc = tail.strip()
        del lines[0]
        # following lines extend the description up to a short line or
        # the next '* @' entry
        l = lines[0].strip() if lines else ""
        while len(l) > 2 and l[0:3] != '* @':
            desc = desc + ' ' + l.lstrip('*').strip()
            del lines[0]
            if len(lines) == 0:
                break
            l = lines[0]
        args.append((arg, desc))
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    desc = ""
    for raw in lines:
        desc = desc + " " + raw.lstrip('*').strip()
    desc = desc.strip()

    if quiet == 0:
        if desc == "":
            self.warning("Macro comment for %s lack description of the macro" % (name))

    return((args, desc))
+
+ #
+ # Parse a comment block and merge the informations found in the
+ # parameters descriptions, finally returns a block as complete
+ # as possible
+ #
def mergeFunctionComment(self, name, description, quiet = 0):
    """Merge the pending comment into the description of function *name*.

    *description* is (ret, args): ret[0] is the return type and args a
    list of (type, name, description) tuples, updated in place with the
    text of the matching '* @arg: text' entries.  Returns
    ((return type, return description), args, description).  When the
    comment is missing or malformed both descriptions are empty.
    Warnings are issued unless *quiet*; 'main' and names starting with
    '__' are always handled quietly.
    """
    if name == 'main':
        quiet = 1
    if name[0:2] == '__':
        quiet = 1

    (ret, args) = description
    desc = ""
    retdesc = ""

    if self.comment == None:
        if not quiet:
            self.warning("Missing comment for function %s" % (name))
        return(((ret[0], retdesc), args, desc))
    # startswith() also copes with an empty comment
    if not self.comment.startswith('*'):
        if not quiet:
            self.warning("Missing * in function comment for %s" % (name))
        return(((ret[0], retdesc), args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if len(lines) == 0 or lines[0] != "* %s:" % (name):
        if not quiet:
            got = lines[0] if lines else ""
            self.warning("Misformatted function comment for %s" % (name))
            self.warning(" Expecting '* %s:' got '%s'" % (name, got))
        return(((ret[0], retdesc), args, desc))
    del lines[0]
    # guarded: a comment holding only the header used to run off the list
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    nbargs = len(args)
    while len(lines) > 0 and lines[0][0:3] == '* @':
        head, colon, tail = lines[0][3:].partition(':')
        if colon == "":
            if not quiet:
                self.warning("Misformatted function comment for %s" % (name))
                self.warning(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        arg = head.strip()
        desc = tail.strip()
        del lines[0]
        # following lines extend the description up to a short line or
        # the next '* @' entry
        l = lines[0].strip() if lines else ""
        while len(l) > 2 and l[0:3] != '* @':
            desc = desc + ' ' + l.lstrip('*').strip()
            del lines[0]
            if len(lines) == 0:
                break
            l = lines[0]
        for i in range(nbargs):
            if args[i][1] == arg:
                args[i] = (args[i][0], arg, desc)
                break
        else:
            if not quiet:
                self.warning("Unable to find arg %s from function comment for %s" % (
                    arg, name))
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    desc = ""
    while len(lines) > 0:
        l = lines[0].lstrip('*').strip()
        del lines[0]
        if l[0:6] == "return" or l[0:6] == "Return":
            # everything from here on describes the return value
            retdesc = l.partition(' ')[2].strip()
            for raw in lines:
                retdesc = retdesc + " " + raw.lstrip('*').strip()
            del lines[:]
        else:
            desc = desc + " " + l

    retdesc = retdesc.strip()
    desc = desc.strip()

    if quiet == 0:
        #
        # report missing comments
        #
        for entry in args[:nbargs]:
            if entry[2] == None and entry[0] != "void" and entry[1] != None:
                self.warning("Function comment for %s lacks description of arg %s" % (name, entry[1]))
        if retdesc == "" and ret[0] != "void":
            self.warning("Function comment for %s lacks description of return value" % (name))
        if desc == "":
            self.warning("Function comment for %s lacks description of the function" % (name))

    return(((ret[0], retdesc), args, desc))
+
def parsePreproc(self, token):
    """Handle one preprocessor directive and return the token after it.

    '#include' and '#define' register an include or a macro in the
    index.  The conditional directives maintain two stacks:
    self.defines (every condition met) and self.conditionals (only the
    conditions mentioning 'ENABLED').  Remaining words of the directive
    line are skipped.
    """
    if debug:
        print("=> preproc ", token, self.lexer.tokens)
    directive = token[1]
    static = not self.is_header

    if directive == "#include":
        token = self.lexer.token()
        if token == None:
            return None
        if token[0] != 'preproc':
            return token
        self.index_add(token[1], self.filename, static, "include")
        return self.lexer.token()

    if directive == "#define":
        token = self.lexer.token()
        if token == None:
            return None
        if token[0] == 'preproc':
            # TODO macros with arguments
            macro = token[1]
            # skip the macro body: the rest of the directive line
            token = self.lexer.token()
            while token != None and token[0] == 'preproc' and \
                  token[1][0] != '#':
                token = self.lexer.token()
            macro = macro.split('(')[0]
            info = self.parseMacroComment(macro, static)
            self.index_add(macro, self.filename, static, "macro", info)
            return token

    #
    # Processing of conditionals modified by Bill 1/1/05
    #
    # We process conditionals (i.e. tokens from #ifdef, #ifndef,
    # #if, #else and #endif) for headers and mainline code,
    # store the ones from the header in libxml2-api.xml, and later
    # (in the routine merge_public) verify that the two (header and
    # mainline code) agree.
    #
    # There is a small problem with processing the headers. Some of
    # the variables are not concerned with enabling / disabling of
    # library functions (e.g. '__XML_PARSER_H__'), and we don't want
    # them to be included in libxml2-api.xml, or involved in
    # the check between the header and the mainline code. To
    # accomplish this, we ignore any conditional which doesn't include
    # the string 'ENABLED'
    #
    if directive in ("#ifdef", "#ifndef", "#if"):
        if directive == "#if":
            apstr = " ".join([tok[1] for tok in self.lexer.tokens])
            condition = apstr
        else:
            apstr = self.lexer.tokens[0][1]
            if directive == "#ifdef":
                condition = "defined(%s)" % apstr
            else:
                condition = "!defined(%s)" % apstr
        self.defines.append(apstr)
        if apstr.find('ENABLED') != -1:
            self.conditionals.append(condition)
    elif directive == "#else":
        if self.conditionals != [] and \
           self.defines[-1].find('ENABLED') != -1:
            self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
    elif directive == "#endif":
        if self.conditionals != [] and \
           self.defines[-1].find('ENABLED') != -1:
            self.conditionals = self.conditionals[:-1]
        self.defines = self.defines[:-1]

    token = self.lexer.token()
    while token != None and token[0] == 'preproc' and \
          token[1][0] != '#':
        token = self.lexer.token()
    return token
+
+ #
+ # token acquisition on top of the lexer, it handle internally
+ # preprocessor and comments since they are logically not part of
+ # the program structure.
+ #
def token(self):
    """Return the next significant token, or None at end of input.

    Comments and preprocessor directives are handed to their parsers
    and never reach the caller.  '__const' is reported as 'const', an
    '__attribute' is skipped up to the next ';' token (which is
    returned), and the words listed in ignored_words are dropped
    together with the number of following tokens given there.
    """
    global ignored_words

    token = self.lexer.token()
    while token != None:
        kind = token[0]
        if kind == 'comment':
            token = self.parseComment(token)
            continue
        if kind == 'preproc':
            token = self.parsePreproc(token)
            continue
        if kind == "name":
            word = token[1]
            if word == "__const":
                return ("name", "const")
            if word == "__attribute":
                token = self.lexer.token()
                while token != None and token[1] != ";":
                    token = self.lexer.token()
                return token
            if word in ignored_words:
                (n, info) = ignored_words[word]
                # drop the n tokens attached to the word, then move on
                for _ in range(n):
                    token = self.lexer.token()
                token = self.lexer.token()
                continue
        if debug:
            print("=> ", token)
        return token
    return None
+
+ #
+ # Parse a typedef, it records the type and its name.
+ #
def parseTypedef(self, token):
    """Parse the declarators of a typedef and register each name.

    The type is parsed first; every declared name is then added to the
    index as 'functype' (a signature was found), 'struct' (the base type
    is a bare 'struct') or 'typedef'.  Returns the token following the
    terminating ';', or the offending token after an error.
    """
    if token == None:
        return None
    token = self.parseType(token)
    if token == None:
        self.error("parsing typedef")
        return None
    base_type = self.type
    decl_type = base_type
    static = not self.is_header
    #self.debug("end typedef type", token)
    while token != None:
        if token[0] != "name":
            self.error("parsing typedef: expecting a name")
            return token
        name = token[1]
        signature = self.signature
        if signature != None:
            decl_type = decl_type.split('(')[0]
            d = self.mergeFunctionComment(name,
                    ((decl_type, None), signature), 1)
            self.index_add(name, self.filename, static, "functype", d)
        elif base_type == "struct":
            self.index_add(name, self.filename, static, "struct", decl_type)
            base_type = "struct " + name
        else:
            # TODO report missing or misformatted comments
            info = self.parseTypeComment(name, 1)
            self.index_add(name, self.filename, static, "typedef",
                           decl_type, info)
        token = self.token()
        #self.debug("end typedef", token)
        if token != None and token[0] == 'sep' and token[1] == ';':
            break
        if token != None and token[0] == 'sep' and token[1] == ',':
            # another declarator: collect its pointer decoration
            decl_type = base_type
            token = self.token()
            while token != None and token[0] == "op":
                decl_type = decl_type + token[1]
                token = self.token()
        elif token != None and token[0] == 'name':
            decl_type = base_type
        else:
            self.error("parsing typedef: expecting ';'", token)
            return token
    token = self.token()
    return token
+
+ #
+ # Parse a C code block, used for functions it parse till
+ # the balancing } included
+ #
def parseBlock(self, token):
    """Skip a C code block up to and including its balancing '}'.

    Nested blocks are handled recursively.  When reference collection
    is enabled, names starting with 'xml' are recorded as 'function'
    (followed by '(') or 'type' (followed by a name and then ';', ','
    or '='), and names starting with 'XML_' or 'LIBXML_' as 'typedef'.
    Returns the token following the block, or None at end of input.
    """
    while token != None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.comment = None
            token = self.token()
            return token
        elif self.collect_ref == 1:
            oldtok = token
            token = self.token()
            if oldtok[0] != "name":
                continue
            ident = oldtok[1]
            if ident[0:3] == "xml":
                # the lookahead may hit end of input; the token used to
                # be indexed without a check, raising TypeError
                if token == None:
                    break
                if token[0] == "sep" and token[1] == "(":
                    self.index_add_ref(ident, self.filename, 0, "function")
                    token = self.token()
                elif token[0] == "name":
                    token = self.token()
                    if token != None and token[0] == "sep" and \
                       token[1] in (";", ",", "="):
                        self.index_add_ref(ident, self.filename, 0, "type")
            elif ident[0:4] == "XML_" or ident[0:7] == "LIBXML_":
                self.index_add_ref(ident, self.filename, 0, "typedef")
        else:
            token = self.token()
    return token
+
+ #
+ # Parse a C struct definition till the balancing }
+ #
def parseStruct(self, token):
    """Parse the fields of a C struct up to its balancing '}'.

    The (type, name, comment) triples of the fields are stored in
    self.struct_fields; self.type is restored after each field.  The
    comment attached to a field is the one met while fetching the token
    after its ';'.  Returns the token following the '}', or None at end
    of input.
    """
    fields = []
    #self.debug("start parseStruct", token)
    while token != None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.struct_fields = fields
            #self.debug("end parseStruct", token)
            token = self.token()
            return token
        else:
            base_type = self.type
            #self.debug("before parseType", token)
            token = self.parseType(token)
            #self.debug("after parseType", token)
            if token != None and token[0] == "name":
                fname = token[1]
                token = self.token()
                # token may be None at end of input; report that through
                # error() instead of failing on the subscript
                if token != None and token[0] == "sep" and token[1] == ";":
                    # order matters: drop any earlier comment, then let
                    # token() pick up the one trailing the field
                    self.comment = None
                    token = self.token()
                    fields.append((self.type, fname, self.comment))
                    self.comment = None
                else:
                    self.error("parseStruct: expecting ;", token)
            elif token != None and token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
                if token != None and token[0] == "name":
                    token = self.token()
                if token != None and token[0] == "sep" and token[1] == ";":
                    token = self.token()
                else:
                    self.error("parseStruct: expecting ;", token)
            else:
                self.error("parseStruct: name", token)
                token = self.token()
            self.type = base_type
    self.struct_fields = fields
    #self.debug("end parseStruct", token)
    return token
+
+ #
+ # Parse a C enum block, parse till the balancing }
+ #
def parseEnumBlock(self, token):
    """Parse the body of a C enum up to and including the closing '}'.

    The opening '{' has already been consumed by the caller.  The
    enumerators are stored in ``self.enums`` as ``(name, value, comment)``
    tuples, where ``value`` is a string: the text of the explicit
    initializer, or the previous value plus one, or "" when the previous
    value is not a plain decimal integer.

    token: first token following the opening '{'.
    Returns the token following the closing '}', or None when the input
    ends first.
    """
    self.enums = []
    name = None
    self.comment = None
    comment = ""
    # C gives the first enumerator without an initializer the value 0.
    # The counter is incremented before use, so it has to start at -1.
    value = "-1"
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            # Flush the last enumerator (its comment is kept unstripped,
            # as before).
            if name is not None:
                if self.comment is not None:
                    comment = self.comment
                    self.comment = None
                self.enums.append((name, value, comment))
            token = self.token()
            return token
        elif token[0] == "name":
            # A new enumerator starts: flush the previous one first.
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()
            if (token is not None and token[0] == "op"
                    and token[1][0] == "="):
                # Explicit initializer: gather its text up to ',' or '}'.
                # Anything glued to the '=' operator belongs to the value.
                value = ""
                if len(token[1]) > 1:
                    value = token[1][1:]
                token = self.token()
                while token is not None and (
                        token[0] != "sep" or token[1] not in (',', '}')):
                    value = value + token[1]
                    token = self.token()
            else:
                try:
                    value = "%d" % (int(value) + 1)
                except ValueError:
                    self.warning("Failed to compute value of enum %s" % (name))
                    value = ""
            if token is not None and token[0] == "sep" and token[1] == ",":
                token = self.token()
        else:
            token = self.token()
    return token
+
+ #
+ # Parse a C definition block (used for structs): it parses up to
+ # and including the balancing }
+ #
def parseTypeBlock(self, token):
    """Skip tokens up to and including the '}' closing the current block.

    The opening '{' has already been consumed by the caller; nested
    '{' ... '}' pairs are skipped as well.

    token: first token following the opening '{'.
    Returns the token following the balancing '}', or None when the input
    ends first.
    """
    depth = 0  # number of nested '{' still waiting for their '}'
    while token is not None:
        is_sep = token[0] == "sep"
        text = token[1]
        token = self.token()
        if is_sep and text == "{":
            depth += 1
        elif is_sep and text == "}":
            if depth == 0:
                return token
            depth -= 1
    return token
+
+ #
+ # Parse a type: the fact that the type name can either occur after
+ # the definition or within the definition makes it a little harder
+ # if inside, the name token is pushed back before returning
+ #
def parseType(self, token):
    """Parse a C type starting at ``token``; its text ends up in ``self.type``.

    The declared name may appear after the type or inside it (function
    pointers, arrays).  In both cases the name token is what gets
    returned, and the token read past it is pushed back on
    ``self.lexer``.

    Side effects: resets ``self.type``, ``self.struct_fields`` and
    ``self.signature``; struct bodies go through ``self.parseStruct``;
    enum bodies go through ``self.parseEnumBlock`` and each enumerator is
    registered with ``self.index_add``; function-pointer parameters go
    through ``self.parseSignature``.

    token: first token of the type, or None.
    Returns the token at which parsing stopped (normally the declared
    name), or None at end of input.
    """
    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    def add_word(word):
        # Append one space-separated word to the type being built.
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    # Leading qualifiers.
    while (token is not None and token[0] == "name"
           and token[1] in ("const", "unsigned", "signed")):
        add_word(token[1])
        token = self.token()

    if token is None or token[0] != "name":
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token

    if token[1] in ("long", "short"):
        add_word(token[1])
        # "long int" / "short int": absorb the optional "int" so that it
        # is not mistaken for the declared name.  Any other token is
        # pushed back and re-read below.
        lookahead = self.token()
        if (lookahead is not None and lookahead[0] == "name"
                and lookahead[1] == "int"):
            add_word(lookahead[1])
        elif lookahead is not None:
            self.lexer.push(lookahead)

    elif token[1] == "struct":
        add_word(token[1])
        token = self.token()
        nametok = None
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            self.lexer.push(token)
            token = nametok
        return token

    elif token[1] == "enum":
        add_word(token[1])
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None and token[0] != "name":
            # No type name follows the enum body: push the token back
            # and hand a placeholder name to the caller.
            self.lexer.push(token)
            token = ("name", "enum")
        elif token is not None:
            enum_type = token[1]
        for enum in self.enums:
            self.index_add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    else:
        add_word(token[1])

    token = self.token()
    while token is not None and (
            token[0] == "op" or
            (token[0] == "name" and token[1] == "const")):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is None or token[0] != "sep" or token[1] != ')':
            self.error("parsing function type, ')' expected", token)
            return token
        self.type = self.type + token[1]
        token = self.token()
        if token is None or token[0] != "sep" or token[1] != '(':
            self.error("parsing function type, '(' expected", token)
            return token
        token = self.token()
        # parseSignature() calls parseType() for every parameter, which
        # overwrites self.type: save it and restore it afterwards.
        saved_type = self.type
        token = self.parseSignature(token)
        self.type = saved_type
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                        token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
                if token is not None and token[0] == 'sep' and token[1] == ']':
                    self.type = self.type + token[1]
                    token = self.token()
                else:
                    self.error("parsing array type, ']' expected", token)
                    return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token
+
+ #
+ # Parse a signature: '(' has been parsed and we scan the type definition
+ # up to the ')' included
def parseSignature(self, token):
    """Parse a parameter list up to and including the closing ')'.

    The opening '(' has already been consumed by the caller.  The
    parameters are stored in ``self.signature`` as ``(type, name, None)``
    tuples; a parameter given by its type only gets None as name, except
    for the "..." type whose name is "..." as well.

    token: first token following the opening '('.
    Returns the token following the closing ')', or None when the input
    ends first.
    """
    def is_sep(tok, text):
        return tok is not None and tok[0] == "sep" and tok[1] == text

    params = []
    if is_sep(token, ')'):
        self.signature = []
        return self.token()

    while token is not None:
        token = self.parseType(token)
        if token is not None and token[0] == "name":
            params.append((self.type, token[1], None))
            token = self.token()
        elif is_sep(token, ','):
            token = self.token()
            continue
        elif is_sep(token, ')'):
            # only the type was provided
            unnamed = "..." if self.type == "..." else None
            params.append((self.type, unnamed, None))

        if is_sep(token, ','):
            token = self.token()
        elif is_sep(token, ')'):
            token = self.token()
            break

    self.signature = params
    return token
+
+ #
+ # Parse a global definition, be it a type, variable or function.
+ # The extern "C" blocks are a bit nasty and require it to recurse.
+ #
# parseGlobal: parse one top-level C declaration starting at `token`.
#
# Handles, in this order:
#   - an `extern` prefix; for extern "C" { ... } every declaration inside
#     the braces is parsed by calling parseGlobal again;
#   - a `static` prefix, remembered in the local `static` flag;
#   - `typedef`, delegated to parseTypedef;
#   - otherwise a type (parseType) followed by one or more declarators,
#     registered through index_add as "struct", "variable" or "function".
#
# Returns the token at which parsing stopped, or None at end of input.
#
# NOTE(review): the indentation of this listing has been flattened, so the
# pairing of the `else:` branches below cannot be read off the text alone;
# check against an intact copy of the file before restructuring this code.
+ def parseGlobal(self, token):
+ static = 0
+ if token[1] == 'extern':
+ token = self.token()
+ if token == None:
+ return token
+ if token[0] == 'string':
+ if token[1] == 'C':
+ token = self.token()
+ if token == None:
+ return token
+ if token[0] == 'sep' and token[1] == "{":
+ token = self.token()
+# print 'Entering extern "C line ', self.lineno()
# Parse every declaration up to the '}' closing the extern "C" block.
# A token that is not a name is reported through self.error and then
# still handed to parseGlobal.
+ while token != None and (token[0] != 'sep' or
+ token[1] != "}"):
+ if token[0] == 'name':
+ token = self.parseGlobal(token)
+ else:
+ self.error(
+ "token %s %s unexpected at the top level" % (
+ token[0], token[1]))
+ token = self.parseGlobal(token)
+# print 'Exiting extern "C" line', self.lineno()
+ token = self.token()
+ return token
+ else:
+ return token
+ elif token[1] == 'static':
+ static = 1
+ token = self.token()
+ if token == None or token[0] != 'name':
+ return token
+
+ if token[1] == 'typedef':
+ token = self.token()
+ return self.parseTypedef(token)
+ else:
+ token = self.parseType(token)
+ type_orig = self.type
+ if token == None or token[0] != "name":
+ return token
# `type` is the type text of the current declarator; it is reset to
# type_orig for each further comma-separated declarator.
+ type = type_orig
+ self.name = token[1]
+ token = self.token()
+ while token != None and (token[0] == "sep" or token[0] == "op"):
+ if token[0] == "sep":
+ if token[1] == "[":
# Array declarator: everything up to the ';' is appended to the type text.
+ type = type + token[1]
+ token = self.token()
+ while token != None and (token[0] != "sep" or \
+ token[1] != ";"):
+ type = type + token[1]
+ token = self.token()
+
+ if token != None and token[0] == "op" and token[1] == "=":
+ #
+ # Skip the initialization of the variable
+ #
+ token = self.token()
+ if token[0] == 'sep' and token[1] == '{':
+ token = self.token()
+ token = self.parseBlock(token)
+ else:
+ self.comment = None
+ while token != None and (token[0] != "sep" or \
+ (token[1] != ';' and token[1] != ',')):
+ token = self.token()
+ self.comment = None
+ if token == None or token[0] != "sep" or (token[1] != ';' and
+ token[1] != ','):
+ self.error("missing ';' or ',' after value")
+
+ if token != None and token[0] == "sep":
+ if token[1] == ";":
# End of a variable (or bare struct) declaration.
+ self.comment = None
+ token = self.token()
+ if type == "struct":
+ self.index_add(self.name, self.filename,
+ not self.is_header, "struct", self.struct_fields)
+ else:
+ self.index_add(self.name, self.filename,
+ not self.is_header, "variable", type)
+ break
+ elif token[1] == "(":
# Function: a signature followed by ';' is a prototype, followed by '{'
# it is a definition whose body is skipped with parseBlock.  Note that the
# prototype case passes 1 as last argument of mergeFunctionComment while
# the definition case passes `static`.
+ token = self.token()
+ token = self.parseSignature(token)
+ if token == None:
+ return None
+ if token[0] == "sep" and token[1] == ";":
+ d = self.mergeFunctionComment(self.name,
+ ((type, None), self.signature), 1)
+ self.index_add(self.name, self.filename, static,
+ "function", d)
+ token = self.token()
+ elif token[0] == "sep" and token[1] == "{":
+ d = self.mergeFunctionComment(self.name,
+ ((type, None), self.signature), static)
+ self.index_add(self.name, self.filename, static,
+ "function", d)
+ token = self.token()
+ token = self.parseBlock(token);
+ elif token[1] == ',':
# Another declarator follows: register the current variable, then restart
# from the base type, appending any "sep" tokens that precede the next name.
+ self.comment = None
+ self.index_add(self.name, self.filename, static,
+ "variable", type)
+ type = type_orig
+ token = self.token()
+ while token != None and token[0] == "sep":
+ type = type + token[1]
+ token = self.token()
+ if token != None and token[0] == "name":
+ self.name = token[1]
+ token = self.token()
+ else:
+ break
+
+ return token
+
def parse(self):
    """Parse the whole file and return the index built from it.

    Every top-level declaration is handed to ``self.parseGlobal``.  A
    declaration that does not start with a name token is reported through
    ``self.error``; it is still handed to ``parseGlobal`` once, after
    which parsing stops and None is returned.  Otherwise the top comment
    is processed with ``self.parseTopComment`` and ``self.index`` is
    returned.
    """
    self.warning("Parsing %s" % (self.filename))
    token = self.token()
    while token is not None:
        if token[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                       token[0], token[1]))
            self.parseGlobal(token)
            return
        token = self.parseGlobal(token)
    self.parseTopComment(self.top_comment)
    return self.index
+
+
class docBuilder:
    """A documentation builder.

    Collects the ``*.c`` modules and ``*.h`` headers found in a list of
    directories, parses each of them with CParser, merges the resulting
    indexes and serializes the outcome as ``<name>-api.xml`` and
    ``<name>-refs.xml``.
    """

    def __init__(self, name, directories=None, excludes=None):
        """Create a builder for project ``name``.

        name: project name, used in messages and output file names.
        directories: directories to scan (defaults to ``['.']``).
        excludes: substrings of file paths to skip, in addition to the
            keys of the module-level ``ignored_files`` table.
        """
        self.name = name
        # None sentinels avoid sharing one default list between instances.
        self.directories = ['.'] if directories is None else directories
        self.excludes = ([] if excludes is None else excludes) + \
            list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def indexString(self, id, str):
        """Record in ``self.xref`` that each word of ``str`` refers to ``id``.

        Punctuation is turned into spaces first.  Words that do not start
        with an ASCII letter, are shorter than 3 characters, or are "and"
        or "the" (any case) are ignored.  Does nothing when ``str`` is None.
        """
        if str is None:
            return
        for char in "'\"/*[]()<>&#,.;":
            str = str.replace(char, ' ')
        for word in str.split():
            if word[0] not in string.ascii_letters:
                continue
            if len(word) < 3:
                continue
            # TODO: generalize this a bit
            if word.lower() in ('and', 'the'):
                continue
            self.xref.setdefault(word, []).append(id)

    def analyze(self):
        """Print a summary line and run the analysis of the merged index."""
        print("Project %s : %d headers, %d modules" % (
            self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index."""
        for header in list(self.headers.keys()):
            idx = CParser(header).parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge its public symbols."""
        for module in list(self.modules.keys()):
            idx = CParser(module).parse()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _is_excluded(self, path):
        # True (after printing a notice) when path contains an exclude.
        for excl in self.excludes:
            if path.find(excl) != -1:
                print("Skipping %s" % path)
                return True
        return False

    def scan(self):
        """Register the sources of every directory, then parse them all."""
        for directory in self.directories:
            for path in glob.glob(directory + "/*.c"):
                if not self._is_excluded(path):
                    self.modules[path] = None
            for path in glob.glob(directory + "/*.h"):
                if not self._is_excluded(path):
                    self.headers[path] = None
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of ``file`` without a ``.h``/``.c`` suffix."""
        module = os.path.basename(file)
        if module[-2:] in ('.h', '.c'):
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the ``<enum>`` element describing enumerator ``name``."""
        entry = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (name,
                     self.modulename_file(entry.header)))
        if entry.info is not None:
            info = entry.info
            if info[0] is not None and info[0] != '':
                # NOTE(review): eval() runs text taken from the parsed C
                # sources; only use this tool on trusted source trees.
                try:
                    val = eval(info[0])
                except Exception:
                    # Not a Python expression: keep the raw text.
                    val = info[0]
                output.write(" value='%s'" % (val))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the ``<macro>`` element describing macro ``name``."""
        entry = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(entry.header)))
        if entry.info is not None:
            try:
                (args, desc) = entry.info
                if desc is not None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                    self.indexString(name, desc)
                for (arg_name, arg_desc) in args:
                    if arg_desc is not None and arg_desc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                                     arg_name, escape(arg_desc)))
                        # Index under the macro name: the argument loop
                        # must not rebind ``name``.
                        self.indexString(name, arg_desc)
                    else:
                        output.write(" <arg name='%s'/>\n" % (arg_name))
            except Exception:
                # Best effort: malformed macro info is skipped silently.
                pass
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the ``<struct>`` or ``<typedef>`` element for ``name``."""
        entry = self.idx.typedefs[name]
        if entry.info[0:7] == 'struct ':
            output.write(" <struct name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(entry.header),
                         entry.info))
            # From here on ``name`` is the struct tag, not the typedef.
            name = entry.info[7:]
            if name in self.idx.structs and isinstance(
                    self.idx.structs[name].info, (tuple, list)):
                output.write(">\n")
                try:
                    for field in self.idx.structs[name].info:
                        desc = field[2]
                        self.indexString(name, desc)
                        if desc is None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
                except Exception:
                    print("Failed to serialize struct %s" % (name))
                output.write(" </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write(" <typedef name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(entry.header),
                         entry.info))
            try:
                desc = entry.extra
                if desc is not None and desc != "":
                    output.write(">\n <info>%s</info>\n" % (escape(desc)))
                    output.write(" </typedef>\n")
                else:
                    output.write("/>\n")
            except Exception:
                output.write("/>\n")

    def serialize_variable(self, output, name):
        """Write the ``<variable>`` element describing variable ``name``."""
        entry = self.idx.variables[name]
        if entry.info is not None:
            output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
                         name, self.modulename_file(entry.header),
                         entry.info))
        else:
            output.write(" <variable name='%s' file='%s'/>\n" % (
                         name, self.modulename_file(entry.header)))

    def serialize_function(self, output, name):
        """Write the element describing function ``name``.

        The element tag is the entry's ``type`` attribute.
        """
        entry = self.idx.functions[name]
        if name == debugsym:
            print("=>", entry)

        output.write(" <%s name='%s' file='%s' module='%s'>\n" % (
                     entry.type, name,
                     self.modulename_file(entry.header),
                     self.modulename_file(entry.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if entry.conditionals is not None:
            apstr = " &amp;&amp; ".join(entry.conditionals)
            output.write(" <cond>%s</cond>\n" % (apstr))
        try:
            (ret, params, desc) = entry.info
            if (desc is None or desc == '') and \
               name[0:9] != "xmlThrDef" and name != "xmlDllMain":
                print("%s %s from %s has no description" % (
                    entry.type, name, self.modulename_file(entry.module)))

            output.write(" <info>%s</info>\n" % (escape(desc)))
            self.indexString(name, desc)
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                                 ret[0], escape(ret[1])))
                    self.indexString(name, ret[1])
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except Exception:
            print("Failed to save function %s info: " % name,
                  repr(entry.info))
        output.write(" </%s>\n" % (entry.type))

    def _write_exports(self, output, table, kind, skip_tables=()):
        # Write one <exports> line per key of table, in sorted order,
        # leaving out keys also present in one of skip_tables.
        ids = list(table.keys())
        ids.sort()
        for symbol in uniq(ids):
            if any(symbol in other for other in skip_tables):
                continue
            output.write(" <exports symbol='%s' type='%s'/>\n" % (
                         symbol, kind))

    def serialize_exports(self, output, file):
        """Write the ``<file>`` element listing what header ``file`` exports."""
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        hdr = self.headers[file]
        if hdr.info is not None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write(" <%s>%s</%s>\n" % (
                                 data.lower(),
                                 escape(hdr.info[data]),
                                 data.lower()))
                except Exception:
                    print("Header %s lacks a %s description" % (module, data))
            if 'Description' in hdr.info:
                desc = hdr.info['Description']
                if desc.find("DEPRECATED") != -1:
                    output.write(" <deprecated/>\n")

        # Macros are sometime used to masquerade other types.
        self._write_exports(output, hdr.macros, 'macro',
                            (hdr.functions, hdr.variables, hdr.typedefs,
                             hdr.structs, hdr.enums))
        self._write_exports(output, hdr.enums, 'enum')
        self._write_exports(output, hdr.typedefs, 'typedef')
        self._write_exports(output, hdr.structs, 'struct')
        self._write_exports(output, hdr.variables, 'variable')
        self._write_exports(output, hdr.functions, 'function')
        output.write(" </file>\n")

    def serialize_xrefs_files(self, output):
        """Write, for every header, the sorted list of symbols it defines."""
        headers = list(self.headers.keys())
        headers.sort()
        for file in headers:
            module = self.modulename_file(file)
            output.write(" <file name='%s'>\n" % (module))
            hdr = self.headers[file]
            ids = uniq(list(hdr.functions.keys()) +
                       list(hdr.variables.keys()) +
                       list(hdr.macros.keys()) +
                       list(hdr.typedefs.keys()) +
                       list(hdr.structs.keys()) +
                       list(hdr.enums.keys()))
            ids.sort()
            for symbol in ids:
                output.write(" <ref name='%s'/>\n" % (symbol))
            output.write(" </file>\n")

    def serialize_xrefs_functions(self, output):
        """Write, per parameter type, the functions taking that type."""
        funcs = {}
        for name in list(self.idx.functions.keys()):
            entry = self.idx.functions[name]
            try:
                (ret, params, desc) = entry.info
                for param in params:
                    if param[0] == 'void':
                        continue
                    funcs.setdefault(param[0], []).append(name)
            except Exception:
                # Best effort: functions without usable info are skipped.
                pass
        typ = list(funcs.keys())
        typ.sort()
        for type_name in typ:
            if type_name in ('', 'void', "int", "char *", "const char *"):
                continue
            output.write(" <type name='%s'>\n" % (type_name))
            ids = funcs[type_name]
            ids.sort()
            pid = ''    # not sure why we have dups, but get rid of them!
            for symbol in ids:
                if symbol != pid:
                    output.write(" <ref name='%s'/>\n" % (symbol))
                    pid = symbol
            output.write(" </type>\n")

    def serialize_xrefs_constructors(self, output):
        """Write, per return type, the functions returning that type."""
        funcs = {}
        for name in list(self.idx.functions.keys()):
            entry = self.idx.functions[name]
            try:
                (ret, params, desc) = entry.info
                if ret[0] == "void":
                    continue
                funcs.setdefault(ret[0], []).append(name)
            except Exception:
                # Best effort: functions without usable info are skipped.
                pass
        typ = list(funcs.keys())
        typ.sort()
        for type_name in typ:
            if type_name in ('', 'void', "int", "char *", "const char *"):
                continue
            output.write(" <type name='%s'>\n" % (type_name))
            ids = funcs[type_name]
            ids.sort()
            for symbol in ids:
                output.write(" <ref name='%s'/>\n" % (symbol))
            output.write(" </type>\n")

    def serialize_xrefs_alpha(self, output):
        """Write all identifiers grouped by their first character."""
        letter = None
        ids = list(self.idx.identifiers.keys())
        ids.sort()
        for symbol in ids:
            if symbol[0] != letter:
                if letter is not None:
                    output.write(" </letter>\n")
                letter = symbol[0]
                output.write(" <letter name='%s'>\n" % (letter))
            output.write(" <ref name='%s'/>\n" % (symbol))
        if letter is not None:
            output.write(" </letter>\n")

    def serialize_xrefs_references(self, output):
        """Write, for every identifier, the HTML location documenting it."""
        typ = list(self.idx.identifiers.keys())
        typ.sort()
        for symbol in typ:
            idf = self.idx.identifiers[symbol]
            module = idf.header
            output.write(" <reference name='%s' href='%s'/>\n" % (symbol,
                         'html/' + self.basename + '-' +
                         self.modulename_file(module) + '.html#' +
                         symbol))

    def serialize_xrefs_index(self, output):
        """Write the word index gathered by indexString().

        Words referenced by more than 30 symbols are left out.  The
        output is cut into chunks: a new chunk is started on a change of
        first letter once the current chunk holds more than 200
        references.  A ``<chunks>`` summary follows.
        """
        index = self.xref
        typ = list(index.keys())
        typ.sort()
        letter = None
        count = 0
        chunk = 0
        chunks = []
        for word in typ:
            if len(index[word]) > 30:
                continue
            if word[0] != letter:
                if letter is None or count > 200:
                    if letter is not None:
                        output.write(" </letter>\n")
                        output.write(" </chunk>\n")
                        count = 0
                        chunks.append(["chunk%s" % (chunk - 1),
                                       first_letter, letter])
                    output.write(" <chunk name='chunk%s'>\n" % (chunk))
                    first_letter = word[0]
                    chunk = chunk + 1
                elif letter is not None:
                    output.write(" </letter>\n")
                letter = word[0]
                output.write(" <letter name='%s'>\n" % (letter))
            output.write(" <word name='%s'>\n" % (word))
            tokens = index[word]
            tokens.sort()
            tok = None
            for token in tokens:
                if tok == token:
                    continue
                tok = token
                output.write(" <ref name='%s'/>\n" % (token))
                count = count + 1
            output.write(" </word>\n")
        if letter is not None:
            output.write(" </letter>\n")
            output.write(" </chunk>\n")
            if count != 0:
                chunks.append(["chunk%s" % (chunk - 1),
                               first_letter, letter])
            output.write(" <chunks>\n")
            for ch in chunks:
                output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
                             ch[0], ch[1], ch[2]))
            output.write(" </chunks>\n")

    def serialize_xrefs(self, output):
        """Write every cross-reference section, each in its own element."""
        sections = (
            ("references", self.serialize_xrefs_references),
            ("alpha", self.serialize_xrefs_alpha),
            ("constructors", self.serialize_xrefs_constructors),
            ("functions", self.serialize_xrefs_functions),
            ("files", self.serialize_xrefs_files),
            ("index", self.serialize_xrefs_index),
        )
        for tag, writer in sections:
            output.write(" <%s>\n" % tag)
            writer(output)
            output.write(" </%s>\n" % tag)

    def _serialize_sorted(self, output, table, writer):
        # Call writer(output, key) for each key of table in sorted order.
        keys = list(table.keys())
        keys.sort()
        for key in keys:
            writer(output, key)

    def serialize(self):
        """Write ``<name>-api.xml`` and ``<name>-refs.xml``."""
        filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        # 'with' closes the file even when a serializer raises.
        with open(filename, "w") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write(" <files>\n")
            self._serialize_sorted(output, self.headers,
                                   self.serialize_exports)
            output.write(" </files>\n")
            output.write(" <symbols>\n")
            self._serialize_sorted(output, self.idx.macros,
                                   self.serialize_macro)
            self._serialize_sorted(output, self.idx.enums,
                                   self.serialize_enum)
            self._serialize_sorted(output, self.idx.typedefs,
                                   self.serialize_typedef)
            self._serialize_sorted(output, self.idx.variables,
                                   self.serialize_variable)
            self._serialize_sorted(output, self.idx.functions,
                                   self.serialize_function)
            output.write(" </symbols>\n")
            output.write("</api>\n")

        filename = "%s-refs.xml" % self.name
        print("Saving XML Cross References %s" % (filename))
        with open(filename, "w") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<apirefs name='%s'>\n" % self.name)
            self.serialize_xrefs(output)
            output.write("</apirefs>\n")
+
+
def rebuild():
    """Guess the project from the files around, then rebuild its API files.

    The project is picked from the first marker file found, relative to
    the current directory: ``parser.c`` or ``../parser.c`` for libxml2,
    ``../libxslt/transform.c`` for libxslt.  When ``../libexslt/exslt.c``
    exists, libexslt is rebuilt as well.

    Returns the docBuilder of the main project, or None when no project
    could be guessed.
    """
    candidates = (
        ("parser.c", "Rebuilding API description for libxml2",
         "libxml2", [".", "."],
         ["xmlwin32version.h", "tst.c"]),
        ("../parser.c", "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    builder = None
    for marker, message, project, directories, excludes in candidates:
        if glob.glob(marker) != []:
            print(message)
            builder = docBuilder(project, directories, excludes)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c") != []:
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
+
+#
+# for debugging the parser
+#
def parse(filename):
    """Parse one C file with CParser and return the result of its parse()."""
    return CParser(filename).parse()
+
# Script entry point: with an argument, parse that single file in debug
# mode; without, rebuild the API description of the surrounding project.
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])