From 35a201cc8ef0c3f5b2df88d2e528aabee1048348 Mon Sep 17 00:00:00 2001
From: Wojtek Kosior <wk@koszkonutek-tmp.pl.eu.org>
Date: Fri, 30 Apr 2021 18:47:09 +0200
Subject: Initial/Final commit

---
 libxml2-2.9.10/genUnicode.py | 478 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 478 insertions(+)
 create mode 100755 libxml2-2.9.10/genUnicode.py

(limited to 'libxml2-2.9.10/genUnicode.py')

diff --git a/libxml2-2.9.10/genUnicode.py b/libxml2-2.9.10/genUnicode.py
new file mode 100755
index 0000000..4487eeb
--- /dev/null
+++ b/libxml2-2.9.10/genUnicode.py
@@ -0,0 +1,478 @@
+#!/usr/bin/python -u
+#
+# Original script modified in November 2003 to take advantage of
+# the character-validation range routines, and updated to the
+# current Unicode information (Version 4.0.1)
+#
+# NOTE: there is an 'alias' facility for blocks which are not present in
+#	the current release, but are needed for ABI compatibility.  This
+#	must be accomplished MANUALLY!  Please see the comments below under
+#     'blockAliases'
+#
+import sys
+import string
+import time
+
+webpage = "http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html"
+sources = "Blocks-4.0.1.txt UnicodeData-4.0.1.txt"
+
+#
+# blockAliases is a small hack - it is used for mapping block names which
+# were used in the 3.1 release, but are missing or changed in the current
+# release.  The format is "OldBlockName:NewBlockName1[,NewBlockName2[,...]]"
+blockAliases = [
+    "CombiningMarksforSymbols:CombiningDiacriticalMarksforSymbols",
+    "Greek:GreekandCoptic",
+    "PrivateUse:PrivateUseArea,SupplementaryPrivateUseArea-A,"
+    "SupplementaryPrivateUseArea-B"]
+
+# minTableSize is the threshold for emitting a range table: a category
+# with more than this many ranges gets a table, any other category is
+# tested with inline comparisons
+minTableSize = 8
+
+(blockfile, catfile) = sources.split()  # blocks file, categories file
+
+
+#
+# Now process the "blocks" file, reducing it to a dictionary indexed by
+# blockname; each entry is a list of ("0xSTART", "0xEND") string pairs,
+# one per range that the file gives for that block
+#
+BlockNames = {}
+try:
+    blocks = open(blockfile, "r")
+except IOError:
+    print("Missing %s, aborting ..." % blockfile)
+    sys.exit(1)
+
+for line in blocks:
+    if line[0] == '#':
+        continue
+    line = line.strip()
+    if line == '':
+        continue
+    try:
+        # a line is parsed as "START..END; Block Name"
+        fields = line.split(';')
+        (start, end) = fields[0].strip().split("..")
+        # blanks are dropped so the name can become part of a C identifier
+        name = fields[1].strip().replace(' ', '')
+    except (IndexError, ValueError):
+        print("Failed to process line: %s" % (line))
+        continue
+    start = "0x" + start
+    end = "0x" + end
+    try:
+        BlockNames[name].append((start, end))
+    except KeyError:
+        BlockNames[name] = [(start, end)]
+blocks.close()
+print("Parsed %d blocks descriptions" % (len(BlockNames)))
+
+# Build each alias (old block name) from the ranges of the current blocks
+# it maps to, so the old name remains available in the generated tables.
+# A component which is not present in the blocks file is only reported.
+for block in blockAliases:
+    alias = block.split(':')
+    for comp in alias[1].split(','):
+        if comp in BlockNames:
+            if alias[0] not in BlockNames:
+                BlockNames[alias[0]] = []
+            BlockNames[alias[0]].extend(BlockNames[comp])
+        else:
+            print("Alias %s: %s not in Blocks" % (alias[0], comp))
+
+#
+# Next process the Categories file. This is more complex, since
+# the file is in code sequence, and we need to invert it.  We use
+# a dictionary with index category-name, with each entry containing
+# all the ranges (codepoints) of that category.  Note that category
+# names comprise two parts - the general category, and the "subclass"
+# within that category.  Therefore, both "general category" (which is
+# the first character of the 2-character category-name) and the full
+# (2-character) name are entered into this dictionary.
+#
+try:
+    data = open(catfile, "r")
+except IOError:
+    print("Missing %s, aborting ..." % catfile)
+    sys.exit(1)
+
+# nbchar counts the data lines accepted; Categories maps a category name
+# to the list of code points (as integers, in file order) seen for it
+nbchar = 0
+Categories = {}
+for line in data:
+    if line[0] == '#':
+        continue
+    line = line.strip()
+    if line == '':
+        continue
+    try:
+        fields = line.split(';')
+        # field 0 is the code point in hexadecimal, field 2 the category;
+        # int() raises ValueError on anything which is not a hexadecimal
+        # number, so such a line is reported below instead of being used
+        point = fields[0].strip()
+        value = int(point, 16)
+        name = fields[2]
+    except (IndexError, ValueError):
+        print("Failed to process line: %s" % (line))
+        continue
+
+    # NOTE(review): every line is recorded as one single code point.  If
+    # the data file describes a large range with only a "First" and a
+    # "Last" line (as UnicodeData.txt is documented to do), just those two
+    # end points are recorded - confirm whether that is intended.
+    nbchar = nbchar + 1
+    # update entry for "full name"
+    try:
+        Categories[name].append(value)
+    except KeyError:
+        Categories[name] = [value]
+    # update "general category" name
+    if name == '':
+        # an empty category field has no first character to index by
+        print("Failed to process line: %s" % (line))
+        continue
+    try:
+        Categories[name[0]].append(value)
+    except KeyError:
+        Categories[name[0]] = [value]
+
+# Done with the categories file.  This has to be "data": the blocks file
+# was closed right after its own parsing loop.
+data.close()
+print("Parsed %d char generating %d categories" % (nbchar, len(Categories)))
+
+#
+# The data is now all read.  Time to process it into a more useful form.
+#
+# reduce the number list into ranges: every run of consecutive code points
+# becomes one (first, last) pair, and an isolated code point becomes a
+# pair whose two members are equal.  A run only grows while each value is
+# exactly one more than the previous one, so the lists are expected to be
+# in ascending order - they are in data file order here (assumed to be
+# ascending, TODO confirm).
+#
+for cat in list(Categories):
+    points = Categories[cat]
+    ranges = []
+    # -1 means that no run has been opened yet
+    start = -1
+    prev = -1
+    for val in points:
+        if start == -1:
+            # first code point of the category opens the first run
+            start = val
+            prev = val
+            continue
+        if val == prev + 1:
+            # still inside the current run, just extend it
+            prev = val
+            continue
+        # gap found: close the current run and open a new one at val
+        ranges.append((start, prev))
+        start = val
+        prev = val
+    # close the run which was still open when the list ended
+    ranges.append((start, prev))
+    Categories[cat] = ranges
+
+#
+# Assure all data is in alphabetic order, since we will be doing binary
+# searches on the tables.
+#
+# sorted() accepts the result of keys() on both Python 2 and Python 3
+bkeys = sorted(BlockNames.keys())
+
+# category names get the same treatment
+ckeys = sorted(Categories.keys())
+
+#
+# Generate the resulting files; both paths are relative to the directory
+# the script is run from
+try:
+    header = open("include/libxml/xmlunicode.h", "w")
+except IOError:
+    print("Failed to open include/libxml/xmlunicode.h")
+    sys.exit(1)
+
+try:
+    output = open("xmlunicode.c", "w")
+except IOError:
+    print("Failed to open xmlunicode.c")
+    sys.exit(1)
+
+date = time.asctime(time.localtime(time.time()))  # stamp for both banners
+# Header preamble: banner comment, include guard and 'extern "C"' opening
+header.write(
+"""/*
+ * Summary: Unicode character APIs
+ * Description: API for the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ * %s
+ * using the genUnicode.py Python script.
+ *
+ * Generation date: %s
+ * Sources: %s
+ * Author: Daniel Veillard
+ */
+
+#ifndef __XML_UNICODE_H__
+#define __XML_UNICODE_H__
+
+#include <libxml/xmlversion.h>
+
+#ifdef LIBXML_UNICODE_ENABLED
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+""" % (webpage, date, sources));
+# C file preamble: banner, includes, lookup types, start of the blocks table
+output.write(
+"""/*
+ * xmlunicode.c: this module implements the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ * %s
+ * using the genUnicode.py Python script.
+ *
+ * Generation date: %s
+ * Sources: %s
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#define IN_LIBXML
+#include "libxml.h"
+
+#ifdef LIBXML_UNICODE_ENABLED
+
+#include <string.h>
+#include <libxml/xmlversion.h>
+#include <libxml/xmlunicode.h>
+#include <libxml/chvalid.h>
+
+typedef int (xmlIntFunc)(int);	/* just to keep one's mind untwisted */
+
+typedef struct {
+    const char *rangename;
+    xmlIntFunc *func;
+} xmlUnicodeRange;
+
+typedef struct {
+    const xmlUnicodeRange *table;
+    int		    numentries;
+} xmlUnicodeNameTable;
+
+
+static xmlIntFunc *xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname);
+
+static const xmlUnicodeRange xmlUnicodeBlocks[] = {
+""" % (webpage, date, sources));
+
+# Entries of xmlUnicodeBlocks (the array was opened by the text written
+# just above): one {"name", function} pair per block, in sorted order.
+# The function name is the block name without any '-', since a C
+# identifier cannot contain one.
+entries = []
+for block in bkeys:
+    name = block.replace('-', '')
+    entries.append('  {"%s", xmlUCSIs%s}' % (block, name))
+output.write(',\n'.join(entries))
+output.write('};\n\n')
+
+# Same table for the categories; it is declared const like the blocks
+# table because the name table only refers to it through a const pointer.
+output.write('static const xmlUnicodeRange xmlUnicodeCats[] = {\n')
+entries = []
+for name in ckeys:
+    entries.append('  {"%s", xmlUCSIsCat%s}' % (name, name))
+output.write(',\n'.join(entries))
+output.write('};\n\n')
+
+#
+# For any categories with more than minTableSize ranges we generate
+# a range table suitable for xmlCharInRange: ranges ending below 0x10000
+# go to xml<cat>S, the others to xml<cat>L, both referenced by xml<cat>G
+for name in ckeys:
+  if len(Categories[name]) > minTableSize:
+    numshort = 0
+    numlong = 0
+    ranges = Categories[name]
+    sptr = "NULL"
+    lptr = "NULL"
+    for range in ranges:
+      (low, high) = range
+      if high < 0x10000:  # goes to the short (xmlChSRange) table
+        if numshort == 0:
+          pline = "static const xmlChSRange xml%sS[] = {" % name
+          sptr = "xml%sS" % name
+        else:
+          pline += ", "
+        numshort += 1
+      else:
+        if numlong == 0:
+          if numshort > 0:  # finish the short table before the long one
+            output.write(pline + " };\n")
+          pline = "static const xmlChLRange xml%sL[] = {" % name
+          lptr = "xml%sL" % name
+        else:
+          pline += ", "
+        numlong += 1
+      if len(pline) > 60:  # wrap the generated source line
+        output.write(pline + "\n")
+        pline = "    "
+      pline += "{%s, %s}" % (hex(low), hex(high))
+    output.write(pline + " };\nstatic xmlChRangeGroup xml%sG = {%s,%s,%s,%s};\n\n"
+         % (name, numshort, numlong, sptr, lptr))
+# Emit the two name tables and xmlUnicodeLookup(), the strcmp() based binary
+# search over them (its cursor is const, like the table pointer it copies).
+output.write(
+"""static xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s};
+static xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s};
+
+/**
+ * xmlUnicodeLookup:
+ * @tptr: pointer to the name table
+ * @tname: name to be found
+ *
+ * binary table lookup for user-supplied name
+ *
+ * Returns pointer to range function if found, otherwise NULL
+ */
+static xmlIntFunc
+*xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname) {
+    int low, high, mid, cmp;
+    const xmlUnicodeRange *sptr;
+
+    if ((tptr == NULL) || (tname == NULL)) return(NULL);
+
+    low = 0;
+    high = tptr->numentries - 1;
+    sptr = tptr->table;
+    while (low <= high) {
+	mid = (low + high) / 2;
+	if ((cmp=strcmp(tname, sptr[mid].rangename)) == 0)
+	    return (sptr[mid].func);
+	if (cmp < 0)
+	    high = mid - 1;
+	else
+	    low = mid + 1;
+    }
+    return (NULL);    
+}
+
+""" % (len(BlockNames), len(Categories)) )
+
+# One prototype (header) and one documented function (C file) per block.
+# The function returns the "or" of one bounds test per range of the block,
+# which also covers alias blocks made of several ranges.
+for block in bkeys:
+    name = block.replace('-', '')
+    header.write("XMLPUBFUN int XMLCALL xmlUCSIs%s\t(int code);\n" % name)
+    output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
+    output.write(" *\n * Check whether the character is part of %s UCS Block\n"
+                 % (block))
+    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n")
+    output.write("int\nxmlUCSIs%s(int code) {\n    return(" % name)
+    # start and end are the "0x..." strings built while parsing the blocks
+    tests = ["((code >= %s) && (code <= %s))" % (start, end)
+             for (start, end) in BlockNames[block]]
+    output.write(" ||\n           ".join(tests))
+    output.write(");\n}\n\n")
+# Public lookup by block name: prototype for the header, body for the C file
+header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsBlock\t(int code, const char *block);\n\n")
+output.write(
+"""/**
+ * xmlUCSIsBlock:
+ * @code: UCS code point
+ * @block: UCS block name
+ *
+ * Check whether the character is part of the UCS Block
+ *
+ * Returns 1 if true, 0 if false and -1 on unknown block
+ */
+int
+xmlUCSIsBlock(int code, const char *block) {
+    xmlIntFunc *func;
+
+    func = xmlUnicodeLookup(&xmlUnicodeBlockTbl, block);
+    if (func == NULL)
+	return (-1);
+    return (func(code));
+}
+
+""")
+
+# One prototype and one documented function per category.  A category
+# with more than minTableSize ranges is looked up in its range table
+# through xmlCharInRange, any other one gets its tests written inline.
+for name in ckeys:
+    ranges = Categories[name]
+    header.write("XMLPUBFUN int XMLCALL xmlUCSIsCat%s\t(int code);\n" % name)
+    output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
+    output.write(" *\n * Check whether the character is part of %s UCS Category\n"
+                 % (name))
+    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n")
+    output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
+    if len(ranges) > minTableSize:
+        output.write("    return(xmlCharInRange((unsigned int)code, &xml%sG)"
+            % name)
+    else:
+        tests = []
+        for (begin, end) in ranges:
+            if begin == end:
+                # single code point: an equality test is enough
+                tests.append("(code == %s)" % (hex(begin)))
+            else:
+                tests.append("((code >= %s) && (code <= %s))" % (
+                         hex(begin), hex(end)))
+        if tests:
+            output.write("    return(" + " ||\n           ".join(tests))
+    output.write(");\n}\n\n")
+# Public lookup by category name, then the trailer which ends the C file
+header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsCat\t(int code, const char *cat);\n")
+output.write(
+"""/**
+ * xmlUCSIsCat:
+ * @code: UCS code point
+ * @cat: UCS Category name
+ *
+ * Check whether the character is part of the UCS Category
+ *
+ * Returns 1 if true, 0 if false and -1 on unknown category
+ */
+int
+xmlUCSIsCat(int code, const char *cat) {
+    xmlIntFunc *func;
+
+    func = xmlUnicodeLookup(&xmlUnicodeCatTbl, cat);
+    if (func == NULL)
+	return (-1);
+    return (func(code));
+}
+
+#define bottom_xmlunicode
+#include "elfgcchack.h"
+#endif /* LIBXML_UNICODE_ENABLED */
+""")
+# End of the header: close 'extern "C"' and both guards, then close the files
+header.write("""
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBXML_UNICODE_ENABLED */
+
+#endif /* __XML_UNICODE_H__ */
+""");
+
+header.close()
+output.close()
-- 
cgit v1.2.3