From 35a201cc8ef0c3f5b2df88d2e528aabee1048348 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 30 Apr 2021 18:47:09 +0200 Subject: Initial/Final commit --- libxml2-2.9.10/os400/iconv/bldcsndfa/bldcsndfa.c | 1953 +++++++++++++ .../os400/iconv/bldcsndfa/ccsid_mibenum.dtd | 15 + .../os400/iconv/bldcsndfa/ccsid_mibenum.xml | 270 ++ .../os400/iconv/bldcsndfa/character-sets.xhtml | 3077 ++++++++++++++++++++ 4 files changed, 5315 insertions(+) create mode 100644 libxml2-2.9.10/os400/iconv/bldcsndfa/bldcsndfa.c create mode 100644 libxml2-2.9.10/os400/iconv/bldcsndfa/ccsid_mibenum.dtd create mode 100644 libxml2-2.9.10/os400/iconv/bldcsndfa/ccsid_mibenum.xml create mode 100644 libxml2-2.9.10/os400/iconv/bldcsndfa/character-sets.xhtml (limited to 'libxml2-2.9.10/os400/iconv/bldcsndfa') diff --git a/libxml2-2.9.10/os400/iconv/bldcsndfa/bldcsndfa.c b/libxml2-2.9.10/os400/iconv/bldcsndfa/bldcsndfa.c new file mode 100644 index 0000000..48afd54 --- /dev/null +++ b/libxml2-2.9.10/os400/iconv/bldcsndfa/bldcsndfa.c @@ -0,0 +1,1953 @@ +/** +*** Build a deterministic finite automaton to associate CCSIDs with +*** character set names. +*** +*** Compile on OS/400 with options SYSIFCOPT(*IFSIO). +*** +*** See Copyright for the status of this software. +*** +*** Author: Patrick Monnerat , DATASPHERE S.A. 
+**/ + +#include +#include +#include +#include +#include +#include + +#include + + +#ifdef OLDXML +#include "xml.h" +#else +#include +#include +#include +#include +#endif + + +#ifdef __OS400__ +#define iconv_open_error(cd) ((cd).return_value == -1) +#define set_iconv_open_error(cd) ((cd).return_value = -1) +#else +#define iconv_open_error(cd) ((cd) == (iconv_t) -1) +#define set_iconv_open_error(cd) ((cd) = (iconv_t) -1) +#endif + + +#define C_SOURCE_CCSID 500 +#define C_UTF8_CCSID 1208 + + +#define UTF8_SPACE 0x20 +#define UTF8_HT 0x09 +#define UTF8_0 0x30 +#define UTF8_9 0x39 +#define UTF8_A 0x41 +#define UTF8_Z 0x5A +#define UTF8_a 0x61 +#define UTF8_z 0x7A + + +#define GRANULE 128 /* Memory allocation granule. */ + +#define EPSILON 0x100 /* Token for empty transition. */ + + +#ifndef OFFSETOF +#define OFFSETOF(t, f) ((unsigned int) ((char *) &((t *) 0)->f - (char *) 0)) +#endif + +#ifndef OFFSETBY +#define OFFSETBY(t, p, o) ((t *) ((char *) (p) + (unsigned int) (o))) +#endif + + +typedef struct t_transition t_transition; /* NFA/DFA transition. */ +typedef struct t_state t_state; /* NFA/DFA state node. */ +typedef struct t_symlist t_symlist; /* Symbol (i.e.: name) list. */ +typedef struct t_chset t_chset; /* Character set. */ +typedef struct t_stategroup t_stategroup; /* Optimization group. */ +typedef unsigned char utf8char; /* UTF-8 character byte. */ +typedef unsigned char byte; /* Untyped data byte. */ + + +typedef struct { /* Set of pointers. */ + unsigned int p_size; /* Current allocated size. */ + unsigned int p_card; /* Current element count. */ + void * p_set[1]; /* Element array. */ +} t_powerset; + + +struct t_transition { + t_transition * t_forwprev; /* Head of forward transition list. */ + t_transition * t_forwnext; /* Tail of forward transition list. */ + t_transition * t_backprev; /* Head of backward transition list. */ + t_transition * t_backnext; /* Tail of backward transition list. */ + t_state * t_from; /* Incoming state. 
*/ + t_state * t_to; /* Destination state. */ + unsigned short t_token; /* Transition token. */ + unsigned int t_index; /* Transition array index. */ +}; + + +struct t_state { + t_state * s_next; /* Next state (for DFA construction). */ + t_state * s_stack; /* Unprocessed DFA states stack. */ + t_transition * s_forward; /* Forward transitions. */ + t_transition * s_backward; /* Backward transitions. */ + t_chset * s_final; /* Recognized character set. */ + t_powerset * s_nfastates; /* Corresponding NFA states. */ + unsigned int s_index; /* State index. */ +}; + + +struct t_symlist { + t_symlist * l_next; /* Next name in list. */ + utf8char l_symbol[1]; /* Name bytes. */ +}; + + +struct t_chset { + t_chset * c_next; /* Next character set. */ + t_symlist * c_names; /* Character set name list. */ + iconv_t c_fromUTF8; /* Conversion from UTF-8. */ + unsigned int c_ccsid; /* IBM character set code. */ + unsigned int c_mibenum; /* IANA character code. */ +}; + + +struct t_stategroup { + t_stategroup * g_next; /* Next group. */ + t_state * g_member; /* Group member (s_stack) list. */ + unsigned int g_id; /* Group ident. */ +}; + + + +t_chset * chset_list; /* Character set list. */ +t_state * initial_state; /* Initial NFA state. */ +iconv_t job2utf8; /* Job CCSID to UTF-8 conversion. */ +iconv_t utf82job; /* UTF-8 to job CCSID conversion. */ +t_state * dfa_states; /* List of DFA states. */ +unsigned int groupid; /* Group ident counter. */ + + +/** +*** UTF-8 strings. 
+**/ + +#pragma convert(819) + +static const utf8char utf8_MIBenum[] = "MIBenum"; +static const utf8char utf8_mibenum[] = "mibenum"; +static const utf8char utf8_ibm_[] = "ibm-"; +static const utf8char utf8_IBMCCSID[] = "IBMCCSID"; +static const utf8char utf8_iana_[] = "iana-"; +static const utf8char utf8_Name[] = "Name"; +static const utf8char utf8_Pref_MIME_Name[] = "Preferred MIME Name"; +static const utf8char utf8_Aliases[] = "Aliases"; +static const utf8char utf8_html[] = "html"; +static const utf8char utf8_htmluri[] = "http://www.w3.org/1999/xhtml"; +static const utf8char utf8_A[] = "A"; +static const utf8char utf8_C[] = "C"; +static const utf8char utf8_M[] = "M"; +static const utf8char utf8_N[] = "N"; +static const utf8char utf8_P[] = "P"; +static const utf8char utf8_T[] = "T"; +static const utf8char utf8_ccsid[] = "ccsid"; +static const utf8char utf8_EBCDIC[] = "EBCDIC"; +static const utf8char utf8_ASCII[] = "ASCII"; +static const utf8char utf8_assocnodes[] = "/ccsid_mibenum/assoc[@ccsid]"; +static const utf8char utf8_aliastext[] = + "/ccsid_mibenum/assoc[@ccsid=$C]/alias/text()"; +#ifdef OLDXML +static const utf8char utf8_tablerows[] = + "//table[@id='table-character-sets-1']/*/tr"; +static const utf8char utf8_headerpos[] = + "count(th[text()=$T]/preceding-sibling::th)+1"; +static const utf8char utf8_getmibenum[] = "number(td[$M])"; +static const utf8char utf8_getprefname[] = "string(td[$P])"; +static const utf8char utf8_getname[] = "string(td[$N])"; +static const utf8char utf8_getaliases[] = "td[$A]/text()"; +#else +static const utf8char utf8_tablerows[] = + "//html:table[@id='table-character-sets-1']/*/html:tr"; +static const utf8char utf8_headerpos[] = + "count(html:th[text()=$T]/preceding-sibling::html:th)+1"; +static const utf8char utf8_getmibenum[] = "number(html:td[$M])"; +static const utf8char utf8_getprefname[] = "string(html:td[$P])"; +static const utf8char utf8_getname[] = "string(html:td[$N])"; +static const utf8char utf8_getaliases[] = 
"html:td[$A]/text()"; +#endif + +#pragma convert(0) + + +/** +*** UTF-8 character length table. +*** +*** Index is first character byte, value is the character byte count. +**/ + +static signed char utf8_chlen[] = { +/* 00-07 */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 08-0F */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 10-17 */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 18-1F */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 20-27 */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 28-2F */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 30-37 */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 38-3F */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 40-47 */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 48-4F */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 50-57 */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 58-5F */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 60-67 */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 68-6F */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 70-77 */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 78-7F */ 1, 1, 1, 1, 1, 1, 1, 1, +/* 80-87 */ -1, -1, -1, -1, -1, -1, -1, -1, +/* 88-8F */ -1, -1, -1, -1, -1, -1, -1, -1, +/* 90-97 */ -1, -1, -1, -1, -1, -1, -1, -1, +/* 98-9F */ -1, -1, -1, -1, -1, -1, -1, -1, +/* A0-A7 */ -1, -1, -1, -1, -1, -1, -1, -1, +/* A8-AF */ -1, -1, -1, -1, -1, -1, -1, -1, +/* B0-B7 */ -1, -1, -1, -1, -1, -1, -1, -1, +/* B8-BF */ -1, -1, -1, -1, -1, -1, -1, -1, +/* C0-C7 */ 2, 2, 2, 2, 2, 2, 2, 2, +/* C8-CF */ 2, 2, 2, 2, 2, 2, 2, 2, +/* D0-D7 */ 2, 2, 2, 2, 2, 2, 2, 2, +/* D8-DF */ 2, 2, 2, 2, 2, 2, 2, 2, +/* E0-E7 */ 3, 3, 3, 3, 3, 3, 3, 3, +/* E8-EF */ 3, 3, 3, 3, 3, 3, 3, 3, +/* F0-F7 */ 4, 4, 4, 4, 4, 4, 4, 4, +/* F8-FF */ 5, 5, 5, 5, 6, 6, -1, -1 +}; + + + +void +chknull(void * p) + +{ + if (p) + return; + + fprintf(stderr, "Not enough memory\n"); + exit(1); +} + + +void +makecode(char * buf, unsigned int ccsid) + +{ + ccsid &= 0xFFFF; + memset(buf, 0, 32); + sprintf(buf, "IBMCCSID%05u0000000", ccsid); +} + + +iconv_t +iconv_open_ccsid(unsigned int ccsidout, + unsigned int ccsidin, unsigned int nullflag) + +{ + char fromcode[33]; + char tocode[33]; + + makecode(fromcode, ccsidin); + makecode(tocode, ccsidout); + memset(tocode + 13, 0, sizeof tocode - 13); + + if 
/**
***	Parse an unsigned decimal number.
***
***	cpp	In/out cursor into a null-terminated string. On return it
***		points to the first character that is not a decimal digit.
***
***	Returns the value of the digits consumed, or 0 when the cursor does
***	not start on a digit (the cursor is then left where it was).
***	No overflow detection is performed: the result wraps as unsigned
***	arithmetic does.
**/

unsigned int
getnum(char * * cpp)

{
	const char * cp = *cpp;
	unsigned int n = 0;

	/**
	***	<ctype.h> classifiers only accept values representable as
	***	unsigned char (or EOF): passing a negative plain char is
	***	undefined behavior, hence the cast.
	**/

	while (isdigit((unsigned char) *cp))
		n = 10 * n + (unsigned int) (*cp++ - '0');

	*cpp = (char *) cp;
	return n;
}
/**
***	Case-insensitive keyword match.
***
***	cpp	In/out cursor into a null-terminated string.
***	s	Null-terminated keyword expected at the cursor.
***
***	Returns 1 if the text at *cpp starts with `s' (letter case ignored);
***	*cpp is then advanced past the keyword and past any spaces or tabs
***	that follow it. Returns 0 and leaves *cpp untouched otherwise.
**/

int
match(char * * cpp, char * s)

{
	const char * cp = *cpp;

	for (; *s; s++, cp++) {
		/**
		***	Work on unsigned char values: <ctype.h> functions
		***	have undefined behavior on negative arguments.
		**/

		int c1 = (unsigned char) *cp;
		int c2 = (unsigned char) *s;

		if (c1 != c2 && tolower(c1) != tolower(c2))
			return 0;
	}

	while (*cp == ' ' || *cp == '\t')
		cp++;

	*cpp = (char *) cp;
	return 1;
}
((t->t_backnext = to->s_backward)) + t->t_backnext->t_backprev = t; + + to->s_backward = t; + } +} + + +t_transition * +newtransition(unsigned int token, t_state * from, t_state * to) + +{ + t_transition * t; + + t = (t_transition *) malloc(sizeof *t); + chknull(t); + memset((char *) t, 0, sizeof *t); + t->t_token = token; + link_transition(t, from, to); + return t; +} + + +t_transition * +uniquetransition(unsigned int token, t_state * from, t_state * to) + +{ + t_transition * t; + + for (t = from->s_forward; t; t = t->t_forwnext) + if (t->t_token == token && (t->t_to == to || !to)) + return t; + + return to? newtransition(token, from, to): (t_transition *) NULL; +} + + +int +set_position(t_powerset * s, void * e) + +{ + unsigned int l; + unsigned int h; + unsigned int m; + int i; + + l = 0; + h = s->p_card; + + while (l < h) { + m = (l + h) >> 1; + + /** + *** If both pointers belong to different allocation arenas, + *** native comparison may find them neither + *** equal, nor greater, nor smaller. + *** We thus compare using memcmp() to get an orthogonal + *** result. 
+ **/ + + i = memcmp(&e, s->p_set + m, sizeof e); + + if (i < 0) + h = m; + else if (!i) + return m; + else + l = m + 1; + } + + return l; +} + + +t_powerset * +set_include(t_powerset * s, void * e) + +{ + unsigned int pos; + unsigned int n; + + if (!s) { + s = (t_powerset *) malloc(sizeof *s + + GRANULE * sizeof s->p_set); + chknull(s); + s->p_size = GRANULE; + s->p_set[GRANULE] = (t_state *) NULL; + s->p_set[0] = e; + s->p_card = 1; + return s; + } + + pos = set_position(s, e); + + if (pos < s->p_card && s->p_set[pos] == e) + return s; + + if (s->p_card >= s->p_size) { + s->p_size += GRANULE; + s = (t_powerset *) realloc(s, + sizeof *s + s->p_size * sizeof s->p_set); + chknull(s); + s->p_set[s->p_size] = (t_state *) NULL; + } + + n = s->p_card - pos; + + if (n) + memmove((char *) (s->p_set + pos + 1), + (char *) (s->p_set + pos), n * sizeof s->p_set[0]); + + s->p_set[pos] = e; + s->p_card++; + return s; +} + + +t_state * +nfatransition(t_state * to, byte token) + +{ + t_state * from; + + from = newstate(); + newtransition(token, from, to); + return from; +} + + +static t_state * nfadevelop(t_state * from, t_state * final, iconv_t icc, + const utf8char * name, unsigned int len); + + +void +nfaslice(t_state * * from, t_state * * to, iconv_t icc, + const utf8char * chr, unsigned int chlen, + const utf8char * name, unsigned int len, t_state * final) + +{ + char * srcp; + char * dstp; + size_t srcc; + size_t dstc; + unsigned int cnt; + t_state * f; + t_state * t; + t_transition * tp; + byte bytebuf[8]; + + srcp = (char *) chr; + srcc = chlen; + dstp = (char *) bytebuf; + dstc = sizeof bytebuf; + iconv(icc, &srcp, &srcc, &dstp, &dstc); + dstp = (char *) bytebuf; + cnt = sizeof bytebuf - dstc; + t = *to; + f = *from; + + /** + *** Check for end of string. 
+ **/ + + if (!len) + if (t && t != final) + uniquetransition(EPSILON, t, final); + else + t = final; + + if (f) + while (cnt) { + tp = uniquetransition(*dstp, f, (t_state *) NULL); + + if (!tp) + break; + + f = tp->t_to; + dstp++; + cnt--; + } + + if (!cnt) { + if (!t) + t = nfadevelop(f, final, icc, name, len); + + *to = t; + return; + } + + if (!t) { + t = nfadevelop((t_state *) NULL, final, icc, name, len); + *to = t; + } + + if (!f) + *from = f = newstate(); + + while (cnt > 1) + t = nfatransition(t, dstp[--cnt]); + + newtransition(*dstp, f, t); +} + + +t_state * +nfadevelop(t_state * from, t_state * final, iconv_t icc, + const utf8char * name, unsigned int len) + +{ + int chlen; + int i; + t_state * to; + int uccnt; + int lccnt; + utf8char chr; + + chlen = utf8_chlen[*name]; + + for (i = 1; i < chlen; i++) + if ((name[i] & 0xC0) != 0x80) + break; + + if (i != chlen) { + fprintf(stderr, + "Invalid UTF8 character in character set name\n"); + return (t_state *) NULL; + } + + to = (t_state *) NULL; + nfaslice(&from, &to, + icc, name, chlen, name + chlen, len - chlen, final); + + if (*name >= UTF8_a && *name <= UTF8_z) + chr = *name - UTF8_a + UTF8_A; + else if (*name >= UTF8_A && *name <= UTF8_Z) + chr = *name - UTF8_A + UTF8_a; + else + return from; + + nfaslice(&from, &to, icc, &chr, 1, name + chlen, len - chlen, final); + return from; +} + + + +void +nfaenter(const utf8char * name, int len, t_chset * charset) + +{ + t_chset * s; + t_state * final; + t_state * sp; + t_symlist * lp; + + /** + *** Enter case-insensitive `name' in NFA in all known + *** character codes. + *** Redundant shift state changes as well as shift state + *** differences between uppercase and lowercase are + *** not handled. + **/ + + if (len < 0) + len = strlen(name) + 1; + + for (lp = charset->c_names; lp; lp = lp->l_next) + if (!memcmp(name, lp->l_symbol, len)) + return; /* Already entered. 
*/ + + lp = (t_symlist *) malloc(sizeof *lp + len); + chknull(lp); + memcpy(lp->l_symbol, name, len); + lp->l_symbol[len] = '\0'; + lp->l_next = charset->c_names; + charset->c_names = lp; + final = newstate(); + final->s_final = charset; + + for (s = chset_list; s; s = s->c_next) + if (!iconv_open_error(s->c_fromUTF8)) + sp = nfadevelop(initial_state, final, + s->c_fromUTF8, name, len); +} + + +unsigned int +utf8_utostr(utf8char * s, unsigned int v) + +{ + unsigned int d; + unsigned int i; + + d = v / 10; + v -= d * 10; + i = d? utf8_utostr(s, d): 0; + s[i++] = v + UTF8_0; + s[i] = '\0'; + return i; +} + + +unsigned int +utf8_utostrpad(utf8char * s, unsigned int v, int digits) + +{ + unsigned int i = utf8_utostr(s, v); + utf8char pad = UTF8_SPACE; + + if (digits < 0) { + pad = UTF8_0; + digits = -digits; + } + + if (i >= digits) + return i; + + memmove(s + digits - i, s, i + 1); + memset(s, pad, digits - i); + return digits; +} + + +unsigned int +utf8_strtou(const utf8char * s) + +{ + unsigned int v; + + while (*s == UTF8_SPACE || *s == UTF8_HT) + s++; + + for (v = 0; *s >= UTF8_0 && *s <= UTF8_9;) + v = 10 * v + *s++ - UTF8_0; + + return v; +} + + +unsigned int +getNumAttr(xmlNodePtr node, const xmlChar * name) + +{ + const xmlChar * s; + unsigned int val; + + s = xmlGetProp(node, name); + + if (!s) + return 0; + + val = utf8_strtou(s); + xmlFree((xmlChar *) s); + return val; +} + + +void +read_assocs(const char * filename) + +{ + xmlDocPtr doc; + xmlXPathContextPtr ctxt; + xmlXPathObjectPtr obj; + xmlNodePtr node; + t_chset * sp; + int i; + unsigned int ccsid; + unsigned int mibenum; + utf8char symbuf[32]; + + doc = loadXMLFile(filename); + + if (!doc) { + fprintf(stderr, "Cannot load file %s\n", filename); + exit(1); + } + + ctxt = xmlXPathNewContext(doc); + obj = xmlXPathEval(utf8_assocnodes, ctxt); + + if (!obj || obj->type != XPATH_NODESET || !obj->nodesetval || + !obj->nodesetval->nodeTab || !obj->nodesetval->nodeNr) { + fprintf(stderr, "No association found 
in %s\n", filename); + exit(1); + } + + for (i = 0; i < obj->nodesetval->nodeNr; i++) { + node = obj->nodesetval->nodeTab[i]; + ccsid = getNumAttr(node, utf8_ccsid); + mibenum = getNumAttr(node, utf8_mibenum); + + /** + *** Check for duplicate. + **/ + + for (sp = chset_list; sp; sp = sp->c_next) + if (ccsid && ccsid == sp->c_ccsid || + mibenum && mibenum == sp->c_mibenum) { + fprintf(stderr, "Duplicate character set: "); + fprintf(stderr, "CCSID = %u/%u, ", + ccsid, sp->c_ccsid); + fprintf(stderr, "MIBenum = %u/%u\n", + mibenum, sp->c_mibenum); + break; + } + + if (sp) + continue; + + /** + *** Allocate the new character set. + **/ + + sp = (t_chset *) malloc(sizeof *sp); + chknull(sp); + memset(sp, 0, sizeof *sp); + + if (!ccsid) /* Do not attempt with current job CCSID. */ + set_iconv_open_error(sp->c_fromUTF8); + else { + sp->c_fromUTF8 = + iconv_open_ccsid(ccsid, C_UTF8_CCSID, 0); + + if (iconv_open_error(sp->c_fromUTF8) == -1) + fprintf(stderr, + "Cannot convert into CCSID %u: ignored\n", + ccsid); + } + + sp->c_ccsid = ccsid; + sp->c_mibenum = mibenum; + sp->c_next = chset_list; + chset_list = sp; + } + + xmlXPathFreeObject(obj); + + /** + *** Enter aliases. + **/ + + for (sp = chset_list; sp; sp = sp->c_next) { + strcpy(symbuf, utf8_ibm_); + utf8_utostr(symbuf + 4, sp->c_ccsid); + nfaenter(symbuf, -1, sp); + strcpy(symbuf, utf8_IBMCCSID); + utf8_utostrpad(symbuf + 8, sp->c_ccsid, -5); + nfaenter(symbuf, 13, sp); /* Not null-terminated. 
*/ + + if (sp->c_mibenum) { + strcpy(symbuf, utf8_iana_); + utf8_utostr(symbuf + 5, sp->c_mibenum); + nfaenter(symbuf, -1, sp); + } + + xmlXPathRegisterVariable(ctxt, utf8_C, + xmlXPathNewFloat((double) sp->c_ccsid)); + obj = xmlXPathEval(utf8_aliastext, ctxt); + + if (!obj || obj->type != XPATH_NODESET) { + fprintf(stderr, "getAlias failed in %s\n", filename); + exit(1); + } + + if (obj->nodesetval && + obj->nodesetval->nodeTab && obj->nodesetval->nodeNr) { + for (i = 0; i < obj->nodesetval->nodeNr; i++) { + node = obj->nodesetval->nodeTab[i]; + nfaenter(node->content, -1, sp); + } + } + + xmlXPathFreeObject(obj); + } + + xmlXPathFreeContext(ctxt); + xmlFreeDoc(doc); +} + + +unsigned int +columnPosition(xmlXPathContextPtr ctxt, const xmlChar * header) + +{ + xmlXPathObjectPtr obj; + unsigned int res = 0; + + xmlXPathRegisterVariable(ctxt, utf8_T, xmlXPathNewString(header)); + obj = xmlXPathEval(utf8_headerpos, ctxt); + + if (obj) { + if (obj->type == XPATH_NUMBER) + res = (unsigned int) obj->floatval; + + xmlXPathFreeObject(obj); + } + + return res; +} + + +void +read_iana(const char * filename) + +{ + xmlDocPtr doc; + xmlXPathContextPtr ctxt; + xmlXPathObjectPtr obj1; + xmlXPathObjectPtr obj2; + xmlNodePtr node; + int prefnamecol; + int namecol; + int mibenumcol; + int aliascol; + int mibenum; + t_chset * sp; + int n; + int i; + + doc = loadXMLFile(filename); + + if (!doc) { + fprintf(stderr, "Cannot load file %s\n", filename); + exit(1); + } + + ctxt = xmlXPathNewContext(doc); + +#ifndef OLDXML + xmlXPathRegisterNs(ctxt, utf8_html, utf8_htmluri); +#endif + + obj1 = xmlXPathEval(utf8_tablerows, ctxt); + + if (!obj1 || obj1->type != XPATH_NODESET || !obj1->nodesetval || + !obj1->nodesetval->nodeTab || obj1->nodesetval->nodeNr <= 1) { + fprintf(stderr, "No data in %s\n", filename); + exit(1); + } + + /** + *** Identify columns. 
+ **/ + + xmlXPathSetContextNode(obj1->nodesetval->nodeTab[0], ctxt); + prefnamecol = columnPosition(ctxt, utf8_Pref_MIME_Name); + namecol = columnPosition(ctxt, utf8_Name); + mibenumcol = columnPosition(ctxt, utf8_MIBenum); + aliascol = columnPosition(ctxt, utf8_Aliases); + + if (!prefnamecol || !namecol || !mibenumcol || !aliascol) { + fprintf(stderr, "Key column(s) missing in %s\n", filename); + exit(1); + } + + xmlXPathRegisterVariable(ctxt, utf8_P, + xmlXPathNewFloat((double) prefnamecol)); + xmlXPathRegisterVariable(ctxt, utf8_N, + xmlXPathNewFloat((double) namecol)); + xmlXPathRegisterVariable(ctxt, utf8_M, + xmlXPathNewFloat((double) mibenumcol)); + xmlXPathRegisterVariable(ctxt, utf8_A, + xmlXPathNewFloat((double) aliascol)); + + /** + *** Process each row. + **/ + + for (n = 1; n < obj1->nodesetval->nodeNr; n++) { + xmlXPathSetContextNode(obj1->nodesetval->nodeTab[n], ctxt); + + /** + *** Get the MIBenum from current row. + */ + + obj2 = xmlXPathEval(utf8_getmibenum, ctxt); + + if (!obj2 || obj2->type != XPATH_NUMBER) { + fprintf(stderr, "get MIBenum failed at row %u\n", n); + exit(1); + } + + if (xmlXPathIsNaN(obj2->floatval) || + obj2->floatval < 1.0 || obj2->floatval > 65535.0 || + ((unsigned int) obj2->floatval) != obj2->floatval) { + fprintf(stderr, "invalid MIBenum at row %u\n", n); + xmlXPathFreeObject(obj2); + continue; + } + + mibenum = obj2->floatval; + xmlXPathFreeObject(obj2); + + /** + *** Search the associations for a corresponding CCSID. + **/ + + for (sp = chset_list; sp; sp = sp->c_next) + if (sp->c_mibenum == mibenum) + break; + + if (!sp) + continue; /* No CCSID for this MIBenum. */ + + /** + *** Process preferred MIME name. 
+ **/ + + obj2 = xmlXPathEval(utf8_getprefname, ctxt); + + if (!obj2 || obj2->type != XPATH_STRING) { + fprintf(stderr, + "get Preferred_MIME_Name failed at row %u\n", n); + exit(1); + } + + if (obj2->stringval && obj2->stringval[0]) + nfaenter(obj2->stringval, -1, sp); + + xmlXPathFreeObject(obj2); + + /** + *** Process name. + **/ + + obj2 = xmlXPathEval(utf8_getname, ctxt); + + if (!obj2 || obj2->type != XPATH_STRING) { + fprintf(stderr, "get name failed at row %u\n", n); + exit(1); + } + + if (obj2->stringval && obj2->stringval[0]) + nfaenter(obj2->stringval, -1, sp); + + xmlXPathFreeObject(obj2); + + /** + *** Process aliases. + **/ + + obj2 = xmlXPathEval(utf8_getaliases, ctxt); + + if (!obj2 || obj2->type != XPATH_NODESET) { + fprintf(stderr, "get aliases failed at row %u\n", n); + exit(1); + } + + if (obj2->nodesetval && obj2->nodesetval->nodeTab) + for (i = 0; i < obj2->nodesetval->nodeNr; i++) { + node = obj2->nodesetval->nodeTab[i]; + + if (node && node->content && node->content[0]) + nfaenter(node->content, -1, sp); + } + + xmlXPathFreeObject(obj2); + } + + xmlXPathFreeObject(obj1); + xmlXPathFreeContext(ctxt); + xmlFreeDoc(doc); +} + + +t_powerset * closureset(t_powerset * dst, t_powerset * src); + + +t_powerset * +closure(t_powerset * dst, t_state * src) + +{ + t_transition * t; + unsigned int oldcard; + + if (src->s_nfastates) { + /** + *** Is a DFA state: return closure of set of equivalent + *** NFA states. + **/ + + return closureset(dst, src->s_nfastates); + } + + /** + *** Compute closure of NFA state. 
+ **/ + + dst = set_include(dst, src); + + for (t = src->s_forward; t; t = t->t_forwnext) + if (t->t_token == EPSILON) { + oldcard = dst->p_card; + dst = set_include(dst, t->t_to); + + if (oldcard != dst->p_card) + dst = closure(dst, t->t_to); + } + + return dst; +} + + +t_powerset * +closureset(t_powerset * dst, t_powerset * src) + +{ + unsigned int i; + + for (i = 0; i < src->p_card; i++) + dst = closure(dst, (t_state *) src->p_set[i]); + + return dst; +} + + +t_state * +get_dfa_state(t_state * * stack, + t_powerset * nfastates, xmlHashTablePtr sethash) + +{ + t_state * s; + + if (s = hash_get(sethash, nfastates->p_set, + nfastates->p_card * sizeof nfastates->p_set[0])) { + /** + *** DFA state already present. + *** Release the NFA state set and return + *** the address of the old DFA state. + **/ + + free((char *) nfastates); + return s; + } + + /** + *** Build the new state. + **/ + + s = newstate(); + s->s_nfastates = nfastates; + s->s_next = dfa_states; + dfa_states = s; + s->s_stack = *stack; + *stack = s; + + /** + *** Enter it in hash. + **/ + + if (hash_add(sethash, nfastates->p_set, + nfastates->p_card * sizeof nfastates->p_set[0], s)) + chknull(NULL); /* Memory allocation error. */ + + return s; +} + + +int +transcmp(const void * p1, const void * p2) + +{ + t_transition * t1; + t_transition * t2; + + t1 = *(t_transition * *) p1; + t2 = *(t_transition * *) p2; + return ((int) t1->t_token) - ((int) t2->t_token); +} + + +void +builddfa(void) + +{ + t_powerset * transset; + t_powerset * stateset; + t_state * s; + t_state * s2; + unsigned int n; + unsigned int i; + unsigned int token; + t_transition * t; + t_state * stack; + xmlHashTablePtr sethash; + unsigned int nst; + + transset = set_include(NULL, NULL); + chknull(transset); + stateset = set_include(NULL, NULL); + chknull(stateset); + sethash = xmlHashCreate(1); + chknull(sethash); + dfa_states = (t_state *) NULL; + stack = (t_state *) NULL; + nst = 0; + + /** + *** Build the DFA initial state. 
+ **/ + + get_dfa_state(&stack, closure(NULL, initial_state), sethash); + + /** + *** Build the other DFA states by looking at each + *** possible transition from stacked DFA states. + **/ + + do { + if (!(++nst % 100)) + fprintf(stderr, "%u DFA states\n", nst); + + s = stack; + stack = s->s_stack; + s->s_stack = (t_state *) NULL; + + /** + *** Build a set of all non-epsilon transitions from this + *** state. + **/ + + transset->p_card = 0; + + for (n = 0; n < s->s_nfastates->p_card; n++) { + s2 = s->s_nfastates->p_set[n]; + + for (t = s2->s_forward; t; t = t->t_forwnext) + if (t->t_token != EPSILON) { + transset = set_include(transset, t); + chknull(transset); + } + } + + /** + *** Sort transitions by token. + **/ + + qsort(transset->p_set, transset->p_card, + sizeof transset->p_set[0], transcmp); + + /** + *** Process all transitions, grouping them by token. + **/ + + stateset->p_card = 0; + token = EPSILON; + + for (i = 0; i < transset->p_card; i++) { + t = transset->p_set[i]; + + if (token != t->t_token) { + if (stateset->p_card) { + /** + *** Get the equivalent DFA state + *** and create transition. + **/ + + newtransition(token, s, + get_dfa_state(&stack, + closureset(NULL, stateset), + sethash)); + stateset->p_card = 0; + } + + token = t->t_token; + } + + stateset = set_include(stateset, t->t_to); + } + + if (stateset->p_card) + newtransition(token, s, get_dfa_state(&stack, + closureset(NULL, stateset), sethash)); + } while (stack); + + free((char *) transset); + free((char *) stateset); + xmlHashFree(sethash, NULL); + + /** + *** Reverse the state list to get the initial state first, + *** check for ambiguous prefixes, determine final states, + *** destroy NFA state sets. 
+ **/ + + while (s = dfa_states) { + dfa_states = s->s_next; + s->s_next = stack; + stack = s; + stateset = s->s_nfastates; + s->s_nfastates = (t_powerset *) NULL; + + for (n = 0; n < stateset->p_card; n++) { + s2 = (t_state *) stateset->p_set[n]; + + if (s2->s_final) { + if (s->s_final && s->s_final != s2->s_final) + fprintf(stderr, + "Ambiguous name for CCSIDs %u/%u\n", + s->s_final->c_ccsid, + s2->s_final->c_ccsid); + + s->s_final = s2->s_final; + } + } + + free((char *) stateset); + } + + dfa_states = stack; +} + + +void +deletenfa(void) + +{ + t_transition * t; + t_state * s; + t_state * u; + t_state * stack; + + stack = initial_state; + stack->s_stack = (t_state *) NULL; + + while ((s = stack)) { + stack = s->s_stack; + + while ((t = s->s_forward)) { + u = t->t_to; + unlink_transition(t); + free((char *) t); + + if (!u->s_backward) { + u->s_stack = stack; + stack = u; + } + } + + free((char *) s); + } +} + + +t_stategroup * +newgroup(void) + +{ + t_stategroup * g; + + g = (t_stategroup *) malloc(sizeof *g); + chknull(g); + memset((char *) g, 0, sizeof *g); + g->g_id = groupid++; + return g; +} + + +void +optimizedfa(void) + +{ + unsigned int i; + xmlHashTablePtr h; + t_state * s1; + t_state * s2; + t_state * finstates; + t_state * * sp; + t_stategroup * g1; + t_stategroup * g2; + t_stategroup * ghead; + t_transition * t1; + t_transition * t2; + unsigned int done; + unsigned int startgroup; + unsigned int gtrans[1 << (8 * sizeof(unsigned char))]; + + /** + *** Reduce DFA state count. + **/ + + groupid = 0; + ghead = (t_stategroup *) NULL; + + /** + *** First split: non-final and each distinct final states. + **/ + + h = xmlHashCreate(4); + chknull(h); + + for (s1 = dfa_states; s1; s1 = s1->s_next) { + if (!(g1 = hash_get(h, &s1->s_final, sizeof s1->s_final))) { + g1 = newgroup(); + g1->g_next = ghead; + ghead = g1; + + if (hash_add(h, &s1->s_final, sizeof s1->s_final, g1)) + chknull(NULL); /* Memory allocation error. 
*/ + } + + s1->s_index = g1->g_id; + s1->s_stack = g1->g_member; + g1->g_member = s1; + } + + xmlHashFree(h, NULL); + + /** + *** Subsequent splits: states that have the same forward + *** transition tokens to states in the same group. + **/ + + do { + done = 1; + + for (g2 = ghead; g2; g2 = g2->g_next) { + s1 = g2->g_member; + + if (!s1->s_stack) + continue; + + h = xmlHashCreate(1); + chknull(h); + + /** + *** Build the group transition map. + **/ + + memset((char *) gtrans, ~0, sizeof gtrans); + + for (t1 = s1->s_forward; t1; t1 = t1->t_forwnext) + gtrans[t1->t_token] = t1->t_to->s_index; + + if (hash_add(h, gtrans, sizeof gtrans, g2)) + chknull(NULL); + + /** + *** Process other states in group. + **/ + + sp = &s1->s_stack; + s1 = *sp; + + do { + *sp = s1->s_stack; + + /** + *** Build the transition map. + **/ + + memset((char *) gtrans, ~0, sizeof gtrans); + + for (t1 = s1->s_forward; + t1; t1 = t1->t_forwnext) + gtrans[t1->t_token] = t1->t_to->s_index; + + g1 = hash_get(h, gtrans, sizeof gtrans); + + if (g1 == g2) { + *sp = s1; + sp = &s1->s_stack; + } + else { + if (!g1) { + g1 = newgroup(); + g1->g_next = ghead; + ghead = g1; + + if (hash_add(h, gtrans, + sizeof gtrans, g1)) + chknull(NULL); + } + + s1->s_index = g1->g_id; + s1->s_stack = g1->g_member; + g1->g_member = s1; + done = 0; + } + } while (s1 = *sp); + + xmlHashFree(h, NULL); + } + } while (!done); + + /** + *** Establish group leaders and remap transitions. + **/ + + startgroup = dfa_states->s_index; + + for (g1 = ghead; g1; g1 = g1->g_next) + for (s1 = g1->g_member->s_stack; s1; s1 = s1->s_stack) + for (t1 = s1->s_backward; t1; t1 = t2) { + t2 = t1->t_backnext; + link_transition(t1, NULL, g1->g_member); + } + + /** + *** Remove redundant states and transitions. 
+ **/ + + for (g1 = ghead; g1; g1 = g1->g_next) { + g1->g_member->s_next = (t_state *) NULL; + + while ((s1 = g1->g_member->s_stack)) { + g1->g_member->s_stack = s1->s_stack; + + for (t1 = s1->s_forward; t1; t1 = t2) { + t2 = t1->t_forwnext; + unlink_transition(t1); + free((char *) t1); + } + + free((char *) s1); + } + } + + /** + *** Remove group support and relink DFA states. + **/ + + dfa_states = (t_state *) NULL; + s2 = (t_state *) NULL; + finstates = (t_state *) NULL; + + while (g1 = ghead) { + ghead = g1->g_next; + s1 = g1->g_member; + + if (g1->g_id == startgroup) + dfa_states = s1; /* Keep start state first. */ + else if (s1->s_final) { /* Then final states. */ + s1->s_next = finstates; + finstates = s1; + } + else { /* Finish with non-final states. */ + s1->s_next = s2; + s2 = s1; + } + + free((char *) g1); + } + + for (dfa_states->s_next = finstates; finstates->s_next;) + finstates = finstates->s_next; + + finstates->s_next = s2; +} + + +const char * +inttype(unsigned long max) + +{ + int i; + + for (i = 0; max; i++) + max >>= 1; + + if (i > 8 * sizeof(unsigned int)) + return "unsigned long"; + + if (i > 8 * sizeof(unsigned short)) + return "unsigned int"; + + if (i > 8 * sizeof(unsigned char)) + return "unsigned short"; + + return "unsigned char"; +} + + +listids(FILE * fp) + +{ + unsigned int pos; + t_chset * cp; + t_symlist * lp; + char * srcp; + char * dstp; + size_t srcc; + size_t dstc; + char buf[80]; + + fprintf(fp, "/**\n*** CCSID For arg Recognized name.\n"); + pos = 0; + + for (cp = chset_list; cp; cp = cp->c_next) { + if (pos) { + fprintf(fp, "\n"); + pos = 0; + } + + if (!cp->c_names) + continue; + + pos = fprintf(fp, "*** %5u %c ", cp->c_ccsid, + iconv_open_error(cp->c_fromUTF8)? 
' ': 'X'); + + for (lp = cp->c_names; lp; lp = lp->l_next) { + srcp = (char *) lp->l_symbol; + srcc = strlen(srcp); + dstp = buf; + dstc = sizeof buf; + iconv(utf82job, &srcp, &srcc, &dstp, &dstc); + srcc = dstp - buf; + + if (pos + srcc > 79) { + fprintf(fp, "\n***%22c", ' '); + pos = 25; + } + + pos += fprintf(fp, " %.*s", srcc, buf); + } + } + + if (pos) + fprintf(fp, "\n"); + + fprintf(fp, "**/\n\n"); +} + + +void +generate(FILE * fp) + +{ + unsigned int nstates; + unsigned int ntrans; + unsigned int maxfinal; + t_state * s; + t_transition * t; + unsigned int i; + unsigned int pos; + char * ns; + + /** + *** Assign indexes to states and transitions. + **/ + + nstates = 0; + ntrans = 0; + maxfinal = 0; + + for (s = dfa_states; s; s = s->s_next) { + s->s_index = nstates++; + + if (s->s_final) + maxfinal = nstates; + + for (t = s->s_forward; t; t = t->t_forwnext) + t->t_index = ntrans++; + } + + fprintf(fp, + "/**\n*** %u states, %u finals, %u transitions.\n**/\n\n", + nstates, maxfinal, ntrans); + fprintf(stderr, "%u states, %u finals, %u transitions.\n", + nstates, maxfinal, ntrans); + + /** + *** Generate types. + **/ + + fprintf(fp, "typedef unsigned short t_ccsid;\n"); + fprintf(fp, "typedef %-23s t_staterange;\n", inttype(nstates)); + fprintf(fp, "typedef %-23s t_transrange;\n\n", inttype(ntrans)); + + /** + *** Generate first transition index for each state. + **/ + + fprintf(fp, "static t_transrange trans_array[] = {\n"); + pos = 0; + ntrans = 0; + + for (s = dfa_states; s; s = s->s_next) { + pos += fprintf(fp, " %u,", ntrans); + + if (pos > 72) { + fprintf(fp, "\n"); + pos = 0; + } + + for (t = s->s_forward; t; t = t->t_forwnext) + ntrans++; + } + + fprintf(fp, " %u\n};\n\n", ntrans); + + /** + *** Generate final state info. 
+ **/ + + fprintf(fp, "static t_ccsid final_array[] = {\n"); + pos = 0; + ns =""; + i = 0; + + for (s = dfa_states; s && i++ < maxfinal; s = s->s_next) { + pos += fprintf(fp, "%s", ns); + ns = ","; + + if (pos > 72) { + fprintf(fp, "\n"); + pos = 0; + } + + pos += fprintf(fp, " %u", + s->s_final? s->s_final->c_ccsid + 1: 0); + } + + fprintf(fp, "\n};\n\n"); + + /** + *** Generate goto table. + **/ + + fprintf(fp, "static t_staterange goto_array[] = {\n"); + pos = 0; + + for (s = dfa_states; s; s = s->s_next) + for (t = s->s_forward; t; t = t->t_forwnext) { + pos += fprintf(fp, " %u,", t->t_to->s_index); + + if (pos > 72) { + fprintf(fp, "\n"); + pos = 0; + } + } + + fprintf(fp, " %u\n};\n\n", nstates); + + /** + *** Generate transition label table. + **/ + + fprintf(fp, "static unsigned char label_array[] = {\n"); + pos = 0; + ns =""; + + for (s = dfa_states; s; s = s->s_next) + for (t = s->s_forward; t; t = t->t_forwnext) { + pos += fprintf(fp, "%s", ns); + ns = ","; + + if (pos > 72) { + fprintf(fp, "\n"); + pos = 0; + } + + pos += fprintf(fp, " 0x%02X", t->t_token); + } + + fprintf(fp, "\n};\n", nstates); +} + + +main(argc, argv) +int argc; +char * * argv; + +{ + FILE * fp; + t_chset * csp; + char symbuf[20]; + + chset_list = (t_chset *) NULL; + initial_state = newstate(); + job2utf8 = iconv_open_ccsid(C_UTF8_CCSID, C_SOURCE_CCSID, 0); + utf82job = iconv_open_ccsid(C_SOURCE_CCSID, C_UTF8_CCSID, 0); + + if (argc != 4) { + fprintf(stderr, "Usage: %s ", *argv); + fprintf(stderr, " \n"); + exit(1); + } + + /** + *** Read CCSID/MIBenum associations. Define special names. + **/ + + read_assocs(argv[1]); + + /** + *** Read character set names and establish the case-independent + *** name DFA in all possible CCSIDs. + **/ + + read_iana(argv[2]); + + /** + *** Build DFA from NFA. + **/ + + builddfa(); + + /** + *** Delete NFA. + **/ + + deletenfa(); + + /** + *** Minimize the DFA state count. + **/ + + optimizedfa(); + + /** + *** Generate the table. 
+ **/ + + fp = fopen(argv[3], "w+"); + + if (!fp) { + perror(argv[3]); + exit(1); + } + + fprintf(fp, "/**\n"); + fprintf(fp, "*** Character set names table.\n"); + fprintf(fp, "*** Generated by program BLDCSNDFA from"); + fprintf(fp, " IANA character set assignment file\n"); + fprintf(fp, "*** and CCSID/MIBenum equivalence file.\n"); + fprintf(fp, "*** *** Do not edit by hand ***\n"); + fprintf(fp, "**/\n\n"); + listids(fp); + generate(fp); + + if (ferror(fp)) { + perror(argv[3]); + fclose(fp); + exit(1); + } + + fclose(fp); + iconv_close(job2utf8); + iconv_close(utf82job); + exit(0); +} diff --git a/libxml2-2.9.10/os400/iconv/bldcsndfa/ccsid_mibenum.dtd b/libxml2-2.9.10/os400/iconv/bldcsndfa/ccsid_mibenum.dtd new file mode 100644 index 0000000..0c834ec --- /dev/null +++ b/libxml2-2.9.10/os400/iconv/bldcsndfa/ccsid_mibenum.dtd @@ -0,0 +1,15 @@ + + + + + + diff --git a/libxml2-2.9.10/os400/iconv/bldcsndfa/ccsid_mibenum.xml b/libxml2-2.9.10/os400/iconv/bldcsndfa/ccsid_mibenum.xml new file mode 100644 index 0000000..8af38b4 --- /dev/null +++ b/libxml2-2.9.10/os400/iconv/bldcsndfa/ccsid_mibenum.xml @@ -0,0 +1,270 @@ + + + + + + + EBCDIC + + + + + + + + + + + + + + ASCII + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EUC-TH> + eucTH + csEUCTH + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + UTF16-BE + UTF16BE + UTF-16-BE + + + UTF16-LE + UTF16LE + UTF-16-LE + + + UTF8 + + + UTF32-BE + UTF32BE + UTF-32-BE + + + UTF32-LE + UTF32LE + UTF-32-LE + + + + + + + + + + + + + + + + + + korean + + + + + + + + EUC-CN + + + + + + + + + + + + + + + + + + + + + + + + + + + + + chinese + + + + + + + + + + + + + UCS-2 + UCS2 + + + + + + + + + + + + + diff --git a/libxml2-2.9.10/os400/iconv/bldcsndfa/character-sets.xhtml b/libxml2-2.9.10/os400/iconv/bldcsndfa/character-sets.xhtml new file mode 100644 index 
0000000..e1d5a3b --- /dev/null +++ b/libxml2-2.9.10/os400/iconv/bldcsndfa/character-sets.xhtml @@ -0,0 +1,3077 @@ + + + + + + + + Character Sets + + +

Character Sets

+
+
Last Updated
+
2013-01-23
+
Registration Procedure(s)
+
+
Expert Review
+
+
Expert(s)
+
+
Primary Expert Ned Freed and Secondary Expert Martin Dürst
+
+
Reference
+
[RFC2978]
+
Note
+
+
These are the official names for character sets that may be used in
+the Internet and may be referred to in Internet documentation.  These
+names are expressed in ANSI_X3.4-1968 which is commonly called
+US-ASCII or simply ASCII.  The character set most commonly used in the
+Internet and used especially in protocol standards is US-ASCII; this
+is strongly encouraged.  The use of the name US-ASCII is also
+encouraged.
+
+The character set names may be up to 40 characters taken from the
+printable characters of US-ASCII.  However, no distinction is made
+between use of upper and lower case letters.
+
+The MIBenum value is a unique value for use in MIBs to identify coded
+character sets.
+
+The value space for MIBenum values has been divided into three
+regions. The first region (3-999) consists of coded character sets
+that have been standardized by some standard setting organization.
+This region is intended for standards that do not have subset
+implementations. The second region (1000-1999) is for the Unicode and
+ISO/IEC 10646 coded character sets together with a specification of a
+(set of) sub-repertoires that may occur.  The third region (>1999) is
+intended for vendor specific coded character sets.
+
+        Assigned MIB enum Numbers
+        -------------------------
+        0-2             Reserved
+        3-999           Set By Standards Organizations
+        1000-1999       Unicode / 10646
+        2000-2999       Vendor
+
+The aliases that start with "cs" have been added for use with the
+IANA-CHARSET-MIB as originally defined in [RFC3808], and as currently
+maintained by IANA at [IANA registry ianacharset-mib].
+Note that the ianacharset-mib needs to be kept in sync with this
+registry.  These aliases that start with "cs" contain the standard
+numbers along with suggestive names in order to facilitate applications
+that want to display the names in user interfaces.  The "cs" prefix stands
+for character set and is provided for applications that need names which
+start with a lower case letter, use mixed case thereafter, and do not
+contain any special characters, such as underbar ("_") and dash ("-").
+
+If the character set is from an ISO standard, its cs alias is the ISO
+standard number or name.  If the character set is not from an ISO
+standard, but is registered with ISO (IPSJ/ITSCJ is the current ISO
+Registration Authority), the ISO Registry number is specified as
+ISOnnn followed by letters suggestive of the name or standards number
+of the code set.  When a national or international standard is
+revised, the year of revision is added to the cs alias of the new
+character set entry in the IANA Registry in order to distinguish the
+revised character set from the original character set.
+
+
Alternative Formats
+
+
Plain text
+
+
+
+
Alternative Formats
+
+
CSV
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Preferred MIME NameNameMIBenumSourceReferenceAliasesNote
US-ASCIIUS-ASCII3ANSI X3.4-1986[RFC2046]iso-ir-6
ANSI_X3.4-1968
ANSI_X3.4-1986
ISO_646.irv:1991
ISO646-US
US-ASCII
us
IBM367
cp367
csASCII
ISO-8859-1ISO_8859-1:19874 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-100
ISO_8859-1
ISO-8859-1
latin1
l1
IBM819
CP819
csISOLatin1
ISO-8859-2ISO_8859-2:19875 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-101
ISO_8859-2
ISO-8859-2
latin2
l2
csISOLatin2
ISO-8859-3ISO_8859-3:19886 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-109
ISO_8859-3
ISO-8859-3
latin3
l3
csISOLatin3
ISO-8859-4ISO_8859-4:19887 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-110
ISO_8859-4
ISO-8859-4
latin4
l4
csISOLatin4
ISO-8859-5ISO_8859-5:19888 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-144
ISO_8859-5
ISO-8859-5
cyrillic
csISOLatinCyrillic
ISO-8859-6ISO_8859-6:19879 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-127
ISO_8859-6
ISO-8859-6
ECMA-114
ASMO-708
arabic
csISOLatinArabic
ISO-8859-7ISO_8859-7:198710 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1947][RFC1345][Keld_Simonsen]iso-ir-126
ISO_8859-7
ISO-8859-7
ELOT_928
ECMA-118
greek
greek8
csISOLatinGreek
ISO-8859-8ISO_8859-8:198811 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-138
ISO_8859-8
ISO-8859-8
hebrew
csISOLatinHebrew
ISO-8859-9ISO_8859-9:198912 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-148
ISO_8859-9
ISO-8859-9
latin5
l5
csISOLatin5
ISO-8859-10ISO-8859-1013 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-157
l6
ISO_8859-10:1992
csISOLatin6
latin6
ISO_6937-2-add14 + [ISO-IR: International Register of Escape Sequences] and ISO 6937-2:1983
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-142
csISOTextComm
JIS_X020115JIS X 0201-1976. One byte only, this is equivalent to +JIS/Roman (similar to ASCII) plus eight-bit half-width +Katakana[RFC1345][Keld_Simonsen]X0201
csHalfWidthKatakana
JIS_Encoding16JIS X 0202-1991. Uses ISO 2022 escape sequences to +shift code sets as documented in JIS X 0202-1991.csJISEncoding
Shift_JISShift_JIS17This charset is an extension of csHalfWidthKatakana by +adding graphic characters in JIS X 0208. The CCS's are +JIS X0201:1997 and JIS X0208:1997. The +complete definition is shown in Appendix 1 of JIS +X0208:1997. +This charset can be used for the top-level media type "text".MS_Kanji
csShiftJIS
EUC-JPExtended_UNIX_Code_Packed_Format_for_Japanese18Standardized by OSF, UNIX International, and UNIX Systems +Laboratories Pacific. Uses ISO 2022 rules to select +code set 0: US-ASCII (a single 7-bit byte set) +code set 1: JIS X0208-1990 (a double 8-bit byte set) +restricted to A0-FF in both bytes +code set 2: Half Width Katakana (a single 7-bit byte set) +requiring SS2 as the character prefix +code set 3: JIS X0212-1990 (a double 7-bit byte set) +restricted to A0-FF in both bytes +requiring SS3 as the character prefixcsEUCPkdFmtJapanese
EUC-JP
Extended_UNIX_Code_Fixed_Width_for_Japanese19Used in Japan. Each character is 2 octets. +code set 0: US-ASCII (a single 7-bit byte set) +1st byte = 00 +2nd byte = 20-7E +code set 1: JIS X0208-1990 (a double 7-bit byte set) +restricted to A0-FF in both bytes +code set 2: Half Width Katakana (a single 7-bit byte set) +1st byte = 00 +2nd byte = A0-FF +code set 3: JIS X0212-1990 (a double 7-bit byte set) +restricted to A0-FF in +the first byte +and 21-7E in the second bytecsEUCFixWidJapanese
BS_473020 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-4
ISO646-GB
gb
uk
csISO4UnitedKingdom
SEN_850200_C21 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-11
ISO646-SE2
se2
csISO11SwedishForNames
IT22 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-15
ISO646-IT
csISO15Italian
ES23 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-17
ISO646-ES
csISO17Spanish
DIN_6600324 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-21
de
ISO646-DE
csISO21German
NS_4551-125 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-60
ISO646-NO
no
csISO60DanishNorwegian
csISO60Norwegian1
NF_Z_62-01026 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-69
ISO646-FR
fr
csISO69French
ISO-10646-UTF-127Universal Transfer Format (1), this is the multibyte +encoding, that subsets ASCII-7. It does not have byte +ordering issues.csISO10646UTF1
ISO_646.basic:198328 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]ref
csISO646basic1983
INVARIANT29[RFC1345][Keld_Simonsen]csINVARIANT
ISO_646.irv:198330 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-2
irv
csISO2IntlRefVersion
NATS-SEFI31 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-8-1
csNATSSEFI
NATS-SEFI-ADD32 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-8-2
csNATSSEFIADD
NATS-DANO33 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-9-1
csNATSDANO
NATS-DANO-ADD34 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-9-2
csNATSDANOADD
SEN_850200_B35 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-10
FI
ISO646-FI
ISO646-SE
se
csISO10Swedish
KS_C_5601-198736 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-149
KS_C_5601-1989
KSC_5601
korean
csKSC56011987
ISO-2022-KRISO-2022-KR37[RFC1557] (see also KS_C_5601-1987)[RFC1557][Woohyong_Choi]csISO2022KR
EUC-KREUC-KR38[RFC1557] (see also KS_C_5861-1992)[RFC1557][Woohyong_Choi]csEUCKR
ISO-2022-JPISO-2022-JP39[RFC1468] (see also [RFC2237])[RFC1468][Jun_Murai]csISO2022JP
ISO-2022-JP-2ISO-2022-JP-240 + [RFC1554] + [RFC1554][Masataka_Ohta]csISO2022JP2
JIS_C6220-1969-jp41 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]JIS_C6220-1969
iso-ir-13
katakana
x0201-7
csISO13JISC6220jp
JIS_C6220-1969-ro42 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-14
jp
ISO646-JP
csISO14JISC6220ro
PT43 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-16
ISO646-PT
csISO16Portuguese
greek7-old44 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-18
csISO18Greek7Old
latin-greek45 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-19
csISO19LatinGreek
NF_Z_62-010_(1973)46 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-25
ISO646-FR1
csISO25French
Latin-greek-147 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-27
csISO27LatinGreek1
ISO_542748 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-37
csISO5427Cyrillic
JIS_C6226-197849 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-42
csISO42JISC62261978
BS_viewdata50 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-47
csISO47BSViewdata
INIS51 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-49
csISO49INIS
INIS-852 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-50
csISO50INIS8
INIS-cyrillic53 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-51
csISO51INISCyrillic
ISO_5427:198154 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-54
ISO5427Cyrillic1981
csISO54271981
ISO_5428:198055 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-55
csISO5428Greek
GB_1988-8056 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-57
cn
ISO646-CN
csISO57GB1988
GB_2312-8057 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-58
chinese
csISO58GB231280
NS_4551-258 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]ISO646-NO2
iso-ir-61
no2
csISO61Norwegian2
videotex-suppl59 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-70
csISO70VideotexSupp1
PT260 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-84
ISO646-PT2
csISO84Portuguese2
ES261 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-85
ISO646-ES2
csISO85Spanish2
MSZ_7795.362 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-86
ISO646-HU
hu
csISO86Hungarian
JIS_C6226-198363 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-87
x0208
JIS_X0208-1983
csISO87JISX0208
greek764 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-88
csISO88Greek7
ASMO_44965 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]ISO_9036
arabic7
iso-ir-89
csISO89ASMO449
iso-ir-9066 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]csISO90
JIS_C6229-1984-a67 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-91
jp-ocr-a
csISO91JISC62291984a
JIS_C6229-1984-b68 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-92
ISO646-JP-OCR-B
jp-ocr-b
csISO92JISC62991984b
JIS_C6229-1984-b-add69 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-93
jp-ocr-b-add
csISO93JIS62291984badd
JIS_C6229-1984-hand70 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-94
jp-ocr-hand
csISO94JIS62291984hand
JIS_C6229-1984-hand-add71 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-95
jp-ocr-hand-add
csISO95JIS62291984handadd
JIS_C6229-1984-kana72 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-96
csISO96JISC62291984kana
ISO_2033-198373 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-98
e13b
csISO2033
ANSI_X3.110-198374 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-99
CSA_T500-1983
NAPLPS
csISO99NAPLPS
T.61-7bit75 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-102
csISO102T617bit
T.61-8bit76 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]T.61
iso-ir-103
csISO103T618bit
ECMA-cyrillic77[ISO registry] + (formerly [ECMA + registry])iso-ir-111
KOI8-E
csISO111ECMACyrillic
CSA_Z243.4-1985-178 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-121
ISO646-CA
csa7-1
csa71
ca
csISO121Canadian1
CSA_Z243.4-1985-279 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-122
ISO646-CA2
csa7-2
csa72
csISO122Canadian2
CSA_Z243.4-1985-gr80 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-123
csISO123CSAZ24341985gr
ISO-8859-6-EISO_8859-6-E81 + [RFC1556] + [RFC1556][IANA]csISO88596E
ISO-8859-6-E
ISO-8859-6-IISO_8859-6-I82 + [RFC1556] + [RFC1556][IANA]csISO88596I
ISO-8859-6-I
T.101-G283 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-128
csISO128T101G2
ISO-8859-8-EISO_8859-8-E84 + [RFC1556] + [RFC1556][Hank_Nussbacher]csISO88598E
ISO-8859-8-E
ISO-8859-8-IISO_8859-8-I85 + [RFC1556] + [RFC1556][Hank_Nussbacher]csISO88598I
ISO-8859-8-I
CSN_36910386 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-139
csISO139CSN369103
JUS_I.B1.00287 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-141
ISO646-YU
js
yu
csISO141JUSIB1002
IEC_P27-188 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-143
csISO143IECP271
JUS_I.B1.003-serb89 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-146
serbian
csISO146Serbian
JUS_I.B1.003-mac90 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]macedonian
iso-ir-147
csISO147Macedonian
greek-ccitt91 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-150
csISO150
csISO150GreekCCITT
NC_NC00-10:8192 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]cuba
iso-ir-151
ISO646-CU
csISO151Cuba
ISO_6937-2-2593 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-152
csISO6937Add
GOST_19768-7494 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]ST_SEV_358-88
iso-ir-153
csISO153GOST1976874
ISO_8859-supp95 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-154
latin1-2-5
csISO8859Supp
ISO_10367-box96 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]iso-ir-155
csISO10367Box
latin-lap97 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]lap
iso-ir-158
csISO158Lap
JIS_X0212-199098 + [ISO-IR: International Register of Escape Sequences]
+ Note: The current registration authority is IPSJ/ITSCJ, Japan. +
[RFC1345][Keld_Simonsen]x0212
iso-ir-159
csISO159JISX02121990
DS_208999Danish Standard, DS 2089, February 1974[RFC1345][Keld_Simonsen]DS2089
ISO646-DK
dk
csISO646Danish
us-dk100[RFC1345][Keld_Simonsen]csUSDK
dk-us101[RFC1345][Keld_Simonsen]csDKUS
KSC5636102[RFC1345][Keld_Simonsen]ISO646-KR
csKSC5636
UNICODE-1-1-UTF-7103 + [RFC1642] + [RFC1642]csUnicode11UTF7
ISO-2022-CN104 + [RFC1922] + [RFC1922]csISO2022CN
ISO-2022-CN-EXT105 + [RFC1922] + [RFC1922]csISO2022CNEXT
UTF-8106 + [RFC3629] + [RFC3629]csUTF8
ISO-8859-13109ISO See [http://www.iana.org/assignments/charset-reg/ISO-8859-13][Vladas_Tumasonis]csISO885913
ISO-8859-14110ISO See [http://www.iana.org/assignments/charset-reg/ISO-8859-14] [Keld_Simonsen_2]iso-ir-199
ISO_8859-14:1998
ISO_8859-14
latin8
iso-celtic
l8
csISO885914
ISO-8859-15111ISO +Please see: [http://www.iana.org/assignments/charset-reg/ISO-8859-15]ISO_8859-15
Latin-9
csISO885915
ISO-8859-16112ISOiso-ir-226
ISO_8859-16:2001
ISO_8859-16
latin10
l10
csISO885916
GBK113Chinese IT Standardization Technical Committee +Please see: [http://www.iana.org/assignments/charset-reg/GBK]CP936
MS936
windows-936
csGBK
GB18030114Chinese IT Standardization Technical Committee +Please see: [http://www.iana.org/assignments/charset-reg/GB18030]csGB18030
OSD_EBCDIC_DF04_15115Fujitsu-Siemens standard mainframe EBCDIC encoding +Please see: [http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15]csOSDEBCDICDF0415
OSD_EBCDIC_DF03_IRV116Fujitsu-Siemens standard mainframe EBCDIC encoding +Please see: [http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV]csOSDEBCDICDF03IRV
OSD_EBCDIC_DF04_1117Fujitsu-Siemens standard mainframe EBCDIC encoding +Please see: [http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1]csOSDEBCDICDF041
ISO-11548-1118See [http://www.iana.org/assignments/charset-reg/ISO-11548-1] [Samuel_Thibault]ISO_11548-1
ISO_TR_11548-1
csISO115481
KZ-1048119See [http://www.iana.org/assignments/charset-reg/KZ-1048] [Sairan_M_Kikkarin][Alexei_Veremeev]STRK1048-2002
RK1048
csKZ1048
ISO-10646-UCS-21000the 2-octet Basic Multilingual Plane, aka Unicode +this needs to specify network byte order: the standard +does not specify (it is a 16-bit integer space)csUnicode
ISO-10646-UCS-41001the full code space. (same comment about byte order, +these are 31-bit numbers.)csUCS4
ISO-10646-UCS-Basic1002ASCII subset of Unicode. Basic Latin = collection 1 +See ISO 10646, Appendix AcsUnicodeASCII
ISO-10646-Unicode-Latin11003ISO Latin-1 subset of Unicode. Basic Latin and Latin-1 +Supplement = collections 1 and 2. See ISO 10646, +Appendix A. See [RFC1815].csUnicodeLatin1
ISO-10646
ISO-10646-J-11004ISO 10646 Japanese, see [RFC1815].csUnicodeJapanese
ISO-Unicode-IBM-12611005IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261csUnicodeIBM1261
ISO-Unicode-IBM-12681006IBM Latin-4 Extended Presentation Set, GCSGID: 1268csUnicodeIBM1268
ISO-Unicode-IBM-12761007IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276csUnicodeIBM1276
ISO-Unicode-IBM-12641008IBM Arabic Presentation Set, GCSGID: 1264csUnicodeIBM1264
ISO-Unicode-IBM-12651009IBM Hebrew Presentation Set, GCSGID: 1265csUnicodeIBM1265
UNICODE-1-11010 + [RFC1641] + [RFC1641]csUnicode11
SCSU1011SCSU See [http://www.iana.org/assignments/charset-reg/SCSU] [Markus_Scherer]csSCSU
UTF-71012 + [RFC2152] + [RFC2152]csUTF7
UTF-16BE1013 + [RFC2781] + [RFC2781]csUTF16BE
UTF-16LE1014 + [RFC2781] + [RFC2781]csUTF16LE
UTF-161015 + [RFC2781] + [RFC2781]csUTF16
CESU-81016 + [http://www.unicode.org/unicode/reports/tr26] + [Toby_Phipps]csCESU8
csCESU-8
UTF-321017 + [http://www.unicode.org/unicode/reports/tr19/] + [Mark_Davis]csUTF32
UTF-32BE1018 + [http://www.unicode.org/unicode/reports/tr19/] + [Mark_Davis]csUTF32BE
UTF-32LE1019 + [http://www.unicode.org/unicode/reports/tr19/] + [Mark_Davis]csUTF32LE
BOCU-11020 + [http://www.unicode.org/notes/tn6/] + [Markus_Scherer]csBOCU1
csBOCU-1
ISO-8859-1-Windows-3.0-Latin-12000Extended ISO 8859-1 Latin-1 for Windows 3.0. +PCL Symbol Set id: 9U[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csWindows30Latin1
ISO-8859-1-Windows-3.1-Latin-12001Extended ISO 8859-1 Latin-1 for Windows 3.1. +PCL Symbol Set id: 19U[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csWindows31Latin1
ISO-8859-2-Windows-Latin-22002Extended ISO 8859-2. Latin-2 for Windows 3.1. +PCL Symbol Set id: 9E[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csWindows31Latin2
ISO-8859-9-Windows-Latin-52003Extended ISO 8859-9. Latin-5 for Windows 3.1 +PCL Symbol Set id: 5T[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csWindows31Latin5
hp-roman82004LaserJet IIP Printer User's Manual, +HP part no 33471-90901, Hewlett-Packard, June 1989.[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.][RFC1345][Keld_Simonsen]roman8
r8
csHPRoman8
Adobe-Standard-Encoding2005PostScript Language Reference Manual +PCL Symbol Set id: 10J[Adobe Systems Incorporated, PostScript Language Reference +Manual, second edition, Addison-Wesley Publishing Company, +Inc., 1990.]csAdobeStandardEncoding
Ventura-US2006Ventura US. ASCII plus characters typically used in +publishing, like pilcrow, copyright, registered, trade mark, +section, dagger, and double dagger in the range A0 (hex) +to FF (hex). +PCL Symbol Set id: 14J[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csVenturaUS
Ventura-International2007Ventura International. ASCII plus coded characters similar +to Roman8. +PCL Symbol Set id: 13J[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csVenturaInternational
DEC-MCS2008VAX/VMS User's Manual, +Order Number: AI-Y517A-TE, April 1986.[RFC1345][Keld_Simonsen]dec
csDECMCS
IBM8502009IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp850
850
csPC850Multilingual
PC8-Danish-Norwegian2012PC Danish Norwegian +8-bit PC set for Danish Norwegian +PCL Symbol Set id: 11U[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csPC8DanishNorwegian
IBM8622013IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp862
862
csPC862LatinHebrew
PC8-Turkish2014PC Latin Turkish. PCL Symbol Set id: 9T[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csPC8Turkish
IBM-Symbols2015Presentation Set, CPGID: 259[IBM Corporation, "ABOUT TYPE: IBM's Technical Reference +for Core Interchange Digitized Type", Publication number +S544-3708-01]csIBMSymbols
IBM-Thai2016Presentation Set, CPGID: 838[IBM Corporation, "ABOUT TYPE: IBM's Technical Reference +for Core Interchange Digitized Type", Publication number +S544-3708-01]csIBMThai
HP-Legal2017PCL 5 Comparison Guide, Hewlett-Packard, +HP part number 5961-0510, October 1992 +PCL Symbol Set id: 1U[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csHPLegal
HP-Pi-font2018PCL 5 Comparison Guide, Hewlett-Packard, +HP part number 5961-0510, October 1992 +PCL Symbol Set id: 15U[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csHPPiFont
HP-Math82019PCL 5 Comparison Guide, Hewlett-Packard, +HP part number 5961-0510, October 1992 +PCL Symbol Set id: 8M[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csHPMath8
Adobe-Symbol-Encoding2020PostScript Language Reference Manual +PCL Symbol Set id: 5M[Adobe Systems Incorporated, PostScript Language Reference +Manual, second edition, Addison-Wesley Publishing Company, +Inc., 1990.]csHPPSMath
HP-DeskTop2021PCL 5 Comparison Guide, Hewlett-Packard, +HP part number 5961-0510, October 1992 +PCL Symbol Set id: 7J[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csHPDesktop
Ventura-Math2022PCL 5 Comparison Guide, Hewlett-Packard, +HP part number 5961-0510, October 1992 +PCL Symbol Set id: 6M[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csVenturaMath
Microsoft-Publishing2023PCL 5 Comparison Guide, Hewlett-Packard, +HP part number 5961-0510, October 1992 +PCL Symbol Set id: 6J[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]csMicrosoftPublishing
Windows-31J2024Windows Japanese. A further extension of Shift_JIS +to include NEC special characters (Row 13), NEC +selection of IBM extensions (Rows 89 to 92), and IBM +extensions (Rows 115 to 119). The CCS's are +JIS X0201:1997, JIS X0208:1997, and these extensions. +This charset can be used for the top-level media type "text", +but it is of limited or specialized use (see [RFC2278]). +PCL Symbol Set id: 19KcsWindows31J
GB2312GB23122025Chinese for People's Republic of China (PRC) mixed one byte, +two byte set: +20-7E = one byte ASCII +A1-FE = two byte PRC Kanji +See GB 2312-80 +PCL Symbol Set Id: 18CcsGB2312
Big5Big52026Chinese for Taiwan Multi-byte set. +PCL Symbol Set Id: 18TcsBig5
macintosh2027The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991[RFC1345][Keld_Simonsen]mac
csMacintosh
IBM0372028IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp037
ebcdic-cp-us
ebcdic-cp-ca
ebcdic-cp-wt
ebcdic-cp-nl
csIBM037
IBM0382029IBM 3174 Character Set Ref, GA27-3831-02, March 1990[RFC1345][Keld_Simonsen]EBCDIC-INT
cp038
csIBM038
IBM2732030IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]CP273
csIBM273
IBM2742031IBM 3174 Character Set Ref, GA27-3831-02, March 1990[RFC1345][Keld_Simonsen]EBCDIC-BE
CP274
csIBM274
IBM2752032IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]EBCDIC-BR
cp275
csIBM275
IBM2772033IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]EBCDIC-CP-DK
EBCDIC-CP-NO
csIBM277
IBM2782034IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]CP278
ebcdic-cp-fi
ebcdic-cp-se
csIBM278
IBM2802035IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]CP280
ebcdic-cp-it
csIBM280
IBM2812036IBM 3174 Character Set Ref, GA27-3831-02, March 1990[RFC1345][Keld_Simonsen]EBCDIC-JP-E
cp281
csIBM281
IBM2842037IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]CP284
ebcdic-cp-es
csIBM284
IBM2852038IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]CP285
ebcdic-cp-gb
csIBM285
IBM2902039IBM 3174 Character Set Ref, GA27-3831-02, March 1990[RFC1345][Keld_Simonsen]cp290
EBCDIC-JP-kana
csIBM290
IBM2972040IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp297
ebcdic-cp-fr
csIBM297
IBM4202041IBM NLS RM Vol2 SE09-8002-01, March 1990, +IBM NLS RM p 11-11[RFC1345][Keld_Simonsen]cp420
ebcdic-cp-ar1
csIBM420
IBM4232042IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp423
ebcdic-cp-gr
csIBM423
IBM4242043IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp424
ebcdic-cp-he
csIBM424
IBM4372011IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp437
437
csPC8CodePage437
IBM5002044IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]CP500
ebcdic-cp-be
ebcdic-cp-ch
csIBM500
IBM8512045IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp851
851
csIBM851
IBM8522010IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp852
852
csPCp852
IBM8552046IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp855
855
csIBM855
IBM8572047IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp857
857
csIBM857
IBM8602048IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp860
860
csIBM860
IBM8612049IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp861
861
cp-is
csIBM861
IBM8632050IBM Keyboard layouts and code pages, PN 07G4586 June 1991[RFC1345][Keld_Simonsen]cp863
863
csIBM863
IBM8642051IBM Keyboard layouts and code pages, PN 07G4586 June 1991[RFC1345][Keld_Simonsen]cp864
csIBM864
IBM8652052IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987)[RFC1345][Keld_Simonsen]cp865
865
csIBM865
IBM8682053IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]CP868
cp-ar
csIBM868
IBM8692054IBM Keyboard layouts and code pages, PN 07G4586 June 1991[RFC1345][Keld_Simonsen]cp869
869
cp-gr
csIBM869
IBM8702055IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]CP870
ebcdic-cp-roece
ebcdic-cp-yu
csIBM870
IBM8712056IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]CP871
ebcdic-cp-is
csIBM871
IBM8802057IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp880
EBCDIC-Cyrillic
csIBM880
IBM8912058IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp891
csIBM891
IBM9032059IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp903
csIBM903
IBM9042060IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]cp904
904
csIBBM904
IBM9052061IBM 3174 Character Set Ref, GA27-3831-02, March 1990[RFC1345][Keld_Simonsen]CP905
ebcdic-cp-tr
csIBM905
IBM9182062IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]CP918
ebcdic-cp-ar2
csIBM918
IBM10262063IBM NLS RM Vol2 SE09-8002-01, March 1990[RFC1345][Keld_Simonsen]CP1026
csIBM1026
EBCDIC-AT-DE2064IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csIBMEBCDICATDE
EBCDIC-AT-DE-A2065IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICATDEA
EBCDIC-CA-FR2066IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICCAFR
EBCDIC-DK-NO2067IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICDKNO
EBCDIC-DK-NO-A2068IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICDKNOA
EBCDIC-FI-SE2069IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICFISE
EBCDIC-FI-SE-A2070IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICFISEA
EBCDIC-FR2071IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICFR
EBCDIC-IT2072IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICIT
EBCDIC-PT2073IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICPT
EBCDIC-ES2074IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICES
EBCDIC-ES-A2075IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICESA
EBCDIC-ES-S2076IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICESS
EBCDIC-UK2077IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICUK
EBCDIC-US2078IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987[RFC1345][Keld_Simonsen]csEBCDICUS
UNKNOWN-8BIT2079[RFC1428]csUnknown8BiT
MNEMONIC2080[RFC1345], also known as "mnemonic+ascii+38"[RFC1345][Keld_Simonsen]csMnemonic
MNEM2081[RFC1345], also known as "mnemonic+ascii+8200"[RFC1345][Keld_Simonsen]csMnem
VISCII2082 + [RFC1456] + [RFC1456]csVISCII
VIQR2083 + [RFC1456] + [RFC1456]csVIQR
KOI8-RKOI8-R2084[RFC1489], based on GOST-19768-74, ISO-6937/8, +INIS-Cyrillic, ISO-5427.[RFC1489]csKOI8R
HZ-GB-23122085[RFC1842], [RFC1843][RFC1843][RFC1842]
IBM8662086IBM NLDG Volume 2 (SE09-8002-03) August 1994[Rick_Pond]cp866
866
csIBM866
IBM7752087HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996[Hewlett-Packard Company, "HP PCL 5 Comparison Guide", +(P/N 5021-0329) pp B-13, 1996.]cp775
csPC775Baltic
KOI8-U2088 + [RFC2319] + [RFC2319]csKOI8U
IBM008582089IBM See [http://www.iana.org/assignments/charset-reg/IBM00858] [Tamer_Mahdi]CCSID00858
CP00858
PC-Multilingual-850+euro
csIBM00858
IBM009242090IBM See [http://www.iana.org/assignments/charset-reg/IBM00924] [Tamer_Mahdi]CCSID00924
CP00924
ebcdic-Latin9--euro
csIBM00924
IBM011402091IBM See [http://www.iana.org/assignments/charset-reg/IBM01140] [Tamer_Mahdi]CCSID01140
CP01140
ebcdic-us-37+euro
csIBM01140
IBM011412092IBM See [http://www.iana.org/assignments/charset-reg/IBM01141] [Tamer_Mahdi]CCSID01141
CP01141
ebcdic-de-273+euro
csIBM01141
IBM011422093IBM See [http://www.iana.org/assignments/charset-reg/IBM01142] [Tamer_Mahdi]CCSID01142
CP01142
ebcdic-dk-277+euro
ebcdic-no-277+euro
csIBM01142
IBM011432094IBM See [http://www.iana.org/assignments/charset-reg/IBM01143] [Tamer_Mahdi]CCSID01143
CP01143
ebcdic-fi-278+euro
ebcdic-se-278+euro
csIBM01143
IBM011442095IBM See [http://www.iana.org/assignments/charset-reg/IBM01144] [Tamer_Mahdi]CCSID01144
CP01144
ebcdic-it-280+euro
csIBM01144
IBM011452096IBM See [http://www.iana.org/assignments/charset-reg/IBM01145] [Tamer_Mahdi]CCSID01145
CP01145
ebcdic-es-284+euro
csIBM01145
IBM011462097IBM See [http://www.iana.org/assignments/charset-reg/IBM01146] [Tamer_Mahdi]CCSID01146
CP01146
ebcdic-gb-285+euro
csIBM01146
IBM011472098IBM See [http://www.iana.org/assignments/charset-reg/IBM01147] [Tamer_Mahdi]CCSID01147
CP01147
ebcdic-fr-297+euro
csIBM01147
IBM011482099IBM See [http://www.iana.org/assignments/charset-reg/IBM01148] [Tamer_Mahdi]CCSID01148
CP01148
ebcdic-international-500+euro
csIBM01148
IBM011492100IBM See [http://www.iana.org/assignments/charset-reg/IBM01149] [Tamer_Mahdi]CCSID01149
CP01149
ebcdic-is-871+euro
csIBM01149
Big5-HKSCS2101See [http://www.iana.org/assignments/charset-reg/Big5-HKSCS][Nicky_Yick]csBig5HKSCS
IBM10472102IBM1047 (EBCDIC Latin 1/Open Systems) +[http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf][Reuel_Robrigado]IBM-1047
csIBM1047
PTCP1542103See [http://www.iana.org/assignments/charset-reg/PTCP154][Alexander_Uskov]csPTCP154
PT154
CP154
Cyrillic-Asian
csPTCP154
Amiga-12512104See [http://www.amiga.ultranet.ru/Amiga-1251.html]Ami1251
Amiga1251
Ami-1251
csAmiga1251 +(Aliases are provided for historical reasons and should not be used) [Malyshev]
KOI7-switched2105See [http://www.iana.org/assignments/charset-reg/KOI7-switched]csKOI7switched
BRF2106See [http://www.iana.org/assignments/charset-reg/BRF] [Samuel_Thibault]csBRF
TSCII2107See [http://www.iana.org/assignments/charset-reg/TSCII] [Kuppuswamy_Kalyanasu]csTSCII
CP519322108See [http://www.iana.org/assignments/charset-reg/CP51932] [Yui_Naruse]csCP51932
windows-8742109See [http://www.iana.org/assignments/charset-reg/windows-874] [Shawn_Steele]cswindows874
windows-12502250Microsoft [http://www.iana.org/assignments/charset-reg/windows-1250] [Katya_Lazhintseva]cswindows1250
windows-12512251Microsoft [http://www.iana.org/assignments/charset-reg/windows-1251] [Katya_Lazhintseva]cswindows1251
windows-12522252Microsoft [http://www.iana.org/assignments/charset-reg/windows-1252] [Chris_Wendt]cswindows1252
windows-12532253Microsoft [http://www.iana.org/assignments/charset-reg/windows-1253] [Katya_Lazhintseva]cswindows1253
windows-12542254Microsoft [http://www.iana.org/assignments/charset-reg/windows-1254] [Katya_Lazhintseva]cswindows1254
windows-12552255Microsoft [http://www.iana.org/assignments/charset-reg/windows-1255] [Katya_Lazhintseva]cswindows1255
windows-12562256Microsoft [http://www.iana.org/assignments/charset-reg/windows-1256] [Katya_Lazhintseva]cswindows1256
windows-12572257Microsoft [http://www.iana.org/assignments/charset-reg/windows-1257] [Katya_Lazhintseva]cswindows1257
windows-12582258Microsoft [http://www.iana.org/assignments/charset-reg/windows-1258] [Katya_Lazhintseva]cswindows1258
TIS-6202259Thai Industrial Standards Institute (TISI) [Trin_Tantsetthi]csTIS620
CP502202260See [http://www.iana.org/assignments/charset-reg/CP50220] [Yui_Naruse]csCP50220
+

People

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
IDNameContact URILast Updated
+ [Alexander_Uskov] + Alexander Uskov + mailto:auskov&idc.kz + 2002-09
+ [Alexei_Veremeev] + Alexei Veremeev + mailto:Alexey.Veremeev&oracle.com + 2006-12-07
+ [Chris_Wendt] + Chris Wendt + mailto:christw&microsoft.com + 1999-12
+ [Hank_Nussbacher] + Hank Nussbacher + mailto:hank&vm.tau.ac.il +
+ [IANA] + Internet Assigned Numbers Authority + mailto:iana&iana.org +
+ [Jun_Murai] + Jun Murai + mailto:jun&wide.ad.jp +
+ [Katya_Lazhintseva] + Katya Lazhintseva + mailto:katyal&microsoft.com + 1996-05
+ [Keld_Simonsen] + Keld Simonsen + mailto:Keld.Simonsen&dkuug.dk +
+ [Keld_Simonsen_2] + Keld Simonsen + mailto:Keld.Simonsen&rap.dk + 2000-08
+ [Kuppuswamy_Kalyanasu] + Kuppuswamy Kalyanasundaram + mailto:kalyan.geo&yahoo.com + 2007-05-14
+ [Mark_Davis] + Mark Davis + mailto:mark&unicode.org + 2002-04
+ [Markus_Scherer] + Markus Scherer + mailto:markus.scherer&jtcsv.com + 2002-09
+ [Masataka_Ohta] + Masataka Ohta + mailto:mohta&cc.titech.ac.jp + 1995-07
+ [Nicky_Yick] + Nicky Yick + mailto:cliac&itsd.gcn.gov.hk + 2000-10
+ [Reuel_Robrigado] + Reuel Robrigado + mailto:reuelr&ca.ibm.com + 2002-09
+ [Rick_Pond] + Rick Pond + mailto:rickpond&vnet.ibm.com + 1997-03
+ [Sairan_M_Kikkarin] + Sairan M. Kikkarin + mailto:sairan&sci.kz + 2006-12-07
+ [Samuel_Thibault] + Samuel Thibault + mailto:samuel.thibault&ens-lyon.org + 2006-12-07
+ [Shawn_Steele] + Shawn Steele + mailto:Shawn.Steele&microsoft.com + 2010-11-04
+ [Tamer_Mahdi] + Tamer Mahdi + mailto:tamer&ca.ibm.com + 2000-08
+ [Toby_Phipps] + Toby Phipps + mailto:tphipps&peoplesoft.com + 2002-03
+ [Trin_Tantsetthi] + Trin Tantsetthi + mailto:trin&mozart.inet.co.th + 1998-09
+ [Vladas_Tumasonis] + Vladas Tumasonis + mailto:vladas.tumasonis&maf.vu.lt + 2000-08
+ [Woohyong_Choi] + Woohyong Choi + mailto:whchoi&cosmos.kaist.ac.kr +
+ [Yui_Naruse] + Yui Naruse + mailto:naruse&airemix.jp + 2011-09-23
+ + -- cgit v1.2.3