9997 Import iconv modules from g11n consolidation
Reviewed by: Andy Fiddaman <omnios@citrus-it.net>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>
diff --git a/usr/src/lib/iconv_modules/zh/common/zh_TW-iso2022-7%zh_TW-big5.c b/usr/src/lib/iconv_modules/zh/common/zh_TW-iso2022-7%zh_TW-big5.c
new file mode 100644
index 0000000..9113b82
--- /dev/null
+++ b/usr/src/lib/iconv_modules/zh/common/zh_TW-iso2022-7%zh_TW-big5.c
@@ -0,0 +1,419 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1995, by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "cns11643_big5.h"   /* CNS 11643 to Big-5 mapping table */
+
+#define	MSB	0x80	/* most significant bit */
+#define	MBYTE	0x8e	/* multi-byte (4 byte character) */
+#define	PMASK	0xa0	/* plane number mask */
+#define ONEBYTE	0xff	/* right most byte */
+#define MSB_OFF	0x7f	/* mask off MBS */
+
+#define SI	0x0f	/* shift in */
+#define SO	0x0e	/* shift out */
+#define ESC	0x1b	/* escape */
+
+/*
+ * static const char plane_char[] = "0GH23456789:;<=>?";
+ * static const char plane_char[] = "0GHIJKLMNOPQRSTUV";
+ * #define	GET_PLANEC(i)	(plane_char[i])
+ */
+
+#define NON_ID_CHAR '_'	/* non-identified character */
+
+typedef struct _icv_state {
+	char	keepc[4];	/* maximum # byte of CNS11643 code */
+	short	cstate;		/* state machine id */
+	int	plane_no;	/* plane number for Chinese character */
+	int	_errno;		/* internal errno */
+} _iconv_st;
+
+enum _CSTATE	{ C0, C1, C2, C3, C4, C5, C6, C7 };
+
+
+static int get_plane_no_by_iso(const char);
+static int iso_to_big5(int, char[], char*, size_t);
+static int binsearch(unsigned long, table_t[], int);
+
+
+/*
+ * Open; called from iconv_open()
+ */
+void *
+_icv_open()
+{
+	_iconv_st *st;
+
+	if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
+		errno = ENOMEM;
+		return ((void *) -1);
+	}
+
+	st->cstate = C0;
+	st->plane_no = 0;
+	st->_errno = 0;
+
+#ifdef DEBUG
+    fprintf(stderr, "==========    iconv(): ISO2022-7 --> Big-5    ==========\n");
+#endif
+	return ((void *) st);
+}
+
+
+/*
+ * Close; called from iconv_close()
+ */
+void
+_icv_close(_iconv_st *st)
+{
+	if (!st)
+		errno = EBADF;
+	else
+		free(st);
+}
+
+
+/*
+ * Actual conversion; called from iconv()
+ */
+/*=========================================================================
+ *
+ *             State Machine for interpreting ISO 2022-7 code
+ *
+ *=========================================================================
+ *
+ *                                                        plane 2 - 16
+ *                                                    +---------->-------+
+ *                                    plane           ^                  |
+ *            ESC      $       )      number     SO   | plane 1          v
+ *    +-> C0 ----> C1 ---> C2 ---> C3 ------> C4 --> C5 -------> C6     C7
+ *    |   | ascii  | ascii | ascii |    ascii |   SI | |          |      |
+ *    +----------------------------+    <-----+------+ +------<---+------+
+ *    ^                                 |
+ *    |              ascii              v
+ *    +---------<-------------<---------+
+ *
+ *=========================================================================*/
+size_t
+_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
+				char **outbuf, size_t *outbytesleft)
+{
+	int		n;
+
+	if (st == NULL) {
+		errno = EBADF;
+		return ((size_t) -1);
+	}
+
+	if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
+		st->cstate = C0;
+		st->_errno = 0;
+		return ((size_t) 0);
+	}
+
+#ifdef DEBUG
+    fprintf(stderr, "=== (Re-entry)   iconv(): ISO 2022-7 --> Big-5   ===\n");
+#endif
+	st->_errno = 0;         /* reset internal errno */
+	errno = 0;		/* reset external errno */
+
+	/* a state machine for interpreting ISO 2022-7 code */
+	while (*inbytesleft > 0 && *outbytesleft > 0) {
+		switch (st->cstate) {
+		case C0:		/* assuming ASCII in the beginning */
+			if (**inbuf == ESC) {
+				st->cstate = C1;
+			} else {	/* real ASCII */
+				**outbuf = **inbuf;
+				(*outbuf)++;
+				(*outbytesleft)--;
+			}
+			break;
+		case C1:		/* got ESC, expecting $ */
+			if (**inbuf == '$') {
+				st->cstate = C2;
+			} else {
+				**outbuf = ESC;
+				(*outbuf)++;
+				(*outbytesleft)--;
+				st->cstate = C0;
+				st->_errno = 0;
+				continue;	/* don't advance inbuf */
+			}
+			break;
+		case C2:		/* got $, expecting ) */
+			if (**inbuf == ')') {
+				st->cstate = C3;
+			} else {
+				if (*outbytesleft < 2) {
+					st->_errno = errno = E2BIG;
+					return((size_t)-1);
+				}
+				**outbuf = ESC;
+				*(*outbuf+1) = '$';
+				(*outbuf) += 2;
+				(*outbytesleft) -= 2;
+				st->cstate = C0;
+				st->_errno = 0;
+				continue;	/* don't advance inbuf */
+			}
+			break;
+		case C3:		/* got ) expecting G,H,I,...,V */
+			st->plane_no = get_plane_no_by_iso(**inbuf);
+			if (st->plane_no > 0 ) {	/* plane #1 - #16 */
+				st->cstate = C4;
+			} else {
+				if (*outbytesleft < 3) {
+					st->_errno = errno = E2BIG;
+					return((size_t)-1);
+				}
+				**outbuf = ESC;
+				*(*outbuf+1) = '$';
+				*(*outbuf+2) = ')';
+				(*outbuf) += 3;
+				(*outbytesleft) -= 3;
+				st->cstate = C0;
+				st->_errno = 0;
+				continue;	/* don't advance inbuf */
+			}
+			break;
+		case C4:		/* SI (Shift In) */
+			if (**inbuf == ESC) {
+				st->cstate = C1;
+				break;
+			}
+			if (**inbuf == SO) {
+#ifdef DEBUG
+    fprintf(stderr, "<--------------  SO  -------------->\n");
+#endif
+				st->cstate = C5;
+			} else {	/* ASCII */
+				**outbuf = **inbuf;
+				(*outbuf)++;
+				(*outbytesleft)--;
+				st->cstate = C0;
+				st->_errno = 0;
+			}
+			break;
+		case C5:		/* SO (Shift Out) */
+			if (**inbuf == SI) {
+#ifdef DEBUG
+    fprintf(stderr, ">--------------  SI  --------------<\n");
+#endif
+				st->cstate = C4;
+			} else {	/* 1st Chinese character */
+				if (st->plane_no == 1) {
+					st->keepc[0] = (char) (**inbuf | MSB);
+					st->cstate = C6;
+				} else {	/* 4-bypte code: plane #2 - #16 */
+					st->keepc[0] = (char) MBYTE;
+					st->keepc[1] = (char) (PMASK +
+								st->plane_no);
+					st->keepc[2] = (char) (**inbuf | MSB);
+					st->cstate = C7;
+				}
+			}
+			break;
+		case C6:		/* plane #1: 2nd Chinese character */
+			st->keepc[1] = (char) (**inbuf | MSB);
+			st->keepc[2] = st->keepc[3] = NULL;
+			n = iso_to_big5(1, st->keepc, *outbuf, *outbytesleft);
+			if (n > 0) {
+				(*outbuf) += n;
+				(*outbytesleft) -= n;
+			} else {
+				st->_errno = errno;
+				return((size_t)-1);
+			}
+			st->cstate = C5;
+			break;
+		case C7:		/* 4th Chinese character */
+			st->keepc[3] = (char) (**inbuf | MSB);
+			n = iso_to_big5(st->plane_no, st->keepc, *outbuf,
+					*outbytesleft);
+			if (n > 0) {
+				(*outbuf) += n;
+				(*outbytesleft) -= n;
+			} else {
+				st->_errno = errno;
+				return((size_t)-1);
+			}
+			st->cstate = C5;
+			break;
+		default:			/* should never come here */
+			st->_errno = errno = EILSEQ;
+			st->cstate = C0;	/* reset state */
+			break;
+		}
+
+		(*inbuf)++;
+		(*inbytesleft)--;
+
+		if (st->_errno) {
+#ifdef DEBUG
+    fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\tinbuf=%x\n",
+		st->_errno, st->cstate, **inbuf);
+#endif
+			break;
+		}
+		if (errno)
+			return((size_t)-1);
+	}
+
+	if (*inbytesleft > 0 && *outbytesleft == 0) {
+		errno = E2BIG;
+		return((size_t)-1);
+	}
+	return (*inbytesleft);
+}
+
+
+/*
+ * Get plane number by ISO plane char; i.e. 'G' returns 1, 'H' returns 2, etc.
+ * Returns -1 on error conditions
+ */
+static int get_plane_no_by_iso(const char inbuf)
+{
+	int ret;
+	unsigned char uc = (unsigned char) inbuf;
+
+	if (uc == '0')	/* plane #0 */
+		return(0);
+
+	ret = uc - 'F';
+	switch (ret) {
+	case 1:		/* 0x8EA1 - G */
+	case 2:		/* 0x8EA2 - H */
+	case 3:		/* 0x8EA3 - I */
+	case 4:		/* 0x8EA4 - J */
+	case 5:		/* 0x8EA5 - K */
+	case 6:		/* 0x8EA6 - L */
+	case 7:		/* 0x8EA7 - M */
+	case 8:		/* 0x8EA8 - N */
+	case 9:		/* 0x8EA9 - O */
+	case 10:	/* 0x8EAA - P */
+	case 11:	/* 0x8EAB - Q */
+	case 12:	/* 0x8EAC - R */
+	case 13:	/* 0x8EAD - S */
+	case 14:	/* 0x8EAE - T */
+	case 15:	/* 0x8EAF - U */
+	case 16:	/* 0x8EB0 - V */
+		return (ret);
+	default:
+		return (-1);
+	}
+}
+
+
+/*
+ * ISO 2022-7 code --> Big-5 code
+ * Return: > 0 - converted with enough space in output buffer
+ *         = 0 - no space in outbuf
+ */
+static int iso_to_big5(int plane_no, char keepc[], char *buf, size_t buflen)
+{
+	char		cns_str[3];
+	unsigned long	cns_val;	/* MSB mask off CNS 11643 value */
+	int		unidx;		/* binary search index */
+	unsigned long	big5_val, val;	/* Big-5 code */
+
+#ifdef DEBUG
+    fprintf(stderr, "%s %d ", keepc, plane_no);
+#endif
+	if (plane_no == 1) {
+		cns_str[0] = keepc[0] & MSB_OFF;
+		cns_str[1] = keepc[1] & MSB_OFF;
+	} else {
+		cns_str[0] = keepc[2] & MSB_OFF;
+		cns_str[1] = keepc[3] & MSB_OFF;
+	}
+	cns_val = (cns_str[0] << 8) + cns_str[1];
+#ifdef DEBUG
+    fprintf(stderr, "%x\t", cns_val);
+#endif
+
+        if (buflen < 2) {
+                errno = E2BIG;
+                return(0);
+        }
+
+	switch (plane_no) {
+	case 1:
+		unidx = binsearch(cns_val, cns_big5_tab1, MAX_CNS1_NUM);
+		if (unidx >= 0)
+			big5_val = cns_big5_tab1[unidx].value;
+		break;
+	case 2:
+		unidx = binsearch(cns_val, cns_big5_tab2, MAX_CNS2_NUM);
+		if (unidx >= 0)
+			big5_val = cns_big5_tab2[unidx].value;
+		break;
+	default:
+		unidx = -1;	/* no mapping from CNS to Big-5 out of plane 1&2 */
+		break;
+	}
+
+#ifdef DEBUG
+    fprintf(stderr, "unidx = %d, big5code = %x\t", unidx, big5_val);
+#endif
+
+	if (unidx < 0) {	/* no match from CNS to Big-5 */
+		*buf = *(buf+1) = NON_ID_CHAR;
+	} else {
+		val = big5_val & 0xffff;
+		*buf = (char) ((val & 0xff00) >> 8);
+		*(buf+1) = (char) (val & 0xff);
+	}
+
+#ifdef DEBUG
+    fprintf(stderr, "\t->%x %x<-\n", *buf, *(buf+1));
+#endif
+
+	return(2);
+}
+
+
+/* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
+static int binsearch(unsigned long x, table_t v[], int n)
+{
+	int low, high, mid;
+
+	low = 0;
+	high = n - 1;
+	while (low <= high) {
+		mid = (low + high) / 2;
+		if (x < v[mid].key)
+			high = mid - 1;
+		else if (x > v[mid].key)
+			low = mid + 1;
+		else	/* found match */
+			return mid;
+	}
+	return (-1);	/* no match */
+}