/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21
/*
 * Copyright (c) 1995, by Sun Microsystems, Inc.
 * All rights reserved.
 */
26
27#include <stdio.h>
28#include <stdlib.h>
29#include <errno.h>
30#include "cns11643_big5.h" /* CNS 11643 to Big-5 mapping table */
31
#define	MSB		0x80	/* most significant bit; clear means plain ASCII */
#define	MBYTE		0x8e	/* first byte of a multi-byte (4 byte) character */
#define	PMASK		0xa0	/* base subtracted from the plane byte to get the plane number */
#define	ONEBYTE		0xff	/* mask keeping the right most byte */
#define	MSB_OFF		0x7f	/* mask that turns the MSB off */

#define	NON_ID_CHAR	'_'	/* written for a character with no Big-5 mapping */
39
/*
 * Conversion state kept between calls to _icv_iconv().
 */
typedef struct _icv_state {
	char	keepc[4];	/* bytes of the CNS 11643 character being read (at most 4) */
	short	cstate;		/* state machine id, one of enum _CSTATE */
	int	_errno;		/* internal errno of the current _icv_iconv() call */
} _iconv_st;

/*
 * State machine ids: Cn means that n bytes of the current character
 * have already been consumed.
 */
enum _CSTATE { C0, C1, C2, C3 };
47
48
49static int get_plane_no_by_char(const char);
50static int cns_to_big5(int, char[], char*, size_t);
51static int binsearch(unsigned long, table_t[], int);
52
53
54/*
55 * Open; called from iconv_open()
56 */
57void *
58_icv_open()
59{
60 _iconv_st *st;
61
62 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
63 errno = ENOMEM;
64 return ((void *) -1);
65 }
66
67 st->cstate = C0;
68 st->_errno = 0;
69
70#ifdef DEBUG
71 fprintf(stderr, "========== iconv(): CNS11643 --> Big-5 ==========\n");
72#endif
73
74 return ((void *) st);
75}
76
77
78/*
79 * Close; called from iconv_close()
80 */
81void
82_icv_close(_iconv_st *st)
83{
84 if (!st)
85 errno = EBADF;
86 else
87 free(st);
88}
89
90
91/*
92 * Actual conversion; called from iconv()
93 */
94/*=======================================================
95 *
96 * State Machine for interpreting CNS 11643 code
97 *
98 *=======================================================
99 *
100 * plane 2 - 16
101 * 1st C 2nd C 3rd C
102 * +------> C0 -----> C1 -----------> C2 -----> C3
103 * | ascii | plane 1 | 4th C |
104 * ^ v 2nd C v v
105 * +----<---+-----<----+-------<---------<-------+
106 *
107 *=======================================================*/
108size_t
109_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
110 char **outbuf, size_t *outbytesleft)
111{
112 int plane_no = -1, n;
113
114 if (st == NULL) {
115 errno = EBADF;
116 return ((size_t) -1);
117 }
118
119 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
120 st->cstate = C0;
121 st->_errno = 0;
122 return ((size_t) 0);
123 }
124
125#ifdef DEBUG
126 fprintf(stderr, "=== (Re-entry) iconv(): CNS 11643 --> Big-5 ===\n");
127#endif
128 st->_errno = 0; /* reset internal errno */
129 errno = 0; /* reset external errno */
130
131 /* a state machine for interpreting CNS 11643 code */
132 while (*inbytesleft > 0 && *outbytesleft > 0) {
133 switch (st->cstate) {
134 case C0: /* assuming ASCII in the beginning */
135 if (**inbuf & MSB) {
136 st->keepc[0] = (**inbuf);
137 st->cstate = C1;
138 } else { /* real ASCII */
139 **outbuf = **inbuf;
140 (*outbuf)++;
141 (*outbytesleft)--;
142 }
143 break;
144 case C1: /* Chinese characters: 2nd byte */
145 if ((st->keepc[0] & ONEBYTE) == MBYTE) { /* 4-byte (0x8e) */
146 plane_no = get_plane_no_by_char(**inbuf);
147 if (plane_no == -1) { /* illegal plane */
148 st->_errno = errno = EILSEQ;
149 } else { /* 4-byte Chinese character */
150 st->keepc[1] = (**inbuf);
151 st->cstate = C2;
152 }
153 } else { /* 2-byte Chinese character - plane #1 */
154 if (**inbuf & MSB) { /* plane #1 */
155 st->keepc[1] = (**inbuf);
Toomas Soomea7fb1da2019-01-28 09:59:47 +0200156 st->keepc[2] = st->keepc[3] = '\0';
Alexander Pyhalov16d86562018-11-21 12:34:20 +0300157 n = cns_to_big5(1, st->keepc, *outbuf,
158 *outbytesleft);
159 if (n > 0) {
160 (*outbuf) += n;
161 (*outbytesleft) -= n;
162
163 st->cstate = C0;
164 } else { /* don't reset state */
165 st->_errno = errno = E2BIG;
166 }
167 } else { /* input char doesn't belong
168 * to the input code set
169 */
170 st->_errno = errno = EILSEQ;
171 }
172 }
173 break;
174 case C2: /* plane #2 - #16 (4 bytes): get 3nd byte */
175 if (**inbuf & MSB) { /* 3rd byte */
176 st->keepc[2] = (**inbuf);
177 st->cstate = C3;
178 } else {
179 st->_errno = errno = EILSEQ;
180 }
181 break;
182 case C3: /* plane #2 - #16 (4 bytes): get 4th byte */
183 if (**inbuf & MSB) { /* 4th byte */
184 st->keepc[3] = (**inbuf);
185 n = cns_to_big5(plane_no, st->keepc, *outbuf,
186 *outbytesleft );
187 if (n > 0) {
188 (*outbuf) += n;
189 (*outbytesleft) -= n;
190
191 st->cstate = C0; /* reset state */
192 } else { /* don't reset state */
193 st->_errno = errno = E2BIG;
194 }
195 } else {
196 st->_errno = errno = EILSEQ;
197 }
198 break;
199 default: /* should never come here */
200 st->_errno = errno = EILSEQ;
201 st->cstate = C0; /* reset state */
202 break;
203 }
204
205 if (st->_errno) {
206#ifdef DEBUG
207 fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
208 st->_errno, st->cstate);
209#endif
210 break;
211 }
212
213 (*inbuf)++;
214 (*inbytesleft)--;
215 }
216
217 if (errno) return ((size_t) -1);
218
219 if (*inbytesleft == 0 && st->cstate != C0) {
220 errno = EINVAL;
221 return ((size_t) -1);
222 }
223
224 if (*inbytesleft > 0 && *outbytesleft == 0) {
225 errno = E2BIG;
226 return((size_t)-1);
227 }
228 return (*inbytesleft);
229}
230
231
232/*
233 * Get plane number by char; i.e. 0xa2 returns 2, 0xae returns 14, etc.
234 * Returns -1 on error conditions
235 */
236static int get_plane_no_by_char(const char inbuf)
237{
238 int ret;
239 unsigned char uc = (unsigned char) inbuf;
240
241 ret = uc - PMASK;
242 switch (ret) {
243 case 1: /* 0x8EA1 */
244 case 2: /* 0x8EA2 */
245 case 3: /* 0x8EA3 */
246 case 4: /* 0x8EA4 */
247 case 5: /* 0x8EA5 */
248 case 6: /* 0x8EA6 */
249 case 7: /* 0x8EA7 */
250 case 12: /* 0x8EAC */
251 case 14: /* 0x8EAE */
252 case 15: /* 0x8EAF */
253 case 16: /* 0x8EB0 */
254 return (ret);
255 default:
256 return (-1);
257 }
258}
259
260
261/*
262 * CNS 11643 code --> Big-5
263 * Return: > 0 - converted with enough space in output buffer
264 * = 0 - no space in outbuf
265 */
266static int cns_to_big5(int plane_no, char keepc[], char *buf, size_t buflen)
267{
268 char cns_str[3];
269 unsigned long cns_val; /* MSB mask off CNS 11643 value */
270 int unidx; /* binary search index */
271 unsigned long big5_val, val; /* Big-5 code */
272
273#ifdef DEBUG
274 fprintf(stderr, "%s %d ", keepc, plane_no);
275#endif
276 if (buflen < 2) {
277 errno = E2BIG;
278 return(0);
279 }
280
281 if (plane_no == 1) {
282 cns_str[0] = keepc[0] & MSB_OFF;
283 cns_str[1] = keepc[1] & MSB_OFF;
284 } else {
285 cns_str[0] = keepc[2] & MSB_OFF;
286 cns_str[1] = keepc[3] & MSB_OFF;
287 }
288 cns_val = (cns_str[0] << 8) + cns_str[1];
289#ifdef DEBUG
290 fprintf(stderr, "%x\t", cns_val);
291#endif
292
293 switch (plane_no) {
294 case 1:
295 unidx = binsearch(cns_val, cns_big5_tab1, MAX_CNS1_NUM);
296 if (unidx >= 0)
297 big5_val = cns_big5_tab1[unidx].value;
298 break;
299 case 2:
300 unidx = binsearch(cns_val, cns_big5_tab2, MAX_CNS2_NUM);
301 if (unidx >= 0)
302 big5_val = cns_big5_tab2[unidx].value;
303 break;
304 case 3:
305 unidx = binsearch(cns_val, cns_big5_tab3, MAX_CNS3_NUM);
306 if (unidx >= 0)
307 big5_val = cns_big5_tab3[unidx].value;
308 break;
309 default:
310 unidx = -1; /* no mapping from CNS to Big-5 */
311 break;
312 }
313
314#ifdef DEBUG
315 fprintf(stderr, "unidx = %d, value = %x\t", unidx, big5_val);
316#endif
317
318 if (unidx < 0) { /* no match from CNS to Big-5 */
319 *buf = *(buf+1) = NON_ID_CHAR;
320 } else {
321 val = big5_val & 0xffff;
322 *buf = (char) ((val & 0xff00) >> 8);
323 *(buf+1) = (char) (val & 0xff);
324 }
325
326#ifdef DEBUG
327 fprintf(stderr, "\t->%x %x<-\n", *buf, *(buf+1));
328#endif
329
330 return(2);
331}
332
333
334/* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
335static int binsearch(unsigned long x, table_t v[], int n)
336{
337 int low, high, mid;
338
339 low = 0;
340 high = n - 1;
341 while (low <= high) {
342 mid = (low + high) / 2;
343 if (x < v[mid].key)
344 high = mid - 1;
345 else if (x > v[mid].key)
346 low = mid + 1;
347 else /* found match */
348 return mid;
349 }
350 return (-1); /* no match */
351}