/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, by Sun Microsystems, Inc.
 * All rights reserved.
 */

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include "big5_cns11643.h"	/* Big-5 to CNS 11643 mapping table */

#define	MSB	0x80	/* most significant bit; set on every non-ASCII byte */
#define	MBYTE	0x8e	/* first byte of a 4-byte output sequence */
#define	PMASK	0xa0	/* base added to the plane number (2nd output byte) */
#define	ONEBYTE	0xff	/* mask keeping the right-most (low) byte */

#define	NON_ID_CHAR	'_'	/* emitted (twice) for a non-identified character */

/*
 * Per-descriptor conversion state, allocated by _icv_open() and released
 * by _icv_close().
 */
typedef struct _icv_state {
	char	keepc[2];	/* bytes of the Big-5 character in progress */
	short	cstate;		/* state machine id (enum _CSTATE value) */
	int	_errno;		/* internal errno of the current call */
} _iconv_st;

/*
 * Conversion states: C0 - expecting ASCII or the 1st byte of a Big-5
 * character; C1 - 1st byte seen, expecting the 2nd byte.
 */
enum _CSTATE { C0, C1 };

47static int big5_2nd_byte(char);
48static int get_plane_no_by_big5(const char, const char, int*, unsigned long*);
49static int big5_to_cns(int, int, unsigned long, char*, size_t);
50static int binsearch(unsigned long, table_t[], int);
51
52/*
53 * Open; called from iconv_open()
54 */
55void *
56_icv_open()
57{
58 _iconv_st *st;
59
60 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
61 errno = ENOMEM;
62 return ((void *) -1);
63 }
64
65 st->cstate = C0;
66 st->_errno = 0;
67
68#ifdef DEBUG
69 fprintf(stderr, "========== iconv(): Big-5 --> CNS 11643 ==========\n");
70#endif
71 return ((void *) st);
72}
73
74
75/*
76 * Close; called from iconv_close()
77 */
78void
79_icv_close(_iconv_st *st)
80{
81 if (!st)
82 errno = EBADF;
83 else
84 free(st);
85}
86
87
88/*
89 * Actual conversion; called from iconv()
90 */
91/*=======================================================
92 *
93 * State Machine for interpreting Big-5 code
94 *
95 *=======================================================
96 *
97 * 1st C
98 * +--------> C0 ----------> C1
99 * | ascii | 2nd C |
100 * ^ v v
101 * +----<-----+-----<--------+
102 *
103 *=======================================================*/
104/*
105 * Big-5 encoding range:
106 * High byte: 0xA1 - 0xFE ( 94 encoding space)
107 * Low byte: 0x40 - 0x7E, 0xA1 - 0xFE ( 157 encoding space)
108 * Plane #1: 0xA140 - 0xC8FE ( 6280 encoding space)
109 * Plane #2: 0xC940 - 0xFEFE ( 8478 encoding space)
110 * Total: 94 * 157 = 14,758 (14758 encoding space)
111 */
112size_t
113_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
114 char **outbuf, size_t *outbytesleft)
115{
116 int plane_no, n, unidx;
117 unsigned long cnscode;
118
119#ifdef DEBUG
120 fprintf(stderr, "=== (Re-entry) iconv(): Big-5 --> CNS 11643 ===\n");
121#endif
122 if (st == NULL) {
123 errno = EBADF;
124 return ((size_t) -1);
125 }
126
127 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
128 st->cstate = C0;
129 st->_errno = 0;
130 return ((size_t) 0);
131 }
132
133 st->_errno = 0; /* reset internal errno */
134 errno = 0; /* reset external errno */
135
136 /* a state machine for interpreting Big-5 code */
137 while (*inbytesleft > 0 && *outbytesleft > 0) {
138 switch (st->cstate) {
139 case C0: /* assuming ASCII in the beginning */
140 if (**inbuf & MSB) {
141 st->keepc[0] = (**inbuf);
142 st->cstate = C1;
143 } else { /* real ASCII */
144 **outbuf = **inbuf;
145 (*outbuf)++;
146 (*outbytesleft)--;
147 }
148 break;
149 case C1: /* Chinese characters: 2nd byte */
150 if (big5_2nd_byte(**inbuf) == 0) {
151 st->keepc[1] = (**inbuf);
152 plane_no = get_plane_no_by_big5(st->keepc[0],
153 st->keepc[1], &unidx, &cnscode);
154/* comment these lines, it is legal BIG5 character, but no corresponding CNS character
155 if (plane_no < 0) {
156 st->_errno = errno = EILSEQ;
157 break;
158 }
159*/
160
161 n = big5_to_cns(plane_no, unidx, cnscode,
162 *outbuf, *outbytesleft);
163 if (n > 0) {
164 (*outbuf) += n;
165 (*outbytesleft) -= n;
166
167 st->cstate = C0;
168 } else { /* don't reset state */
169 st->_errno = errno = E2BIG;
170 }
171 } else { /* input char doesn't belong
172 * to the input code set
173 */
174 st->_errno = errno = EILSEQ;
175 }
176 break;
177 default: /* should never come here */
178 st->_errno = errno = EILSEQ;
179 st->cstate = C0; /* reset state */
180 break;
181 }
182
183 if (st->_errno) {
184#ifdef DEBUG
185 fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
186 st->_errno, st->cstate);
187#endif
188 break;
189 }
190
191 (*inbuf)++;
192 (*inbytesleft)--;
193 }
194
195 if (errno) return ((size_t) -1);
196
197 if (*inbytesleft == 0 && st->cstate != C0) {
198 errno = EINVAL;
199 return ((size_t) -1);
200 }
201
202 if (*inbytesleft > 0 && *outbytesleft == 0) {
203 errno = E2BIG;
204 return((size_t)-1);
205 }
206 return (*inbytesleft);
207}
208
209
/*
 * Test whether inbuf is a valid character for 2nd byte Big-5 code
 * Return: = 0 - valid Big-5 2nd byte (0x40 - 0x7E or 0xA1 - 0xFE)
 *         = 1 - invalid Big-5 2nd byte
 */
static int big5_2nd_byte(char inbuf)
{
	/* Treat the byte as unsigned so values >= 0x80 compare correctly. */
	unsigned int buf = (unsigned char)inbuf;

	if ((buf >= 0x40) && (buf <= 0x7E))
		return (0);
	if ((buf >= 0xA1) && (buf <= 0xFE))
		return (0);
	return (1);
}

227/*
228 * Get plane number by Big-5 code; i.e. plane #1 returns 1, #2 returns 2, etc.
229 * Returns -1 on error conditions
230 *
231 * Since binary search of the Big-5 to CNS table is necessary, might as well
232 * return index and CNS code matching to the unicode.
233 */
234static int get_plane_no_by_big5(const char c1, const char c2,
235 int *unidx, unsigned long *cnscode)
236{
Toomas Soomea7fb1da2019-01-28 09:59:47 +0200237 int ret;
Alexander Pyhalov16d86562018-11-21 12:34:20 +0300238 unsigned long big5code;
239
240 big5code = (unsigned long) ((c1 & ONEBYTE) << 8) + (c2 & ONEBYTE);
241 *unidx = binsearch(big5code, big5_cns_tab, MAX_BIG5_NUM);
242 if ((*unidx) >= 0)
243 *cnscode = big5_cns_tab[*unidx].value;
244 else
245 return(0); /* match from Big-5 to CNS not found */
246#ifdef DEBUG
247 fprintf(stderr, "Big-5=%04x, idx=%5d, CNS=%x ", big5code, *unidx, *cnscode);
248#endif
249
250 ret = (int) (*cnscode >> 16);
251 switch (ret) {
252 case 0x21: /* 0x8EA1 - G */
253 case 0x22: /* 0x8EA2 - H */
254 case 0x23: /* 0x8EA3 - I */
255 case 0x24: /* 0x8EA4 - J */
256 case 0x25: /* 0x8EA5 - K */
257 case 0x26: /* 0x8EA6 - L */
258 case 0x27: /* 0x8EA7 - M */
259 case 0x28: /* 0x8EA8 - N */
260 case 0x29: /* 0x8EA9 - O */
261 case 0x2a: /* 0x8EAA - P */
262 case 0x2b: /* 0x8EAB - Q */
263 case 0x2c: /* 0x8EAC - R */
264 case 0x2d: /* 0x8EAD - S */
265 case 0x2f: /* 0x8EAF - U */
266 case 0x30: /* 0x8EB0 - V */
267 return (ret - 0x20); /* so that we can use GET_PLANEC() */
268 case 0x2e: /* 0x8EAE - T */
269 return (3); /* CNS 11643-1992 */
270 default:
271 return (-1);
272 }
273}
274
275
276/*
277 * Big-5 code --> CNS 11643 (Chinese EUC)
278 * Return: > 0 - converted with enough space in output buffer
279 * = 0 - no space in outbuf
280 */
281static int big5_to_cns(int plane_no, int unidx, unsigned long cnscode,
282 char *buf, size_t buflen)
283{
284 unsigned long val; /* CNS 11643 value */
285 unsigned char c1 = '\0', c2 = '\0', cns_str[5];
286 int ret_size; /* return buffer size */
287
288 if (unidx < 0) { /* no match from UTF8 to CNS 11643 */
289 if ( buflen < 2 ) goto err;
290 *buf = *(buf+1) = NON_ID_CHAR;
291 ret_size = 2;
292 } else {
293 val = cnscode & 0xffff;
294 c1 = ((val & 0xff00) >> 8) | MSB;
295 c2 = (val & 0xff) | MSB;
296 }
297
298 switch (plane_no) {
299 case 1:
300 if ( buflen < 2 ) goto err;
301 *buf = cns_str[0] = c1;
302 *(buf+1) = cns_str[1] = c2;
Toomas Soomea7fb1da2019-01-28 09:59:47 +0200303 cns_str[2] = cns_str[3] = cns_str[4] = '\0';
Alexander Pyhalov16d86562018-11-21 12:34:20 +0300304 ret_size = 2;
305 break;
306 case 2:
307 case 3:
308 case 4:
309 case 5:
310 case 6:
311 case 7:
312 case 8:
313 case 9:
314 case 10:
315 case 11:
316 case 12:
317 case 13:
318 case 14:
319 case 15:
320 case 16:
321 if ( buflen < 4 ) goto err;
322 *(unsigned char*) buf = cns_str[0] = MBYTE;
323 *(unsigned char*)(buf+1) = cns_str[1] = PMASK + plane_no;
324 *(unsigned char*) (buf+2) = cns_str[2] = c1;
325 *(unsigned char*) (buf+3) = cns_str[3] = c2;
Toomas Soomea7fb1da2019-01-28 09:59:47 +0200326 cns_str[4] = '\0';
Alexander Pyhalov16d86562018-11-21 12:34:20 +0300327 ret_size = 4;
328 break;
329 }
330
331#ifdef DEBUG
332 fprintf(stderr, "\t#%d ->%s<-\n", plane_no, cns_str);
333#endif
334
335 return(ret_size);
336
337err:
338 errno = E2BIG;
339 return(0);
340}
341
342
343/* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
344static int binsearch(unsigned long x, table_t v[], int n)
345{
346 int low, high, mid;
347
348 low = 0;
349 high = n - 1;
350 while (low <= high) {
351 mid = (low + high) / 2;
352 if (x < v[mid].key)
353 high = mid - 1;
354 else if (x > v[mid].key)
355 low = mid + 1;
356 else /* found match */
357 return mid;
358 }
359 return (-1); /* no match */
360}