/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21
/*
 * Copyright (c) 1995, by Sun Microsystems, Inc.
 * All rights reserved.
 */
26
27#include <stdio.h>
28#include <stdlib.h>
29#include <errno.h>
30#include "cns11643_big5.h" /* CNS 11643 to Big-5 mapping table */
31
/* Byte-level constants for the CNS 11643 (EUC-style) intermediate code. */
#define	MSB	0x80	/* most significant bit */
#define	MBYTE	0x8e	/* multi-byte (4 byte character): lead byte */
#define	PMASK	0xa0	/* plane number mask; added to the plane number */
#define	ONEBYTE	0xff	/* right most byte */
#define	MSB_OFF	0x7f	/* mask off MSB */

/* ISO 2022 control characters recognised by the state machine. */
#define	SI	0x0f	/* shift in */
#define	SO	0x0e	/* shift out */
#define	ESC	0x1b	/* escape */
41
/*
 * static const char plane_char[] = "0GH23456789:;<=>?";
 * static const char plane_char[] = "0GHIJKLMNOPQRSTUV";
 * #define GET_PLANEC(i) (plane_char[i])
 */
47
/* Both output bytes are set to this when a character has no Big-5 mapping. */
#define	NON_ID_CHAR	'_'	/* non-identified character */
49
/*
 * Conversion state carried between calls; allocated by _icv_open() and
 * released by _icv_close().
 */
typedef struct _icv_state {
	char	keepc[4];	/* maximum # byte of CNS11643 code */
	short	cstate;		/* state machine id (enum _CSTATE value) */
	int	plane_no;	/* plane number for Chinese character */
	int	_errno;		/* internal errno */
} _iconv_st;
56
/* States of the ISO 2022-7 interpreter in _icv_iconv(). */
enum _CSTATE {
	C0,	/* ASCII, no escape sequence in progress */
	C1,	/* got ESC, expecting '$' */
	C2,	/* got '$', expecting ')' */
	C3,	/* got ')', expecting the plane character */
	C4,	/* plane selected, shifted in: expecting SO, ESC or ASCII */
	C5,	/* shifted out: expecting SI or 1st byte of a character */
	C6,	/* plane 1: expecting 2nd byte of the character */
	C7	/* planes 2 - 16: expecting 2nd byte of the character */
};
58
59
60static int get_plane_no_by_iso(const char);
61static int iso_to_big5(int, char[], char*, size_t);
62static int binsearch(unsigned long, table_t[], int);
63
64
65/*
66 * Open; called from iconv_open()
67 */
68void *
69_icv_open()
70{
71 _iconv_st *st;
72
73 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
74 errno = ENOMEM;
75 return ((void *) -1);
76 }
77
78 st->cstate = C0;
79 st->plane_no = 0;
80 st->_errno = 0;
81
82#ifdef DEBUG
83 fprintf(stderr, "========== iconv(): ISO2022-7 --> Big-5 ==========\n");
84#endif
85 return ((void *) st);
86}
87
88
89/*
90 * Close; called from iconv_close()
91 */
92void
93_icv_close(_iconv_st *st)
94{
95 if (!st)
96 errno = EBADF;
97 else
98 free(st);
99}
100
101
102/*
103 * Actual conversion; called from iconv()
104 */
105/*=========================================================================
106 *
107 * State Machine for interpreting ISO 2022-7 code
108 *
109 *=========================================================================
110 *
111 * plane 2 - 16
112 * +---------->-------+
113 * plane ^ |
114 * ESC $ ) number SO | plane 1 v
115 * +-> C0 ----> C1 ---> C2 ---> C3 ------> C4 --> C5 -------> C6 C7
116 * | | ascii | ascii | ascii | ascii | SI | | | |
117 * +----------------------------+ <-----+------+ +------<---+------+
118 * ^ |
119 * | ascii v
120 * +---------<-------------<---------+
121 *
122 *=========================================================================*/
123size_t
124_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
125 char **outbuf, size_t *outbytesleft)
126{
127 int n;
128
129 if (st == NULL) {
130 errno = EBADF;
131 return ((size_t) -1);
132 }
133
134 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
135 st->cstate = C0;
136 st->_errno = 0;
137 return ((size_t) 0);
138 }
139
140#ifdef DEBUG
141 fprintf(stderr, "=== (Re-entry) iconv(): ISO 2022-7 --> Big-5 ===\n");
142#endif
143 st->_errno = 0; /* reset internal errno */
144 errno = 0; /* reset external errno */
145
146 /* a state machine for interpreting ISO 2022-7 code */
147 while (*inbytesleft > 0 && *outbytesleft > 0) {
148 switch (st->cstate) {
149 case C0: /* assuming ASCII in the beginning */
150 if (**inbuf == ESC) {
151 st->cstate = C1;
152 } else { /* real ASCII */
153 **outbuf = **inbuf;
154 (*outbuf)++;
155 (*outbytesleft)--;
156 }
157 break;
158 case C1: /* got ESC, expecting $ */
159 if (**inbuf == '$') {
160 st->cstate = C2;
161 } else {
162 **outbuf = ESC;
163 (*outbuf)++;
164 (*outbytesleft)--;
165 st->cstate = C0;
166 st->_errno = 0;
167 continue; /* don't advance inbuf */
168 }
169 break;
170 case C2: /* got $, expecting ) */
171 if (**inbuf == ')') {
172 st->cstate = C3;
173 } else {
174 if (*outbytesleft < 2) {
175 st->_errno = errno = E2BIG;
176 return((size_t)-1);
177 }
178 **outbuf = ESC;
179 *(*outbuf+1) = '$';
180 (*outbuf) += 2;
181 (*outbytesleft) -= 2;
182 st->cstate = C0;
183 st->_errno = 0;
184 continue; /* don't advance inbuf */
185 }
186 break;
187 case C3: /* got ) expecting G,H,I,...,V */
188 st->plane_no = get_plane_no_by_iso(**inbuf);
189 if (st->plane_no > 0 ) { /* plane #1 - #16 */
190 st->cstate = C4;
191 } else {
192 if (*outbytesleft < 3) {
193 st->_errno = errno = E2BIG;
194 return((size_t)-1);
195 }
196 **outbuf = ESC;
197 *(*outbuf+1) = '$';
198 *(*outbuf+2) = ')';
199 (*outbuf) += 3;
200 (*outbytesleft) -= 3;
201 st->cstate = C0;
202 st->_errno = 0;
203 continue; /* don't advance inbuf */
204 }
205 break;
206 case C4: /* SI (Shift In) */
207 if (**inbuf == ESC) {
208 st->cstate = C1;
209 break;
210 }
211 if (**inbuf == SO) {
212#ifdef DEBUG
213 fprintf(stderr, "<-------------- SO -------------->\n");
214#endif
215 st->cstate = C5;
216 } else { /* ASCII */
217 **outbuf = **inbuf;
218 (*outbuf)++;
219 (*outbytesleft)--;
220 st->cstate = C0;
221 st->_errno = 0;
222 }
223 break;
224 case C5: /* SO (Shift Out) */
225 if (**inbuf == SI) {
226#ifdef DEBUG
227 fprintf(stderr, ">-------------- SI --------------<\n");
228#endif
229 st->cstate = C4;
230 } else { /* 1st Chinese character */
231 if (st->plane_no == 1) {
232 st->keepc[0] = (char) (**inbuf | MSB);
233 st->cstate = C6;
234 } else { /* 4-bypte code: plane #2 - #16 */
235 st->keepc[0] = (char) MBYTE;
236 st->keepc[1] = (char) (PMASK +
237 st->plane_no);
238 st->keepc[2] = (char) (**inbuf | MSB);
239 st->cstate = C7;
240 }
241 }
242 break;
243 case C6: /* plane #1: 2nd Chinese character */
244 st->keepc[1] = (char) (**inbuf | MSB);
Toomas Soomea7fb1da2019-01-28 09:59:47 +0200245 st->keepc[2] = st->keepc[3] = '\0';
Alexander Pyhalov16d86562018-11-21 12:34:20 +0300246 n = iso_to_big5(1, st->keepc, *outbuf, *outbytesleft);
247 if (n > 0) {
248 (*outbuf) += n;
249 (*outbytesleft) -= n;
250 } else {
251 st->_errno = errno;
252 return((size_t)-1);
253 }
254 st->cstate = C5;
255 break;
256 case C7: /* 4th Chinese character */
257 st->keepc[3] = (char) (**inbuf | MSB);
258 n = iso_to_big5(st->plane_no, st->keepc, *outbuf,
259 *outbytesleft);
260 if (n > 0) {
261 (*outbuf) += n;
262 (*outbytesleft) -= n;
263 } else {
264 st->_errno = errno;
265 return((size_t)-1);
266 }
267 st->cstate = C5;
268 break;
269 default: /* should never come here */
270 st->_errno = errno = EILSEQ;
271 st->cstate = C0; /* reset state */
272 break;
273 }
274
275 (*inbuf)++;
276 (*inbytesleft)--;
277
278 if (st->_errno) {
279#ifdef DEBUG
280 fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\tinbuf=%x\n",
281 st->_errno, st->cstate, **inbuf);
282#endif
283 break;
284 }
285 if (errno)
286 return((size_t)-1);
287 }
288
289 if (*inbytesleft > 0 && *outbytesleft == 0) {
290 errno = E2BIG;
291 return((size_t)-1);
292 }
293 return (*inbytesleft);
294}
295
296
/*
 * Get plane number by ISO plane char; i.e. 'G' returns 1, 'H' returns 2,
 * and so on up to 'V', which returns 16.  '0' returns 0 (plane #0).
 * Returns -1 for any other character.
 */
static int
get_plane_no_by_iso(const char inbuf)
{
	unsigned char uc = (unsigned char)inbuf;

	if (uc == '0')		/* plane #0 */
		return (0);

	/* 'G' .. 'V' select planes 1 .. 16 (0x8EA1 .. 0x8EB0) */
	if (uc >= 'G' && uc <= 'V')
		return (uc - 'F');

	return (-1);
}
332
333
334/*
335 * ISO 2022-7 code --> Big-5 code
336 * Return: > 0 - converted with enough space in output buffer
337 * = 0 - no space in outbuf
338 */
339static int iso_to_big5(int plane_no, char keepc[], char *buf, size_t buflen)
340{
341 char cns_str[3];
342 unsigned long cns_val; /* MSB mask off CNS 11643 value */
343 int unidx; /* binary search index */
344 unsigned long big5_val, val; /* Big-5 code */
345
346#ifdef DEBUG
347 fprintf(stderr, "%s %d ", keepc, plane_no);
348#endif
349 if (plane_no == 1) {
350 cns_str[0] = keepc[0] & MSB_OFF;
351 cns_str[1] = keepc[1] & MSB_OFF;
352 } else {
353 cns_str[0] = keepc[2] & MSB_OFF;
354 cns_str[1] = keepc[3] & MSB_OFF;
355 }
356 cns_val = (cns_str[0] << 8) + cns_str[1];
357#ifdef DEBUG
358 fprintf(stderr, "%x\t", cns_val);
359#endif
360
361 if (buflen < 2) {
362 errno = E2BIG;
363 return(0);
364 }
365
366 switch (plane_no) {
367 case 1:
368 unidx = binsearch(cns_val, cns_big5_tab1, MAX_CNS1_NUM);
369 if (unidx >= 0)
370 big5_val = cns_big5_tab1[unidx].value;
371 break;
372 case 2:
373 unidx = binsearch(cns_val, cns_big5_tab2, MAX_CNS2_NUM);
374 if (unidx >= 0)
375 big5_val = cns_big5_tab2[unidx].value;
376 break;
377 default:
378 unidx = -1; /* no mapping from CNS to Big-5 out of plane 1&2 */
379 break;
380 }
381
382#ifdef DEBUG
383 fprintf(stderr, "unidx = %d, big5code = %x\t", unidx, big5_val);
384#endif
385
386 if (unidx < 0) { /* no match from CNS to Big-5 */
387 *buf = *(buf+1) = NON_ID_CHAR;
388 } else {
389 val = big5_val & 0xffff;
390 *buf = (char) ((val & 0xff00) >> 8);
391 *(buf+1) = (char) (val & 0xff);
392 }
393
394#ifdef DEBUG
395 fprintf(stderr, "\t->%x %x<-\n", *buf, *(buf+1));
396#endif
397
398 return(2);
399}
400
401
402/* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
403static int binsearch(unsigned long x, table_t v[], int n)
404{
405 int low, high, mid;
406
407 low = 0;
408 high = n - 1;
409 while (low <= high) {
410 mid = (low + high) / 2;
411 if (x < v[mid].key)
412 high = mid - 1;
413 else if (x > v[mid].key)
414 low = mid + 1;
415 else /* found match */
416 return mid;
417 }
418 return (-1); /* no match */
419}