blob: 889d6cbe11b3587cbaeb7dd231592ac8be90dc8f [file] [log] [blame]
Alexander Pyhalov16d86562018-11-21 12:34:20 +03001/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1995, by Sun Microsystems, Inc.
24 * All rights reserved.
25 */
26
27#include <stdio.h>
28#include <stdlib.h>
29#include <errno.h>
30#include <libintl.h>
31
/* Masks and marker values applied to single CNS 11643 input bytes. */
#define	MSB		0x80	/* most significant bit of a byte */
#define	MBYTE		0x8e	/* lead byte of a 4-byte character */
#define	PMASK		0xa0	/* subtracted from the plane byte to get the plane number */
#define	ONEBYTE		0xff	/* keeps the right-most byte */
#define	MSB_OFF		0x7f	/* clears the most significant bit */

/* Control characters written to the ISO 2022-7 output. */
#define	SI		0x0f	/* shift in */
#define	SO		0x0e	/* shift out */
#define	ESC		0x1b	/* escape */

/*
 * Final character of the "ESC $ )" sequence, indexed by plane number
 * (index 0 is a placeholder, planes 1..16 map to 'G'..'V').
 * An earlier variant of this table was "0GH23456789:;<=>?".
 */
static const char plane_char[] = "0GHIJKLMNOPQRSTUV";

/* Look up the designator character for plane number i. */
#define	GET_PLANEC(i)	(plane_char[(i)])

#define	NON_ID_CHAR	'_'	/* stand-in for a non-identified character */
48
/*
 * Conversion state kept between calls of the _icv_*() entry points.
 */
struct _icv_state {
	char	keepc[4];	/* bytes of the character being read (at most 4 for CNS 11643) */
	short	cstate;		/* input state machine id (enum _CSTATE) */
	short	istate;		/* output shift state id (enum _ISTATE) */
	int	_errno;		/* internal copy of the last error code */
};

typedef struct _icv_state _iconv_st;
55
/* Positions of the CNS 11643 input state machine used by _icv_iconv(). */
enum _CSTATE {
	C0,	/* waiting for the first byte of a character */
	C1,	/* first byte kept, waiting for the second byte */
	C2,	/* 4-byte character: waiting for the third byte */
	C3,	/* 4-byte character: waiting for the fourth byte */
	C4	/* all bytes kept, character is ready to be converted */
};

/* Shift state of the ISO 2022-7 output. */
enum _ISTATE {
	IN,	/* shifted in */
	OUT	/* shifted out */
};
58
59
/* Helpers defined further down in this file. */
static int get_plane_no_by_char(const char inbuf);
static int get_plane_no_by_str(const char *inbuf);
static int cns_to_iso(int plane_no, char keepc[], char *buf, size_t buflen);

/*
 * Conversion state kept between calls of the _cv_*() entry points.
 */
struct _cv_state {
	int	plane_no;	/* plane of the current character; 0 means ASCII */
	int	get_a_mbchar;	/* non-zero: a character still has to be read from input */
	int	more_bytes;	/* bytes of the current character not yet read */
	int	first_byte;	/* non-zero: the next input byte starts a new character */
	int	plane_changed;	/* non-zero: plane differs from the previous character */
	char	planec;		/* plane_char[] entry for the new plane */
	char	*p;		/* where the next input byte is stored inside keepc */
	char	keepc[4];	/* bytes of the character being read */
};
74
75/*
76 * Open; called from iconv_open()
77 */
78void *
79_icv_open()
80{
81 _iconv_st *st;
82
83 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
84 errno = ENOMEM;
85 return ((void *) -1);
86 }
87
88 st->cstate = C0;
89 st->istate = IN;
90 st->_errno = 0;
91
92#ifdef DEBUG
93 fprintf(stderr, "========== iconv(): CNS11643 --> ISO 2022-7 ==========\n");
94#endif
95
96 return ((void *) st);
97}
98
99
100/*
101 * Close; called from iconv_close()
102 */
103void
104_icv_close(_iconv_st *st)
105{
106 if (!st)
107 errno = EBADF;
108 else
109 free(st);
110}
111
112
113/*
114 * Actual conversion; called from iconv()
115 */
116/*=======================================================
117 *
118 * State Machine for interpreting CNS 11643 code
119 *
120 *=======================================================
121 *
122 * (ESC,SO) plane 2 - 16
123 * 1st C 2nd C 3rd C
124 * +------> C0 -----> C1 -----------> C2 -----> C3
125 * | ascii | plane 1 | 4th C |
126 * ^ | 2nd C v v
127 * | | C4 <------<--------<-------+
128 * | v | (SI)
129 * +----<---+-----<----v
130 *
131 *=======================================================*/
132size_t
133_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
134 char **outbuf, size_t *outbytesleft)
135{
136 int plane_no = -1, n;
137 /* pre_plane_no: need to be static when re-entry occurs on errno set */
138 static int pre_plane_no = -1; /* previous plane number */
139
140 if (st == NULL) {
141 errno = EBADF;
142 return ((size_t) -1);
143 }
144
145 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
146 if (st->cstate == C1) {
147 if (outbytesleft && *outbytesleft >= 1
148 && outbuf && *outbuf) {
149 **outbuf = SI;
150 (*outbuf)++;
151 (*outbytesleft)--;
152 } else {
153 errno = E2BIG;
154 return((size_t) -1);
155 }
156 }
157 st->cstate = C0;
158 st->istate = IN;
159 st->_errno = 0;
160 return ((size_t) 0);
161 }
162
163#ifdef DEBUG
164 fprintf(stderr, "=== (Re-entry) iconv(): CNS11643 --> ISO 2022-7 ===\n");
165 fprintf(stderr, "st->cstate=%d\tst->istate=%d\tst->_errno=%d\tplane_no=%d\n",
166 st->cstate, st->istate, st->_errno, plane_no);
167#endif
168 st->_errno = 0; /* reset internal errno */
169 errno = 0; /* reset external errno */
170
171 /* a state machine for interpreting CNS 11643 code */
172 while (*inbytesleft > 0 && *outbytesleft > 0) {
173 switch (st->cstate) {
174 case C0: /* assuming ASCII in the beginning */
175 if (**inbuf & MSB) {
176 st->keepc[0] = (**inbuf);
177 st->cstate = C1;
178 } else { /* real ASCII */
179 if (st->istate == OUT) {
180 st->cstate = C0;
181 st->istate = IN;
182 **outbuf = SI;
183 (*outbuf)++;
184 (*outbytesleft)--;
185 if (*outbytesleft <= 0) {
186 errno = E2BIG;
187 return((size_t)-1);
188 }
189 }
190 **outbuf = **inbuf;
191 (*outbuf)++;
192 (*outbytesleft)--;
193 }
194 break;
195 case C1: /* Chinese characters: 2nd byte */
196 if ((st->keepc[0] & ONEBYTE) == MBYTE) { /* 4-byte (0x8e) */
197 plane_no = get_plane_no_by_char(**inbuf);
198 if (plane_no == -1) { /* illegal plane */
199 st->cstate = C0;
200 st->istate = IN;
201 st->_errno = errno = EILSEQ;
202 } else { /* 4-byte Chinese character */
203 st->keepc[1] = (**inbuf);
204 st->cstate = C2;
205 }
206 } else { /* 2-byte Chinese character - plane #1 */
207 if (**inbuf & MSB) { /* plane #1 */
208 st->cstate = C4;
209 st->keepc[1] = (**inbuf);
Toomas Soomea7fb1da2019-01-28 09:59:47 +0200210 st->keepc[2] = st->keepc[3] = '\0';
Alexander Pyhalov16d86562018-11-21 12:34:20 +0300211 plane_no = 1;
212 continue; /* should not advance *inbuf */
213 } else { /* input char doesn't belong
214 * to the input code set
215 */
216 st->cstate = C0;
217 st->istate = IN;
218 st->_errno = errno = EINVAL;
219 }
220 }
221 break;
222 case C2: /* plane #2 - #16 (4 bytes): get 3nd byte */
223 if (**inbuf & MSB) { /* 3rd byte */
224 st->keepc[2] = (**inbuf);
225 st->cstate = C3;
226 } else {
227 st->_errno = errno = EINVAL;
228 st->cstate = C0;
229 }
230 break;
231 case C3: /* plane #2 - #16 (4 bytes): get 4th byte */
232 if (**inbuf & MSB) { /* 4th byte */
233 st->cstate = C4;
234 st->keepc[3] = (**inbuf);
235 continue; /* should not advance *inbuf */
236 } else {
237 st->_errno = errno = EINVAL;
238 st->cstate = C0;
239 }
240 break;
241 case C4: /* Convert code from CNS 11643 to ISO 2022-7 */
242 if ((st->istate == IN) || (pre_plane_no != plane_no)) {
243 /* change plane # in Chinese mode */
244 if (st->istate == OUT) {
245 **outbuf = SI;
246 (*outbuf)++;
247 (*outbytesleft)--;
248#ifdef DEBUG
249fprintf(stderr, "(plane #=%d\tpre_plane #=%d)\t", plane_no, pre_plane_no);
250#endif
251 }
252 if (*outbytesleft < 4) {
253 st->_errno = errno = E2BIG;
254 return((size_t)-1);
255 }
256 pre_plane_no = plane_no;
257 st->istate = OUT; /* shift out */
258 **outbuf = ESC;
259 *(*outbuf+1) = '$';
260 *(*outbuf+2) = ')';
261 *(*outbuf+3) = GET_PLANEC(plane_no);
262#ifdef DEBUG
263fprintf(stderr, "ESC $ ) %c\n", *(*outbuf+3));
264#endif
265 (*outbuf) += 4;
266 (*outbytesleft) -= 4;
267 if (*outbytesleft <= 0) {
268 st->_errno = errno = E2BIG;
269 return((size_t)-1);
270 }
271 **outbuf = SO;
272 (*outbuf)++;
273 (*outbytesleft)--;
274 }
275 n = cns_to_iso(plane_no, st->keepc, *outbuf, *outbytesleft);
276 if (n > 0) {
277 (*outbuf) += n;
278 (*outbytesleft) -= n;
279 } else {
280 st->_errno = errno;
281 return((size_t)-1);
282 }
283 st->cstate = C0;
284 break;
285 default: /* should never come here */
286 st->_errno = errno = EILSEQ;
287 st->cstate = C0; /* reset state */
288 break;
289 }
290
291 (*inbuf)++;
292 (*inbytesleft)--;
293
294 if (st->_errno) {
295#ifdef DEBUG
296 fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
297 st->_errno, st->cstate);
298#endif
299 break;
300 }
301 if (errno)
302 return((size_t)-1);
303 }
304
305 if (*inbytesleft > 0 && *outbytesleft == 0) {
306 errno = E2BIG;
307 return((size_t)-1);
308 }
309 return (*inbytesleft);
310}
311
312
313/*
314 * Get plane number by char; i.e. 0xa2 returns 2, 0xae returns 14, etc.
315 * Returns -1 on error conditions
316 */
317static int get_plane_no_by_char(const char inbuf)
318{
319 int ret;
320 unsigned char uc = (unsigned char) inbuf;
321
322 ret = uc - PMASK;
323 switch (ret) {
324 case 1: /* 0x8EA1 */
325 case 2: /* 0x8EA2 */
326 case 3: /* 0x8EA3 */
327 case 4: /* 0x8EA4 */
328 case 5: /* 0x8EA5 */
329 case 6: /* 0x8EA6 */
330 case 7: /* 0x8EA7 */
331 case 12: /* 0x8EAC */
332 case 14: /* 0x8EAE */
333 case 15: /* 0x8EAF */
334 case 16: /* 0x8EB0 */
335 return (ret);
336 default:
337 return (-1);
338 }
339}
340
341
342/*
343 * CNS 11643 code --> ISO 2022-7
344 * Return: > 0 - converted with enough space in output buffer
345 * = 0 - no space in outbuf
346 */
347static int cns_to_iso(int plane_no, char keepc[], char *buf, size_t buflen)
348{
349 char cns_str[3];
350 unsigned long cns_val; /* MSB mask off CNS 11643 value */
351
352#ifdef DEBUG
353 fprintf(stderr, "%s %d ", keepc, plane_no);
354#endif
355 if (buflen < 2) {
356 errno = E2BIG;
357 return(0);
358 }
359
360 if (plane_no == 1) {
361 cns_str[0] = keepc[0] & MSB_OFF;
362 cns_str[1] = keepc[1] & MSB_OFF;
363 } else {
364 cns_str[0] = keepc[2] & MSB_OFF;
365 cns_str[1] = keepc[3] & MSB_OFF;
366 }
367 cns_val = (cns_str[0] << 8) + cns_str[1];
368#ifdef DEBUG
369 fprintf(stderr, "%x\t", cns_val);
370#endif
371
372 *buf = (cns_val & 0xff00) >> 8;
373 *(buf+1) = cns_val & 0xff;
374
375#ifdef DEBUG
376 fprintf(stderr, "->%x %x<-\t->%c %c<-\n", *buf, *(buf+1), *buf, *(buf+1));
377#endif
378 return(2);
379}
380void *
381_cv_open()
382{
383 struct _cv_state *st;
384
385 if ((st = (struct _cv_state *)malloc(sizeof(struct _cv_state))) == NULL)
386 return ((void *)-1);
387
388 st->plane_no = 0;
389 st->get_a_mbchar = 1;
390 st->first_byte = 1;
391
392 return (st);
393}
394
/*
 * Release a state obtained from _cv_open().
 */
void
_cv_close(struct _cv_state *st)
{
	free((void *)st);
}
400
401
402size_t
403_cv_enconv(struct _cv_state *st, char **cvinbuf, size_t *cvinbytesleft,
404 char **cvoutbuf, size_t *cvoutbytesleft)
405{
Toomas Soomea7fb1da2019-01-28 09:59:47 +0200406 char *inbuf;
407 char *outbuf;
408 size_t insize;
409 size_t outsize;
Alexander Pyhalov16d86562018-11-21 12:34:20 +0300410
411 unsigned char uc;
412 int i;
413
414 if (cvinbuf == NULL || *cvinbuf == NULL) { /* Reset request. */
415 if (cvoutbuf && *cvoutbuf != NULL &&
416 *cvoutbytesleft > 0 && st->plane_no != 0) {
417 **cvoutbuf = SI;
418 (*cvoutbytesleft)--;
419 (*cvoutbuf)++;
420 }
421 st->plane_no = 0;
422 st->get_a_mbchar = 1;
423 st->first_byte = 1;
424
425 return (0);
426 }
427
428
429 inbuf = *cvinbuf;
430 outbuf = *cvoutbuf;
431 insize = *cvinbytesleft;
432 outsize = *cvoutbytesleft;
433
434 while ((int) insize > 0 && (int) outsize > 0) {
435
436 if (st->get_a_mbchar) {
437 if (st->plane_no == 0) { /* short cut */
438 do {
439 uc = *inbuf;
440 if ((uc & MSB) == 0) {
441 *outbuf++ = uc;
442 outsize--;
443 inbuf++;
444 insize--;
445 } else
446 goto non_plane_0;
447 } while ((int) insize > 0 && (int) outsize > 0);
448 goto success;
449 }
450
451non_plane_0:
452 if (st->first_byte) {
453 st->first_byte = 0;
454 st->keepc[0] = uc = *inbuf++;
455 insize--;
456 if (uc & MSB) {
457 if (uc == 0x8e)
458 st->more_bytes = 3;
459 else
460 st->more_bytes = 1;
461 st->p = st->keepc + 1;
462 } else
463 st->more_bytes = 0;
464 }
465 while (st->more_bytes > 0 && (int) insize > 0) {
466 *st->p++ = *inbuf++;
467 st->more_bytes--;
468 insize--;
469 }
470 if (st->more_bytes == 0)
471 st->get_a_mbchar = 0;
472
473 /* up to this point, st->keepc contains a complete mb char */
474
475 i = get_plane_no_by_str(st->keepc);
476 st->plane_changed = (st->plane_no != i);
477 if (st->plane_changed) { /* generate SI */
478 st->planec = GET_PLANEC(i);
479 if (st->plane_no != 0) {
480 *outbuf++ = SI;
481 outsize--;
482 st->plane_no = i;
483 if ((int) outsize <= 0)
484 goto success;
485 } else
486 st->plane_no = i;
487 }
488 }
489
490 /*
491 * up to this point, st->keepc contains a complete mb char and
492 * we know the plane_no
493 */
494
495 switch (st->plane_no) {
496 case 0:
497 *outbuf++ = st->keepc[0];
498 outsize--;
499 break;
500 case 1:
501 if (st->plane_changed) {
502 if (outsize < 7)
503 goto success;
504 *outbuf++ = ESC;
505 *outbuf++ = '$';
506 *outbuf++ = ')';
507 *outbuf++ = 'G';
508 *outbuf++ = SO;
509 *outbuf++ = st->keepc[0] & MSB_OFF;
510 *outbuf++ = st->keepc[1] & MSB_OFF;
511 outsize -= 7;
512 } else { /* don't need the escape sequence */
513 if (outsize < 2)
514 goto success;
515 *outbuf++ = st->keepc[0] & MSB_OFF;
516 *outbuf++ = st->keepc[1] & MSB_OFF;
517 outsize -= 2;
518 }
519 break;
520 default:
521 if (st->plane_changed) {
522 if (outsize < 7)
523 goto success;
524 *outbuf++ = ESC;
525 *outbuf++ = '$';
526 *outbuf++ = ')';
527 *outbuf++ = st->planec;
528 *outbuf++ = SO;
529 *outbuf++ = st->keepc[2] & MSB_OFF;
530 *outbuf++ = st->keepc[3] & MSB_OFF;
531 outsize -= 7;
532 } else { /* don't need the escape sequence */
533 if (outsize < 2)
534 goto success;
535 *outbuf++ = st->keepc[2] & MSB_OFF;
536 *outbuf++ = st->keepc[3] & MSB_OFF;
537 outsize -= 2;
538 }
539 break;
540 }
541 /*
542 * up to this point, a complete multibyte character has been
543 * converted and written to outbuf, so need to grab the next
544 * mb char from inbuf
545 */
546 st->get_a_mbchar = 1;
547 st->first_byte = 1;
548 }
549
550success:
551 *cvinbytesleft = insize;
552 *cvoutbytesleft = outsize;
553 *cvinbuf = inbuf;
554 *cvoutbuf = outbuf;
555
556 return (insize);
557}
558
559static int get_plane_no_by_str(const char *inbuf) {
560 unsigned char uc = (unsigned char) *inbuf;
561
562 if (uc & MSB) {
563 if (uc != 0x8e)
564 return (1);
565 uc = *(++inbuf);
566 return (uc - 0xa0);
567 } else
568 return (0);
569}