blob: a7a45db720f84ed6d1764ee4e3576c919f4f9059 [file] [log] [blame]
Alexander Pyhalov16d86562018-11-21 12:34:20 +03001/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1995, by Sun Microsystems, Inc.
24 * All rights reserved.
25 */
26
27#include <stdio.h>
28#include <stdlib.h>
29#include <errno.h>
30#include <libintl.h>
31
32
/* Bit masks and lead bytes used when building CNS 11643 (EUC) output. */
33#define MSB 0x80 /* most significant bit */
34#define MBYTE 0x8e /* multi-byte (4 byte character) */
35#define PMASK 0xa0 /* plane number mask */
36#define ONEBYTE 0xff /* right most byte */
37#define MSB_OFF 0x7f /* mask off MSB */
38
39#define SI 0x0f /* shift in */
40#define SO 0x0e /* shift out */
41#define ESC 0x1b /* escape */
42
43/*
44 * static const char plane_char[] = "0GH23456789:;<=>?";
45 * static const char plane_char[] = "0GHIJKLMNOPQRSTUV";
46 * #define GET_PLANEC(i) (plane_char[i])
47 */
48
49#define NON_ID_CHAR '_' /* non-identified character */
50
/*
 * Per-descriptor conversion state for the _icv_*() entry points.
 * It is allocated by _icv_open(), carried across _icv_iconv() calls so a
 * sequence split between two input buffers can be resumed, and released
 * by _icv_close().
 */
51typedef struct _icv_state {
52 char keepc[4]; /* maximum # byte of CNS11643 code */
53 short cstate; /* state machine id */
54 int plane_no; /* plane number for Chinese character */
55 int _errno; /* internal errno */
56} _iconv_st;
57
/*
 * States of the _icv_iconv() parser: C0 plain ASCII, C1-C3 inside an
 * "ESC $ )" designation, C4 designated but shifted in, C5 shifted out,
 * C6/C7 waiting for the last byte of a plane-1 / plane-2..16 character.
 */
58enum _CSTATE { C0, C1, C2, C3, C4, C5, C6, C7 };
59
60
61static int get_plane_no_by_iso(const char);
62static int iso_to_cns(int, char[], char*, size_t);
63
64#define LSG2 0x4e
65#define LSG3 0x4f
66
67
/*
 * Input/output cursor pair used by the _cv_*() converter.
 * aisotoeuc() loads myin/myout/insize/outsize from the caller's buffers
 * before converting and copies them back afterwards.  mybuf is a small
 * push-back stack: xbackup() pushes a byte and xgetc() pops the most
 * recently pushed byte before it touches myin.
 */
68typedef struct IOBuf {
69 char * myin; /* next unread input byte */
70 char * myout; /* next free output byte */
71 size_t insize; /* input bytes remaining at myin */
72 size_t outsize; /* output bytes remaining at myout */
73
74 char mybuf[8]; /* pushed-back bytes, popped last-in first-out */
75 int bufc; /* number of valid bytes in mybuf */
76} IOBuf;
77
/*
 * Character converter state: the plane currently in effect.
 * zconversion() treats plane 0 as single-byte pass-through, plane 1 as
 * two-byte output and every other value as four-byte output.
 */
78typedef struct Conversion {
79 int myplane; /* set through zsetplane() */
80} Conversion;
81
/*
 * Escape-sequence / shift-state tracker used by yescSeq().
 * gxplane[] remembers which plane has been designated to each of the
 * four graphic sets; yescSeq() hands gxplane[0] to the converter on SI,
 * gxplane[1] on SO, gxplane[2] on ESC 0x4e and gxplane[3] on ESC 0x4f.
 */
82typedef struct GxCntl {
83
84 int gxplane[4]; /* plane designated to each graphic set */
85 char gxc; /* byte that followed "ESC $": one of ( ) * + */
86
87 int mygx; /* gxplane index (0-3) derived from gxc */
88 int inHLE1xConv; /* set to 1 once "ESC $ ) 0" has been seen */
89 int inHLE1xSO; /* in that mode: 1 after SO, 0 after SI */
90 Conversion *convobj; /* converter whose plane is switched */
91
92} GxCntl;
93
94
/*
 * Top-level converter object: bundles the escape-sequence tracker, the
 * character converter and the I/O cursor.  Built by aTWNiconv() and torn
 * down by adeTWNiconv().
 */
95typedef struct TWNiconv {
96 GxCntl *cntl; /* escape/shift handling */
97 Conversion *conv; /* per-character conversion */
98 IOBuf *iobuf; /* buffer cursors and push-back stack */
99
100} TWNiconv;
101
/* Handle returned by _cv_open(); it only wraps the converter object. */
102struct _cv_state {
103 TWNiconv * iconvobj;
104};
105
106extern TWNiconv * aTWNiconv();
Toomas Soomea7fb1da2019-01-28 09:59:47 +0200107extern void adeTWNiconv(TWNiconv *);
Alexander Pyhalov16d86562018-11-21 12:34:20 +0300108extern size_t aisotoeuc(TWNiconv *, char **, size_t *, char **, size_t *);
109extern void areset(TWNiconv *);
110
111extern Conversion * zConversion();
112extern void zdeConversion(Conversion *);
113extern void zsetplane(Conversion *, int);
114extern int zconversion(Conversion *, IOBuf *);
115
116extern GxCntl * yGxCntl(Conversion *);
117extern void ydeGxCntl(GxCntl *);
118extern int ygetplaneno(GxCntl *, char c);
119extern int yescSeq(GxCntl *, IOBuf *);
120
121extern IOBuf * xIOBuf();
122extern void xdeIOBuf(IOBuf *);
123extern int xgetc(IOBuf *);
124extern void xbackup(IOBuf *, int);
125extern int xputc(IOBuf *, int);
126extern int xoutsize(IOBuf *);
127
128
129/*
130 * Open; called from iconv_open()
131 */
132void *
133_icv_open()
134{
135 _iconv_st *st;
136
137 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
138 errno = ENOMEM;
139 return ((void *) -1);
140 }
141
142 st->cstate = C0;
143 st->plane_no = 0;
144 st->_errno = 0;
145
146#ifdef DEBUG
147 fprintf(stderr, "========== iconv(): ISO2022-7 --> CNS 11643 ==========\n");
148#endif
149 return ((void *) st);
150}
151
152
153/*
154 * Close; called from iconv_close()
155 */
156void
157_icv_close(_iconv_st *st)
158{
159 if (!st)
160 errno = EBADF;
161 else
162 free(st);
163}
164
165
166/*
167 * Actual conversion; called from iconv()
168 */
169/*=========================================================================
170 *
171 * State Machine for interpreting ISO 2022-7 code
172 *
173 *=========================================================================
174 *
175 * plane 2 - 16
176 * +---------->-------+
177 * plane ^ |
178 * ESC $ ) number SO | plane 1 v
179 * +-> C0 ----> C1 ---> C2 ---> C3 ------> C4 --> C5 -------> C6 C7
180 * | | ascii | ascii | ascii | ascii | SI | | | |
181 * +----------------------------+ <-----+------+ +------<---+------+
182 * ^ |
183 * | ascii v
184 * +---------<-------------<---------+
185 *
186 *=========================================================================*/
187size_t
188_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
189 char **outbuf, size_t *outbytesleft)
190{
191 int n;
192
193 if (st == NULL) {
194 errno = EBADF;
195 return ((size_t) -1);
196 }
197
198 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
199 st->cstate = C0;
200 st->_errno = 0;
201 return ((size_t) 0);
202 }
203
204#ifdef DEBUG
205 fprintf(stderr, "=== (Re-entry) iconv(): ISO 2022-7 --> CNS 11643 ===\n");
206#endif
207 st->_errno = 0; /* reset internal errno */
208 errno = 0; /* reset external errno */
209
210 /* a state machine for interpreting ISO 2022-7 code */
211 while (*inbytesleft > 0 && *outbytesleft > 0) {
212 switch (st->cstate) {
213 case C0: /* assuming ASCII in the beginning */
214 if (**inbuf == ESC) {
215 st->cstate = C1;
216 } else { /* real ASCII */
217 **outbuf = **inbuf;
218 (*outbuf)++;
219 (*outbytesleft)--;
220 }
221 break;
222 case C1: /* got ESC, expecting $ */
223 if (**inbuf == '$') {
224 st->cstate = C2;
225 } else {
226 **outbuf = ESC;
227 (*outbuf)++;
228 (*outbytesleft)--;
229 st->cstate = C0;
230 st->_errno = 0;
231 continue; /* don't advance inbuf */
232 }
233 break;
234 case C2: /* got $, expecting ) */
235 if ((**inbuf == ')') || (**inbuf == '*')) {
236 st->cstate = C3;
237 } else {
238 if (*outbytesleft < 2) {
239 st->_errno = errno = E2BIG;
240 return((size_t)-1);
241 }
242 **outbuf = ESC;
243 *(*outbuf+1) = '$';
244 (*outbuf) += 2;
245 (*outbytesleft) -= 2;
246 st->cstate = C0;
247 st->_errno = 0;
248 continue; /* don't advance inbuf */
249 }
250 break;
251 case C3: /* got ) expecting G,H,I,...,V */
252 st->plane_no = get_plane_no_by_iso(**inbuf);
253 if (st->plane_no > 0 ) { /* plane #1 - #16 */
254 st->cstate = C4;
255 } else {
256 if (*outbytesleft < 3) {
257 st->_errno = errno = E2BIG;
258 return((size_t)-1);
259 }
260 **outbuf = ESC;
261 *(*outbuf+1) = '$';
262 *(*outbuf+2) = ')';
263 (*outbuf) += 3;
264 (*outbytesleft) -= 3;
265 st->cstate = C0;
266 st->_errno = 0;
267 continue; /* don't advance inbuf */
268 }
269 break;
270 case C4: /* SI (Shift In) */
271 if (**inbuf == ESC) {
272 st->cstate = C1;
273 break;
274 }
275 if (**inbuf == SO) {
276#ifdef DEBUG
277 fprintf(stderr, "<-------------- SO -------------->\n");
278#endif
279 st->cstate = C5;
280 } else { /* ASCII */
281 **outbuf = **inbuf;
282 (*outbuf)++;
283 (*outbytesleft)--;
284 st->cstate = C0;
285 st->_errno = 0;
286 }
287 break;
288 case C5: /* SO (Shift Out) */
289 if (**inbuf == SI) {
290#ifdef DEBUG
291 fprintf(stderr, ">-------------- SI --------------<\n");
292#endif
293 st->cstate = C4;
294 } else { /* 1st Chinese character */
295 if (st->plane_no == 1) {
296 st->keepc[0] = (char) (**inbuf | MSB);
297 st->cstate = C6;
298 } else { /* 4-bypte code: plane #2 - #16 */
299 st->keepc[0] = (char) MBYTE;
300 st->keepc[1] = (char) (PMASK +
301 st->plane_no);
302 st->keepc[2] = (char) (**inbuf | MSB);
303 st->cstate = C7;
304 }
305 }
306 break;
307 case C6: /* plane #1: 2nd Chinese character */
308 st->keepc[1] = (char) (**inbuf | MSB);
Toomas Soomea7fb1da2019-01-28 09:59:47 +0200309 st->keepc[2] = st->keepc[3] = '\0';
Alexander Pyhalov16d86562018-11-21 12:34:20 +0300310 n = iso_to_cns(1, st->keepc, *outbuf, *outbytesleft);
311 if (n > 0) {
312 (*outbuf) += n;
313 (*outbytesleft) -= n;
314 } else {
315 st->_errno = errno;
316 return((size_t)-1);
317 }
318 st->cstate = C5;
319 break;
320 case C7: /* 4th Chinese character */
321 st->keepc[3] = (char) (**inbuf | MSB);
322 n = iso_to_cns(st->plane_no, st->keepc, *outbuf,
323 *outbytesleft);
324 if (n > 0) {
325 (*outbuf) += n;
326 (*outbytesleft) -= n;
327 } else {
328 st->_errno = errno;
329 return((size_t)-1);
330 }
331 st->cstate = C5;
332 break;
333 default: /* should never come here */
334 st->_errno = errno = EILSEQ;
335 st->cstate = C0; /* reset state */
336 break;
337 }
338
339 (*inbuf)++;
340 (*inbytesleft)--;
341
342 if (st->_errno) {
343#ifdef DEBUG
344 fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\tinbuf=%x\n",
345 st->_errno, st->cstate, **inbuf);
346#endif
347 break;
348 }
349 if (errno)
350 return((size_t)-1);
351 }
352
353 if (*inbytesleft > 0 && *outbytesleft == 0) {
354 errno = E2BIG;
355 return((size_t)-1);
356 }
357 return (*inbytesleft);
358}
359
360
/*
 * Get plane number by ISO plane char; i.e. 'G' returns 1, 'H' returns 2,
 * and so on up to 'V', which returns 16.  The character '0' returns 0.
 * Returns -1 for every other character.
 */
static int get_plane_no_by_iso(const char inbuf)
{
	const unsigned char designator = (unsigned char)inbuf;
	int offset;

	if (designator == '0')		/* plane #0 */
		return (0);

	/* 'G' is one past 'F', so the distance from 'F' is the plane number */
	offset = designator - 'F';
	if (offset >= 1 && offset <= 16)
		return (offset);

	return (-1);
}
396
397
/*
 * ISO 2022-7 code --> CNS 11643-1992 (Chinese EUC)
 *
 * Copies the already assembled EUC bytes in keepc[] to buf: the first two
 * bytes for plane 1, all four bytes for planes 2 - 16.
 *
 * Return: > 0 - number of bytes written to buf (2 or 4)
 *         = 0 - nothing written; errno is E2BIG when buflen is too small
 *               and EILSEQ when plane_no is not in 1 - 16
 */
static int iso_to_cns(int plane_no, char keepc[], char *buf, size_t buflen)
{
	size_t	ret_size;	/* return buffer size */
	size_t	i;

	/*
	 * Refuse unknown planes instead of claiming that four bytes were
	 * written while leaving buf untouched.
	 */
	if (plane_no < 1 || plane_no > 16) {
		errno = EILSEQ;
		return (0);
	}

	ret_size = (plane_no == 1) ? 2 : 4;

#ifdef DEBUG
	/* keepc[] is not NUL terminated for 4-byte codes: bound the print */
	fprintf(stderr, "%.*s %d ", (int)ret_size, keepc, plane_no);
#endif

	if (buflen < ret_size) {
		errno = E2BIG;
		return (0);
	}

	for (i = 0; i < ret_size; i++)
		buf[i] = keepc[i];

#ifdef DEBUG
	fprintf(stderr, "\t#%d ->%.*s<-\n", plane_no, (int)ret_size, keepc);
#endif

	return ((int)ret_size);
}
453void *
454_cv_open(void)
455{
456 struct _cv_state *st;
457
458 if ((st = (struct _cv_state *) malloc(sizeof (struct _cv_state))) ==
459 NULL)
460 return ((void *) -1);
461
462 if ((st->iconvobj = aTWNiconv()) == NULL) {
463 free(st);
464 return ((void *) -1);
465 }
466
467 return ((void *) st);
468}
469
470void
471_cv_close(struct _cv_state *st)
472{
473 adeTWNiconv(st->iconvobj);
474 free(st);
475}
476
477
478size_t
479_cv_enconv(struct _cv_state *st, char **cvinbuf, size_t *cvinbytesleft,
480 char **cvoutbuf, size_t *cvoutbytesleft)
481{
482 if (cvinbuf == NULL || *cvinbuf == NULL) { /* Reset request. */
483 /*
484 * Note that no shift sequence is needed for
485 * the target encoding.
486 */
487 areset(st->iconvobj);
488 return (0);
489 }
490
491 return (aisotoeuc(st->iconvobj, cvinbuf, cvinbytesleft,
492 cvoutbuf, cvoutbytesleft));
493}
494
495TWNiconv * aTWNiconv() {
496 TWNiconv *ret = (TWNiconv *) malloc(sizeof (TWNiconv));
497 if (ret == NULL)
498 return (NULL);
499 if ((ret->conv = zConversion()) == NULL) {
500 free(ret);
501 return (NULL);
502 }
503 if ((ret->cntl = yGxCntl(ret->conv)) == NULL) {
504 free(ret->conv);
505 free(ret);
506 return (NULL);
507 }
508 if ((ret->iobuf = xIOBuf()) == NULL) {
509 free(ret->cntl);
510 free(ret->conv);
511 free(ret);
512 return (NULL);
513 }
514 return (ret);
515}
516
517size_t
518aisotoeuc(TWNiconv *this, char **inbuf, size_t *inbufsize,
519 char **outbuf, size_t *outbufsize) {
520
521 this->iobuf->myin = *inbuf;
522 this->iobuf->myout = *outbuf;
523 this->iobuf->insize = *inbufsize;
524 this->iobuf->outsize = *outbufsize;
525
526 while (1) {
527 int ret;
528 if ((ret = yescSeq(this->cntl, this->iobuf)) == -1)
529 break;
530 else if (ret != 0)
531 continue;
532
533 if (zconversion(this->conv, this->iobuf) == -1)
534 break;
535 }
536
537 *inbuf = this->iobuf->myin;
538 *outbuf = this->iobuf->myout;
539 *inbufsize = this->iobuf->insize;
540 *outbufsize = this->iobuf->outsize;
541
542 return (*inbufsize);
543}
544
545void
546adeTWNiconv(TWNiconv *this) {
547 zdeConversion(this->conv);
548 ydeGxCntl(this->cntl);
549 xdeIOBuf(this->iobuf);
550 free(this);
551}
552
553void
554areset(TWNiconv *this) {
555 zdeConversion(this->conv);
556 ydeGxCntl(this->cntl);
557 xdeIOBuf(this->iobuf);
558 this->conv = zConversion();
559 this->cntl = yGxCntl(this->conv);
560 this->iobuf = xIOBuf();
561}
562
563Conversion *
564zConversion() {
565 Conversion *ret = (Conversion *) malloc(sizeof (Conversion));
566 if (ret == NULL)
567 return (NULL);
568 ret->myplane = 0;
569 return (ret);
570}
571
572void
573zdeConversion(Conversion *this) { free(this); }
574
575void
576zsetplane(Conversion *this, int i) { this->myplane = i; }
577
578int
579zconversion(Conversion *this, IOBuf *ioobj) {
580 int c1, c2, c;
581
582 switch (this->myplane) {
583
584 case 0:
585 if (xoutsize(ioobj) < 1)
586 return (-1);
587
588 if ((c = xgetc(ioobj)) == -1)
589 return (-1);
590 xputc(ioobj, c);
591 return (0);
592 case 1:
593 if (xoutsize(ioobj) < 2)
594 return (-1);
595
596 if ((c1 = xgetc(ioobj)) == -1)
597 return (-1);
598 if ((c2 = xgetc(ioobj)) == -1) {
599 xbackup(ioobj, c1);
600 return (-1);
601 }
602 xputc(ioobj, c1 | MSB);
603 xputc(ioobj, c2 | MSB);
604 return (0);
605 default: /* plane 2 to 15 */
606 if (xoutsize(ioobj) < 4)
607 return (-1);
608
609 if ((c1 = xgetc(ioobj)) == -1)
610 return (-1);
611 if ((c2 = xgetc(ioobj)) == -1) {
612 xbackup(ioobj, c1);
613 return (-1);
614 }
615 xputc(ioobj, 0x8e);
616 xputc(ioobj, 0xa0 + this->myplane);
617 xputc(ioobj, c1 | MSB);
618 xputc(ioobj, c2 | MSB);
619 return (0);
620 }
621}
622
623GxCntl *
624yGxCntl(Conversion *obj) {
625 GxCntl *ret = (GxCntl *) malloc(sizeof (GxCntl));
626 if (ret == NULL)
627 return (NULL);
628
629 ret->convobj = obj;
630 ret->gxplane[0] = ret->gxplane[1] = ret->gxplane[2] =
631 ret->gxplane[3] = 0;
632 ret->inHLE1xConv = 0;
633 return (ret);
634}
635
636void
637ydeGxCntl(GxCntl *this) {
638 free(this);
639}
640
641int
642yescSeq(GxCntl *this, IOBuf *obj) {
643 int c = xgetc(obj);
644
645 if (c == -1)
646 return (-1);
647
648 switch (c) {
649 case ESC:
650 break;
651 case SI:
652 zsetplane(this->convobj, this->gxplane[0]);
653 if (this->inHLE1xConv == 1)
654 this->inHLE1xSO = 0;
655 return (1);
656 case SO:
657 if (this->inHLE1xConv == 1) {
658 if (this->inHLE1xSO != 0) {
659 xbackup(obj, SO);
660 return (0);
661 } else
662 this->inHLE1xSO = 1;
663
664 }
665 zsetplane(this->convobj, this->gxplane[1]);
666 return (1);
667 default:
668 xbackup(obj, c);
669 return (0);
670 }
671
672 if ((c = xgetc(obj)) == -1) {
673 xbackup(obj, ESC);
674 return (1);
675 }
676
677 switch (c) {
678
679 case LSG2:
680 zsetplane(this->convobj, this->gxplane[2]);
681 return (1);
682 case LSG3:
683 zsetplane(this->convobj, this->gxplane[3]);
684 return (1);
685 case '$':
686 break;
687 case '(':
688 if (xgetc(obj) != -1) {
689 this->gxplane[0] = 0;
690 break;
691 }
692 /* else fall through */
693 default:
694 xbackup(obj, c);
695 xbackup(obj, ESC);
696 return (0);
697 }
698
699 if ((this->gxc = xgetc(obj)) == -1) {
700 xbackup(obj, '$');
701 xbackup(obj, ESC);
702 return (-1);
703 }
704
705 switch (this->gxc) {
706
707 case '(':
708 this->mygx = 0;
709 break;
710 case ')':
711 this->mygx = 1;
712 break;
713 case '*':
714 this->mygx = 2;
715 break;
716 case '+':
717 this->mygx = 3;
718 break;
719 default:
720 xbackup(obj, this->gxc);
721 xbackup(obj, '$');
722 xbackup(obj, ESC);
723 return (0);
724 }
725
726 if ((c = xgetc(obj)) == -1) {
727 xbackup(obj, this->gxc);
728 xbackup(obj, '$');
729 xbackup(obj, ESC);
730 return (-1);
731 }
732
733 if (c == '0' && this->mygx == 1) { /* HLE 1.x */
734 this->inHLE1xConv = 1;
735 this->inHLE1xSO = 0;
736 this->gxplane[1] = 1;
737 } else {
738 this->inHLE1xConv = 0;
739 this->gxplane[this->mygx] = ygetplaneno(this, c);
740 }
741 return (1);
742}
743
744int
745ygetplaneno(GxCntl *dummy, char c) {
746 if (c == 'G')
747 return (1);
748 else if (c == 'H')
749 return (2);
750 else
751 return (c - '0' + 1);
752}
753
754IOBuf *
755xIOBuf() {
756 IOBuf *ret = (IOBuf *) malloc(sizeof (IOBuf));
757 if (ret == NULL)
758 return (NULL);
759 ret->bufc = 0;
760 return (ret);
761}
762
763void
764xdeIOBuf(IOBuf *this) {
765 free(this);
766}
767
768int
769xgetc(IOBuf *this) {
770 if (this->bufc > 0)
771 return (this->mybuf[--this->bufc]);
772
773 if (this->insize == 0)
774 return (-1);
775 else {
776 this->insize--;
777 return (*this->myin++);
778 }
779}
780
781int
782xputc(IOBuf *this, int c) {
783 if (this->outsize <= 0)
784 return (-1);
785 *(this->myout)++ = c;
786 this->outsize--;
787 return (0);
788}
789
790void
791xbackup(IOBuf *this, int c) { this->mybuf[this->bufc++] = c; }
792
793int
794xoutsize(IOBuf *this) { return (this->outsize); }