| /* |
| * Copyright 2005 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| */ |
| |
| /* Copyright 1984, 1986, 1987, 1988, 1989 AT&T */ |
| /* All Rights Reserved */ |
| |
| /* |
| * Copyright 1980 Regents of the University of California. |
| * All rights reserved. The Berkeley software License Agreement |
| * specifies the terms and conditions for redistribution. |
| */ |
| |
| #pragma ident "%Z%%M% %I% %E% SMI" |
| |
| /* |
| * Get name sections from manual pages. |
| * -t for building toc |
| * -i for building intro entries |
| * other apropos database |
| */ |
| |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <stdarg.h> |
| #include <string.h> |
| #include <unistd.h> |
| #include <limits.h> |
| #include <locale.h> |
| #include <wchar.h> |
| #include <errno.h> |
| #include <sys/param.h> |
| |
| #define PLEN 3 /* prefix length "man" */ |
| |
| static char path[MAXPATHLEN+1]; |
| static int tocrc; |
| static int intro; |
| static char *progname; |
| |
| static void trimln(char *); |
| static void roff_trim(char *cp); |
| static void doname(char *); |
| static void section(char *, char *); |
| static void split(char *, char *); |
| static void dorefname(char *); |
| static void troffpage(char *); |
| static void sgmlpage(char *); |
| |
| /* |
| * Test to see if this is an SGML manpage or a regular manpage |
| * Unless the first line begins with <!DOCTYPE, we assume it isn't. |
| */ |
| static int |
| issgml(FILE *fp) |
| { |
| static const char magic[] = "<!DOCTYPE"; |
| char buf[sizeof (magic)]; |
| size_t n = sizeof (magic) - 1; |
| |
| if (read(fileno(fp), buf, n) != n || |
| lseek(fileno(fp), 0, SEEK_SET) != 0) |
| return (0); |
| return (strncmp(magic, buf, n) == 0); |
| } |
| |
| int |
| main(int argc, char *argv[]) |
| { |
| int c; |
| |
| (void) setlocale(LC_ALL, ""); |
| |
| progname = argv[0]; |
| |
| while ((c = getopt(argc, argv, "it")) != EOF) |
| switch (c) { |
| case 't': |
| tocrc++; |
| break; |
| case 'i': |
| intro++; |
| break; |
| case '?': |
| default: |
| (void) fprintf(stderr, |
| "usage: %s [-i][-t] files..\n", progname); |
| exit(1); |
| } |
| |
| if (getcwd(path, sizeof (path)) == NULL) { |
| (void) fprintf(stderr, "%s: getcwd: %s\n", progname, path); |
| exit(1); |
| } |
| |
| for (; optind < argc; optind++) { |
| char *name = argv[optind]; |
| |
| if (freopen(name, "r", stdin) == 0) { |
| (void) fprintf(stderr, |
| "%s: %s: %s\n", progname, name, strerror(errno)); |
| continue; |
| } |
| |
| /* |
| * Most of the info we care about is in the first kbyte |
| */ |
| (void) setvbuf(stdin, NULL, _IOFBF, 1024); |
| |
| if (issgml(stdin)) |
| sgmlpage(name); |
| else |
| troffpage(name); |
| } |
| |
| return (0); |
| } |
| |
| /* |
| * Parse a troff-format manpage |
| */ |
| static void |
| troffpage(char *name) |
| { |
| char headbuf[BUFSIZ]; |
| char linbuf[BUFSIZ]; |
| char *strptr; |
| int i = 0; |
| |
| for (;;) { |
| if (fgets(headbuf, sizeof (headbuf), stdin) == NULL) |
| return; |
| if (headbuf[0] != '.') |
| continue; |
| if (headbuf[1] == 'T' && headbuf[2] == 'H') |
| break; |
| if (headbuf[1] == 't' && headbuf[2] == 'h') |
| break; |
| } |
| for (;;) { |
| if (fgets(linbuf, sizeof (linbuf), stdin) == NULL) |
| return; |
| if (linbuf[0] != '.') |
| continue; |
| if (linbuf[1] == 'S' && linbuf[2] == 'H') |
| break; |
| if (linbuf[1] == 's' && linbuf[2] == 'h') |
| break; |
| } |
| trimln(headbuf); |
| if (tocrc) |
| doname(name); |
| if (!intro) |
| section(name, headbuf); |
| for (;;) { |
| if (fgets(linbuf, sizeof (linbuf), stdin) == NULL) |
| break; |
| if (linbuf[0] == '.') { |
| if (linbuf[1] == 'S' && linbuf[2] == 'H') |
| break; |
| if (linbuf[1] == 's' && linbuf[2] == 'h') |
| break; |
| if (linbuf[1] == '\\' && linbuf[2] == '"') |
| continue; |
| } |
| trimln(linbuf); |
| roff_trim(linbuf); |
| if (intro) { |
| split(linbuf, name); |
| continue; |
| } |
| if (i != 0) |
| (void) printf(" "); |
| i++; |
| (void) printf("%s", linbuf); |
| } |
| (void) printf("\n"); |
| } |
| |
| |
| /* |
| * Substitute section defined in page with new section spec |
| * of the form xx/yy where xx is the section suffix of the |
| * directory and yy is the filename extension (unless xx |
| * and yy are equal, in which case xx is the section). |
| * Pages should be placed in their proper directory with the |
| * proper name to simplify things. |
| * |
| * For example take the following names: |
| * man1/ar.1v (1/1V) |
| * man1/find.1 (1) |
| * man1/loco (1/) |
| * |
| */ |
| static void |
| section(char *name, char *buf) |
| { |
| char scratch[MAXPATHLEN+1]; |
| char *p = buf; |
| char *dir, *fname; |
| char *dp, *np; |
| int i; |
| int plen = PLEN; |
| |
| /* |
| * split dirname and filename |
| */ |
| (void) strcpy(scratch, name); |
| if ((fname = strrchr(scratch, '/')) == NULL) { |
| fname = name; |
| dir = path; |
| } else { |
| dir = scratch; |
| *fname = 0; |
| fname++; |
| } |
| dp = strrchr(dir, '/'); |
| |
| if (*(dp+1) == 's') |
| plen = PLEN + 1; |
| |
| if (dp != NULL) { |
| dp = dp+plen+1; |
| } else { |
| dp = dir+plen; |
| } |
| np = strrchr(fname, '.'); |
| if (np != NULL) { |
| ++np; |
| } else { |
| np = ""; |
| } |
| for (i = 0; i < 2; i++) { |
| while (*p && *p != ' ' && *p != '\t') |
| p++; |
| if (!*p) |
| break; |
| while (*p && (*p == ' ' || *p == '\t')) |
| p++; |
| if (!*p) |
| break; |
| } |
| *p++ = 0; |
| (void) printf("%s", buf); |
| if (strcmp(np, dp) == 0) |
| (void) printf("%s", dp); |
| else |
| (void) printf("%s/%s", dp, np); |
| while (*p && *p != ' ' && *p != '\t') |
| p++; |
| (void) printf("%s\t", p); |
| } |
| |
| static void |
| trimln(char *cp) |
| { |
| while (*cp) |
| cp++; |
| if (*--cp == '\n') |
| *cp = 0; |
| } |
| |
| static void |
| roff_trim(char *cp) |
| { |
| if (*cp == '.') { |
| while ((*cp != ' ') && (*cp != '\0')) { |
| strcpy(cp, cp+1); |
| } |
| strcpy(cp, cp+1); |
| } |
| while (*cp) { |
| if (strncmp(cp, "\\f", 2) == 0) { |
| if ((*(cp+2) >= 48) && (*(cp+2) <= 57)) { |
| strcpy(cp, cp+3); |
| } |
| if (*(cp+2) == '(') { |
| strcpy(cp, cp+5); |
| } |
| } |
| cp++; |
| } |
| } |
| |
| static void |
| doname(char *name) |
| { |
| char *dp = name, *ep; |
| |
| again: |
| while (*dp && *dp != '.') |
| (void) putchar(*dp++); |
| if (*dp) |
| for (ep = dp+1; *ep; ep++) |
| if (*ep == '.') { |
| (void) putchar(*dp++); |
| goto again; |
| } |
| (void) putchar('('); |
| if (*dp) |
| dp++; |
| while (*dp) |
| (void) putchar(*dp++); |
| (void) putchar(')'); |
| (void) putchar(' '); |
| } |
| |
| static void |
| split(char *line, char *name) |
| { |
| char *cp, *dp; |
| char *sp, *sep; |
| |
| cp = strchr(line, '-'); |
| if (cp == 0) |
| return; |
| sp = cp + 1; |
| for (--cp; *cp == ' ' || *cp == '\t' || *cp == '\\'; cp--) |
| ; |
| *++cp = '\0'; |
| while (*sp && (*sp == ' ' || *sp == '\t')) |
| sp++; |
| for (sep = "", dp = line; dp && *dp; dp = cp, sep = "\n") { |
| cp = strchr(dp, ','); |
| if (cp) { |
| char *tp; |
| |
| for (tp = cp - 1; *tp == ' ' || *tp == '\t'; tp--) |
| ; |
| *++tp = '\0'; |
| for (++cp; *cp == ' ' || *cp == '\t'; cp++) |
| ; |
| } |
| (void) printf("%s%s\t", sep, dp); |
| dorefname(name); |
| (void) printf("\t%s", sp); |
| } |
| } |
| |
| static void |
| dorefname(char *name) |
| { |
| char *dp = name, *ep; |
| |
| again: |
| while (*dp && *dp != '.') |
| (void) putchar(*dp++); |
| if (*dp) |
| for (ep = dp+1; *ep; ep++) |
| if (*ep == '.') { |
| (void) putchar(*dp++); |
| goto again; |
| } |
| (void) putchar('.'); |
| if (*dp) |
| dp++; |
| while (*dp) |
| (void) putchar(*dp++); |
| } |
| |
| /* |
| * The rest of the routines in the file form a simplistic parser |
| * for SGML manpages. We assume the input is syntactically correct |
| * SGML, and that the fields occur in the input file in order. |
| */ |
| |
| /* |
| * Some utilities for constructing arbitrary length wide character strings |
| */ |
| |
| typedef struct { |
| wchar_t *str; |
| size_t size; |
| long index; |
| } string_t; |
| |
| #define DEF_STR_SIZE 16 |
| #define DEF_STR_GROWTH 16 |
| |
| static void |
| outofspace(char *where) |
| { |
| (void) fprintf(stderr, "%s: '%s' - out of memory\n", progname, where); |
| exit(1); |
| } |
| |
| static string_t * |
| newstring(size_t initial) |
| { |
| string_t *s = malloc(sizeof (*s)); |
| |
| if (s == NULL) |
| outofspace("new s"); |
| |
| initial *= sizeof (wchar_t); |
| if (initial < DEF_STR_SIZE) |
| initial = DEF_STR_SIZE; |
| |
| s->str = malloc(initial); |
| if (s->str == NULL) |
| outofspace("new str"); |
| |
| s->size = initial; |
| s->index = 0; |
| *s->str = L'\0'; |
| return (s); |
| } |
| |
| static void |
| delstring(string_t **s) |
| { |
| free((*s)->str); |
| (*s)->str = NULL; |
| free(*s); |
| *s = NULL; |
| } |
| |
| static wchar_t * |
| getwstring(string_t *s) |
| { |
| static const wchar_t wnull = L'\0'; |
| |
| if (s) |
| return (s->str); |
| return ((wchar_t *)&wnull); |
| } |
| |
| static char * |
| getcstring(string_t *s) |
| { |
| size_t len = (wcslen(s->str) + 1) * MB_CUR_MAX; |
| char *cstr = malloc(len); |
| char *p = cstr; |
| wchar_t *wp = s->str; |
| |
| if (p == NULL) |
| outofspace("getc"); |
| while (*wp) |
| p += wctomb(p, *wp++); |
| *p = '\0'; |
| return (cstr); |
| } |
| |
| static void |
| appendwstring(string_t *s, const wchar_t *str) |
| { |
| size_t len = wcslen(str) + 1; |
| |
| s->size += sizeof (wchar_t) * len; |
| s->str = realloc(s->str, s->size); |
| if (s->str == NULL) |
| outofspace("appendw"); |
| (void) wcscat(s->str, str); |
| s->index = wcslen(s->str) + 1; |
| } |
| |
| static void |
| putwstring(string_t *s, wchar_t wc) |
| { |
| if ((s->index + 1) * sizeof (wchar_t) >= s->size) { |
| s->size += DEF_STR_GROWTH; |
| s->str = realloc(s->str, s->size); |
| if (s->str == NULL) |
| outofspace("put"); |
| } |
| s->str[s->index++] = wc; |
| } |
| |
| /* |
| * Find the closing > of an SGML comment block |
| * (allowing for multibyte, embedded, comments) |
| */ |
| static void |
| eatcomments(void) |
| { |
| int pending = 1; |
| |
| while (pending) |
| switch (getwchar()) { |
| default: |
| break; |
| case L'<': |
| pending++; |
| break; |
| case L'>': |
| pending--; |
| break; |
| case WEOF: |
| return; |
| } |
| } |
| |
| /* |
| * Find the next token on stdin. |
| * Handles nested comment strings, and removes any trailing newlines |
| * from the stream after the closing '>'. |
| */ |
| static int |
| find_token(char *tokbuf, size_t tokbuflen) |
| { |
| int c; |
| wint_t wc; |
| char *tokp; |
| |
| top: |
| while ((wc = getwchar()) != WEOF) |
| if (wc == L'<') |
| break; |
| |
| if (wc == WEOF && errno == EILSEQ) |
| return (0); |
| |
| switch (c = getchar()) { |
| case EOF: |
| return (0); |
| default: |
| (void) ungetc(c, stdin); |
| break; |
| case '!': |
| eatcomments(); |
| goto top; |
| } |
| |
| tokp = tokbuf; |
| |
| while ((c = getchar()) != EOF) { |
| if (c == '>') { |
| while ((c = getchar()) != EOF) |
| if (c != '\n') { |
| (void) ungetc(c, stdin); |
| break; |
| } |
| *tokp = '\0'; |
| return (1); |
| } |
| if (tokp - tokbuf < tokbuflen) |
| *tokp++ = (char)c; |
| } |
| |
| return (0); |
| } |
| |
| /* |
| * This structure is filled out during the parsing of each page we encounter |
| */ |
| typedef struct { |
| char *name; |
| string_t *title; |
| string_t *volnum; |
| string_t *date; |
| string_t *names; |
| string_t *purpose; |
| } manpage_t; |
| |
| static void |
| warning(manpage_t *m, const char *fmt, ...) |
| { |
| va_list ap; |
| va_start(ap, fmt); |
| (void) fprintf(stderr, "%s: %s - ", progname, m->name); |
| (void) vfprintf(stderr, fmt, ap); |
| va_end(ap); |
| } |
| |
| /* |
| * Fetch a string from stdin, terminated by the endtoken. |
| * These strings may be localized, so do this with wide characters. |
| * Hack: skip over (completely ignore) all other tokens |
| * Hack: map all &blort; constructs to spaces. |
| */ |
| static string_t * |
| filestring(manpage_t *m, size_t initial, char *endtoken) |
| { |
| char tokbuf[BUFSIZ * MB_LEN_MAX]; |
| string_t *s = newstring(initial); |
| wint_t wc; |
| |
| while ((wc = getwchar()) != WEOF) |
| switch (wc) { |
| case L'\n': |
| if ((wc = getwchar()) != WEOF) |
| (void) ungetwc(wc, stdin); |
| if (wc != L'<') |
| putwstring(s, L' '); |
| break; |
| case L'<': |
| (void) ungetwc(wc, stdin); |
| if (!find_token(tokbuf, sizeof (tokbuf)) || |
| strcasecmp(endtoken, tokbuf) == 0) |
| goto done; |
| break; |
| case L'&': |
| while ((wc = getwchar()) != WEOF) |
| if (wc == L';') |
| break; |
| wc = L' '; |
| /* FALLTHROUGH */ |
| default: |
| putwstring(s, wc); |
| break; |
| } |
| |
| if (errno == EILSEQ) |
| warning(m, "%s while parsing %s\n", strerror(errno), endtoken); |
| done: |
| putwstring(s, L'\0'); |
| return (s); |
| } |
| |
| /* |
| * <refentrytitle> TITLE </refentrytitle> |
| */ |
| static int |
| refentrytitle(manpage_t *m) |
| { |
| if (m->title != NULL) |
| warning(m, "repeated refentrytitle\n"); |
| m->title = filestring(m, 8, "/refentrytitle"); |
| return (1); |
| } |
| |
| /* |
| * <manvolnum> MANVOLNUM </manvolnum> |
| */ |
| static int |
| manvolnum(manpage_t *m) |
| { |
| if (m->volnum != NULL) |
| warning(m, "repeated manvolnum\n"); |
| m->volnum = filestring(m, 3, "/manvolnum"); |
| return (1); |
| } |
| |
| /* |
| * <refmiscinfo class="date"> DATE </refmiscinfo> |
| */ |
| static int |
| refmiscinfo_date(manpage_t *m) |
| { |
| if (m->date != NULL) |
| warning(m, "repeated date\n"); |
| m->date = filestring(m, 11, "/refmiscinfo"); |
| return (1); |
| } |
| |
| /* |
| * .. </refmeta> |
| */ |
| static int |
| print_refmeta(manpage_t *m) |
| { |
| char headbuf[BUFSIZ]; |
| |
| (void) snprintf(headbuf, sizeof (headbuf), ".TH %ws %ws \"%ws\"", |
| getwstring(m->title), getwstring(m->volnum), getwstring(m->date)); |
| |
| trimln(headbuf); |
| if (tocrc) |
| doname(m->name); |
| if (!intro) |
| section(m->name, headbuf); |
| |
| if (m->title) |
| delstring(&m->title); |
| if (m->volnum) |
| delstring(&m->volnum); |
| if (m->date) |
| delstring(&m->date); |
| |
| return (1); |
| } |
| |
| static int |
| appendname(manpage_t *m, char *term) |
| { |
| string_t *r = filestring(m, 0, term); |
| |
| if (m->names) { |
| appendwstring(m->names, L", "); |
| appendwstring(m->names, getwstring(r)); |
| delstring(&r); |
| } else |
| m->names = r; |
| return (1); |
| } |
| |
| /* |
| * <refdescriptor> REFDESCRIPTOR </refdescriptor> |
| */ |
| static int |
| refdescriptor(manpage_t *m) |
| { |
| return (appendname(m, "/refdescriptor")); |
| } |
| |
| /* |
| * <refname> REFNAME </refname> |
| */ |
| static int |
| refname(manpage_t *m) |
| { |
| return (appendname(m, "/refname")); |
| } |
| |
| /* |
| * <refpurpose> PURPOSE </refpurpose> |
| */ |
| static int |
| refpurpose(manpage_t *m) |
| { |
| if (m->purpose != NULL) |
| warning(m, "repeated refpurpose\n"); |
| m->purpose = filestring(m, 0, "/refpurpose"); |
| return (1); |
| } |
| |
| /* |
| * .. </refnamediv> - this is our chance to bail out. |
| */ |
| static int |
| terminate(manpage_t *m) |
| { |
| if (m->names) { |
| appendwstring(m->names, L" \\- "); |
| appendwstring(m->names, getwstring(m->purpose)); |
| if (intro) { |
| char *buf = getcstring(m->names); |
| split(buf, m->name); |
| free(buf); |
| } else |
| (void) printf("%ws", getwstring(m->names)); |
| } |
| |
| if (m->names) |
| delstring(&m->names); |
| if (m->purpose) |
| delstring(&m->purpose); |
| |
| (void) printf("\n"); |
| return (0); |
| } |
| |
| |
| /* |
| * Basic control structure of the SGML "parser". |
| * It's very simplistic - when named tags are encountered in the |
| * input stream, control is transferred to the corresponding routine. |
| * No checking is done for correct pairing of tags. A few other hacks |
| * are sneaked into the lexical routines above. |
| * Output is generated after seeing the /refmeta and /refnamediv |
| * closing tags. |
| */ |
| static const struct { |
| char *name; |
| int (*action)(manpage_t *); |
| } acts[] = { |
| { "refentrytitle", refentrytitle }, |
| { "manvolnum", manvolnum }, |
| { "refmiscinfo class=\"date\"", refmiscinfo_date }, |
| { "/refmeta", print_refmeta }, |
| { "refdescriptor", refdescriptor }, |
| { "refname", refname }, |
| { "refpurpose", refpurpose }, |
| { "/refnamediv", terminate }, |
| { 0 } |
| }; |
| |
| static void |
| sgmlpage(char *name) |
| { |
| int rc = 1, a; |
| char tokbuf[BUFSIZ]; |
| manpage_t manpage, *m = &manpage; |
| |
| (void) memset(m, 0, sizeof (*m)); |
| m->name = name; |
| |
| do { |
| if (!find_token(tokbuf, sizeof (tokbuf))) |
| break; |
| for (a = 0; acts[a].name; a++) { |
| if (strcasecmp(acts[a].name, tokbuf) != 0) |
| continue; |
| rc = acts[a].action(m); |
| break; |
| } |
| } while (rc); |
| } |