| /* |
| * Copyright (c) 2002 John Rochester |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer, |
| * in this position and unchanged. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| /* |
| * Copyright 2012 Nexenta Systems, Inc. All rights reserved. |
| * Copyright 2014 Garrett D'Amore <garrett@damore.org> |
| */ |
| |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <sys/param.h> |
| |
| #include <ctype.h> |
| #include <dirent.h> |
| #include <err.h> |
| #include <signal.h> |
| #include <stddef.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <unistd.h> |
| |
| #include "man.h" |
| #include "stringlist.h" |
| |
| |
| /* Information collected about each man page in a section */ |
| struct page_info { |
| char *filename; |
| char *name; |
| char *suffix; |
| ino_t inode; |
| }; |
| |
| /* An expanding string */ |
| struct sbuf { |
| char *content; /* the start of the buffer */ |
| char *end; /* just past the end of the content */ |
| char *last; /* the last allocated character */ |
| }; |
| |
| /* Remove the last amount characters from the sbuf */ |
| #define sbuf_retract(sbuf, amount) ((sbuf)->end -= (amount)) |
| /* Return the length of the sbuf content */ |
| #define sbuf_length(sbuf) ((sbuf)->end - (sbuf)->content) |
| |
| typedef char *edited_copy(char *from, char *to, int length); |
| |
| /* |
| * While the whatis line is being formed, it is stored in whatis_proto. |
| * When finished, it is reformatted into whatis_final and then appended |
| * to whatis_lines. |
| */ |
| static struct sbuf *whatis_proto; |
| static struct sbuf *whatis_final; |
| static stringlist *whatis_lines; /* collected output lines */ |
| |
| static char tempfile[MAXPATHLEN]; /* path of temporary file, if any */ |
| |
| #define MDOC_COMMANDS "ArDvErEvFlLiNmPa" |
| |
| |
| /* Free a struct page_info and its content */ |
| static void |
| free_page_info(struct page_info *info) |
| { |
| |
| free(info->filename); |
| free(info->name); |
| free(info->suffix); |
| free(info); |
| } |
| |
| /* |
| * Allocate and fill in a new struct page_info given the |
| * name of the man section directory and the dirent of the file. |
| * If the file is not a man page, return NULL. |
| */ |
| static struct page_info * |
| new_page_info(char *dir, struct dirent *dirent) |
| { |
| struct page_info *info; |
| int basename_length; |
| char *suffix; |
| struct stat st; |
| |
| if ((info = malloc(sizeof (struct page_info))) == NULL) |
| err(1, "malloc"); |
| basename_length = strlen(dirent->d_name); |
| suffix = &dirent->d_name[basename_length]; |
| if (asprintf(&info->filename, "%s/%s", dir, dirent->d_name) == -1) |
| err(1, "asprintf"); |
| for (;;) { |
| if (--suffix == dirent->d_name || !isalnum(*suffix)) { |
| if (*suffix == '.') |
| break; |
| free(info->filename); |
| free(info); |
| return (NULL); |
| } |
| } |
| *suffix++ = '\0'; |
| info->name = strdup(dirent->d_name); |
| info->suffix = strdup(suffix); |
| if (stat(info->filename, &st) < 0) { |
| warn("%s", info->filename); |
| free_page_info(info); |
| return (NULL); |
| } |
| if (!S_ISREG(st.st_mode)) { |
| free_page_info(info); |
| return (NULL); |
| } |
| info->inode = st.st_ino; |
| return (info); |
| } |
| |
| /* |
| * Reset sbuf length to 0. |
| */ |
| static void |
| sbuf_clear(struct sbuf *sbuf) |
| { |
| |
| sbuf->end = sbuf->content; |
| } |
| |
| /* |
| * Allocate a new sbuf. |
| */ |
| static struct sbuf * |
| new_sbuf(void) |
| { |
| struct sbuf *sbuf; |
| |
| if ((sbuf = malloc(sizeof (struct sbuf))) == NULL) |
| err(1, "malloc"); |
| if ((sbuf->content = (char *)malloc(LINE_ALLOC)) == NULL) |
| err(1, "malloc"); |
| sbuf->last = sbuf->content + LINE_ALLOC - 1; |
| sbuf_clear(sbuf); |
| |
| return (sbuf); |
| } |
| |
| /* |
| * Ensure that there is enough room in the sbuf |
| * for nchars more characters. |
| */ |
| static void |
| sbuf_need(struct sbuf *sbuf, int nchars) |
| { |
| char *new_content; |
| size_t size, cntsize; |
| size_t grow = 128; |
| |
| while (grow < nchars) { |
| grow += 128; /* we grow in chunks of 128 bytes */ |
| } |
| |
| /* Grow if the buffer isn't big enough */ |
| if (sbuf->end + nchars > sbuf->last) { |
| size = sbuf->last + 1 - sbuf->content; |
| size += grow; |
| cntsize = sbuf->end - sbuf->content; |
| |
| if ((new_content = realloc(sbuf->content, size)) == NULL) { |
| perror("realloc"); |
| if (tempfile[0] != '\0') |
| (void) unlink(tempfile); |
| exit(1); |
| } |
| sbuf->content = new_content; |
| sbuf->end = new_content + cntsize; |
| sbuf->last = new_content + size - 1; |
| } |
| } |
| |
| /* |
| * Append a string of a given length to the sbuf. |
| */ |
| static void |
| sbuf_append(struct sbuf *sbuf, const char *text, int length) |
| { |
| if (length > 0) { |
| sbuf_need(sbuf, length); |
| (void) memcpy(sbuf->end, text, length); |
| sbuf->end += length; |
| } |
| } |
| |
| /* |
| * Append a null-terminated string to the sbuf. |
| */ |
| static void |
| sbuf_append_str(struct sbuf *sbuf, char *text) |
| { |
| |
| sbuf_append(sbuf, text, strlen(text)); |
| } |
| |
| /* |
| * Append an edited null-terminated string to the sbuf. |
| */ |
| static void |
| sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy) |
| { |
| int length; |
| |
| if ((length = strlen(text)) > 0) { |
| sbuf_need(sbuf, length); |
| sbuf->end = copy(text, sbuf->end, length); |
| } |
| } |
| |
| /* |
| * Strip any of a set of chars from the end of the sbuf. |
| */ |
| static void |
| sbuf_strip(struct sbuf *sbuf, const char *set) |
| { |
| |
| while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL) |
| sbuf->end--; |
| } |
| |
| /* |
| * Return the null-terminated string built by the sbuf. |
| */ |
| static char * |
| sbuf_content(struct sbuf *sbuf) |
| { |
| |
| *sbuf->end = '\0'; |
| return (sbuf->content); |
| } |
| |
| /* |
| * Return true if no man page exists in the directory with |
| * any of the names in the stringlist. |
| */ |
| static int |
| no_page_exists(char *dir, stringlist *names, char *suffix) |
| { |
| char path[MAXPATHLEN]; |
| char *suffixes[] = { "", ".gz", ".bz2", NULL }; |
| size_t i; |
| int j; |
| |
| for (i = 0; i < names->sl_cur; i++) { |
| for (j = 0; suffixes[j] != NULL; j++) { |
| (void) snprintf(path, MAXPATHLEN, "%s/%s.%s%s", |
| dir, names->sl_str[i], suffix, suffixes[j]); |
| if (access(path, F_OK) == 0) { |
| return (0); |
| } |
| } |
| } |
| return (1); |
| } |
| |
| /* ARGSUSED sig */ |
| static void |
| trap_signal(int sig) |
| { |
| |
| if (tempfile[0] != '\0') |
| (void) unlink(tempfile); |
| |
| exit(1); |
| } |
| |
| /* |
| * Attempt to open an output file. |
| * Return NULL if unsuccessful. |
| */ |
| static FILE * |
| open_output(char *name) |
| { |
| FILE *output; |
| |
| whatis_lines = sl_init(); |
| (void) snprintf(tempfile, MAXPATHLEN, "%s.tmp", name); |
| name = tempfile; |
| if ((output = fopen(name, "w")) == NULL) { |
| warn("%s", name); |
| return (NULL); |
| } |
| return (output); |
| } |
| |
| static int |
| linesort(const void *a, const void *b) |
| { |
| |
| return (strcmp((*(const char * const *)a), (*(const char * const *)b))); |
| } |
| |
| /* |
| * Write the unique sorted lines to the output file. |
| */ |
| static void |
| finish_output(FILE *output, char *name) |
| { |
| size_t i; |
| char *prev = NULL; |
| |
| qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof (char *), |
| linesort); |
| for (i = 0; i < whatis_lines->sl_cur; i++) { |
| char *line = whatis_lines->sl_str[i]; |
| if (i > 0 && strcmp(line, prev) == 0) |
| continue; |
| prev = line; |
| (void) fputs(line, output); |
| (void) putc('\n', output); |
| } |
| (void) fclose(output); |
| sl_free(whatis_lines, 1); |
| (void) rename(tempfile, name); |
| (void) unlink(tempfile); |
| } |
| |
| static FILE * |
| open_whatis(char *mandir) |
| { |
| char filename[MAXPATHLEN]; |
| |
| (void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS); |
| return (open_output(filename)); |
| } |
| |
| static void |
| finish_whatis(FILE *output, char *mandir) |
| { |
| char filename[MAXPATHLEN]; |
| |
| (void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS); |
| finish_output(output, filename); |
| } |
| |
| /* |
| * Remove trailing spaces from a string, returning a pointer to just |
| * beyond the new last character. |
| */ |
| static char * |
| trim_rhs(char *str) |
| { |
| char *rhs; |
| |
| rhs = &str[strlen(str)]; |
| while (--rhs > str && isspace(*rhs)) |
| ; |
| *++rhs = '\0'; |
| return (rhs); |
| } |
| |
| /* |
| * Return a pointer to the next non-space character in the string. |
| */ |
| static char * |
| skip_spaces(char *s) |
| { |
| |
| while (*s != '\0' && isspace(*s)) |
| s++; |
| |
| return (s); |
| } |
| |
| /* |
| * Return whether the line is of one of the forms: |
| * .Sh NAME |
| * .Sh "NAME" |
| * etc. |
| * assuming that section_start is ".Sh". |
| */ |
| static int |
| name_section_line(char *line, const char *section_start) |
| { |
| char *rhs; |
| |
| if (strncmp(line, section_start, 3) != 0) |
| return (0); |
| line = skip_spaces(line + 3); |
| rhs = trim_rhs(line); |
| if (*line == '"') { |
| line++; |
| if (*--rhs == '"') |
| *rhs = '\0'; |
| } |
| if (strcmp(line, "NAME") == 0) |
| return (1); |
| |
| return (0); |
| } |
| |
| /* |
| * Copy characters while removing the most common nroff/troff markup: |
| * \(em, \(mi, \s[+-N], \& |
| * \fF, \f(fo, \f[font] |
| * \*s, \*(st, \*[stringvar] |
| */ |
| static char * |
| de_nroff_copy(char *from, char *to, int fromlen) |
| { |
| char *from_end = &from[fromlen]; |
| |
| while (from < from_end) { |
| switch (*from) { |
| case '\\': |
| switch (*++from) { |
| case '(': |
| if (strncmp(&from[1], "em", 2) == 0 || |
| strncmp(&from[1], "mi", 2) == 0) { |
| from += 3; |
| continue; |
| } |
| break; |
| case 's': |
| if (*++from == '-') |
| from++; |
| while (isdigit(*from)) |
| from++; |
| continue; |
| case 'f': |
| case '*': |
| if (*++from == '(') { |
| from += 3; |
| } else if (*from == '[') { |
| while (*++from != ']' && |
| from < from_end) |
| ; |
| from++; |
| } else { |
| from++; |
| } |
| continue; |
| case '&': |
| from++; |
| continue; |
| } |
| break; |
| } |
| *to++ = *from++; |
| } |
| return (to); |
| } |
| |
| /* |
| * Append a string with the nroff formatting removed. |
| */ |
| static void |
| add_nroff(char *text) |
| { |
| |
| sbuf_append_edited(whatis_proto, text, de_nroff_copy); |
| } |
| |
| /* |
| * Appends "name(suffix), " to whatis_final |
| */ |
| static void |
| add_whatis_name(char *name, char *suffix) |
| { |
| |
| if (*name != '\0') { |
| sbuf_append_str(whatis_final, name); |
| sbuf_append(whatis_final, "(", 1); |
| sbuf_append_str(whatis_final, suffix); |
| sbuf_append(whatis_final, "), ", 3); |
| } |
| } |
| |
| /* |
| * Processes an old-style man(7) line. This ignores commands with only |
| * a single number argument. |
| */ |
| static void |
| process_man_line(char *line) |
| { |
| char *p; |
| |
| if (*line == '.') { |
| while (isalpha(*++line)) |
| ; |
| p = line = skip_spaces(line); |
| while (*p != '\0') { |
| if (!isdigit(*p)) |
| break; |
| p++; |
| } |
| if (*p == '\0') |
| return; |
| } else |
| line = skip_spaces(line); |
| if (*line != '\0') { |
| add_nroff(line); |
| sbuf_append(whatis_proto, " ", 1); |
| } |
| } |
| |
| /* |
| * Processes a new-style mdoc(7) line. |
| */ |
| static void |
| process_mdoc_line(char *line) |
| { |
| int xref; |
| int arg = 0; |
| char *line_end = &line[strlen(line)]; |
| int orig_length = sbuf_length(whatis_proto); |
| char *next; |
| |
| if (*line == '\0') |
| return; |
| if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) { |
| add_nroff(skip_spaces(line)); |
| sbuf_append(whatis_proto, " ", 1); |
| return; |
| } |
| xref = strncmp(line, ".Xr", 3) == 0; |
| line += 3; |
| while ((line = skip_spaces(line)) < line_end) { |
| if (*line == '"') { |
| next = ++line; |
| for (;;) { |
| next = strchr(next, '"'); |
| if (next == NULL) |
| break; |
| (void) memmove(next, next + 1, strlen(next)); |
| line_end--; |
| if (*next != '"') |
| break; |
| next++; |
| } |
| } else { |
| next = strpbrk(line, " \t"); |
| } |
| if (next != NULL) |
| *next++ = '\0'; |
| else |
| next = line_end; |
| if (isupper(*line) && islower(line[1]) && line[2] == '\0') { |
| if (strcmp(line, "Ns") == 0) { |
| arg = 0; |
| line = next; |
| continue; |
| } |
| if (strstr(line, MDOC_COMMANDS) != NULL) { |
| line = next; |
| continue; |
| } |
| } |
| if (arg > 0 && strchr(",.:;?!)]", *line) == 0) { |
| if (xref) { |
| sbuf_append(whatis_proto, "(", 1); |
| add_nroff(line); |
| sbuf_append(whatis_proto, ")", 1); |
| xref = 0; |
| } else { |
| sbuf_append(whatis_proto, " ", 1); |
| } |
| } |
| add_nroff(line); |
| arg++; |
| line = next; |
| } |
| if (sbuf_length(whatis_proto) > orig_length) |
| sbuf_append(whatis_proto, " ", 1); |
| } |
| |
| /* |
| * Collect a list of comma-separated names from the text. |
| */ |
| static void |
| collect_names(stringlist *names, char *text) |
| { |
| char *arg; |
| |
| for (;;) { |
| arg = text; |
| text = strchr(text, ','); |
| if (text != NULL) |
| *text++ = '\0'; |
| (void) sl_add(names, arg); |
| if (text == NULL) |
| return; |
| if (*text == ' ') |
| text++; |
| } |
| } |
| |
| enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC }; |
| |
| /* |
| * Process a man page source into a single whatis line and add it |
| * to whatis_lines. |
| */ |
| static void |
| process_page(struct page_info *page, char *section_dir) |
| { |
| FILE *fp; |
| stringlist *names; |
| char *descr; |
| int state = STATE_UNKNOWN; |
| size_t i; |
| char *line = NULL; |
| size_t linecap = 0; |
| |
| sbuf_clear(whatis_proto); |
| if ((fp = fopen(page->filename, "r")) == NULL) { |
| warn("%s", page->filename); |
| return; |
| } |
| while (getline(&line, &linecap, fp) > 0) { |
| /* Skip comments */ |
| if (strncmp(line, ".\\\"", 3) == 0) |
| continue; |
| switch (state) { |
| /* Haven't reached the NAME section yet */ |
| case STATE_UNKNOWN: |
| if (name_section_line(line, ".SH")) |
| state = STATE_MANSTYLE; |
| else if (name_section_line(line, ".Sh")) |
| state = STATE_MDOCNAME; |
| continue; |
| /* Inside an old-style .SH NAME section */ |
| case STATE_MANSTYLE: |
| if (strncmp(line, ".SH", 3) == 0 || |
| strncmp(line, ".SS", 3) == 0) |
| break; |
| (void) trim_rhs(line); |
| if (strcmp(line, ".") == 0) |
| continue; |
| if (strncmp(line, ".IX", 3) == 0) { |
| line += 3; |
| line = skip_spaces(line); |
| } |
| process_man_line(line); |
| continue; |
| /* Inside a new-style .Sh NAME section (the .Nm part) */ |
| case STATE_MDOCNAME: |
| (void) trim_rhs(line); |
| if (strncmp(line, ".Nm", 3) == 0) { |
| process_mdoc_line(line); |
| continue; |
| } else { |
| if (strcmp(line, ".") == 0) |
| continue; |
| sbuf_append(whatis_proto, "- ", 2); |
| state = STATE_MDOCDESC; |
| } |
| /* FALLTHROUGH */ |
| /* Inside a new-style .Sh NAME section (after the .Nm-s) */ |
| case STATE_MDOCDESC: |
| if (strncmp(line, ".Sh", 3) == 0) |
| break; |
| (void) trim_rhs(line); |
| if (strcmp(line, ".") == 0) |
| continue; |
| process_mdoc_line(line); |
| continue; |
| } |
| break; |
| } |
| (void) fclose(fp); |
| sbuf_strip(whatis_proto, " \t.-"); |
| line = sbuf_content(whatis_proto); |
| /* |
| * Line now contains the appropriate data, but without the |
| * proper indentation or the section appended to each name. |
| */ |
| descr = strstr(line, " - "); |
| if (descr == NULL) { |
| descr = strchr(line, ' '); |
| if (descr == NULL) |
| return; |
| *descr++ = '\0'; |
| } else { |
| *descr = '\0'; |
| descr += 3; |
| } |
| names = sl_init(); |
| collect_names(names, line); |
| sbuf_clear(whatis_final); |
| if (!sl_find(names, page->name) && |
| no_page_exists(section_dir, names, page->suffix)) { |
| /* |
| * Add the page name since that's the only |
| * thing that man(1) will find. |
| */ |
| add_whatis_name(page->name, page->suffix); |
| } |
| for (i = 0; i < names->sl_cur; i++) |
| add_whatis_name(names->sl_str[i], page->suffix); |
| sl_free(names, 0); |
| /* Remove last ", " */ |
| sbuf_retract(whatis_final, 2); |
| while (sbuf_length(whatis_final) < INDENT) |
| sbuf_append(whatis_final, " ", 1); |
| sbuf_append(whatis_final, " - ", 3); |
| sbuf_append_str(whatis_final, skip_spaces(descr)); |
| (void) sl_add(whatis_lines, strdup(sbuf_content(whatis_final))); |
| } |
| |
| /* |
| * Sort pages first by inode number, then by name. |
| */ |
| static int |
| pagesort(const void *a, const void *b) |
| { |
| const struct page_info *p1 = *(struct page_info * const *) a; |
| const struct page_info *p2 = *(struct page_info * const *) b; |
| |
| if (p1->inode == p2->inode) |
| return (strcmp(p1->name, p2->name)); |
| |
| return (p1->inode - p2->inode); |
| } |
| |
| /* |
| * Process a single man section. |
| */ |
| static void |
| process_section(char *section_dir) |
| { |
| struct dirent **entries; |
| int nentries; |
| struct page_info **pages; |
| int npages = 0; |
| int i; |
| ino_t prev_inode = 0; |
| |
| /* Scan the man section directory for pages */ |
| nentries = scandir(section_dir, &entries, NULL, alphasort); |
| |
| /* Collect information about man pages */ |
| pages = (struct page_info **)calloc(nentries, |
| sizeof (struct page_info *)); |
| for (i = 0; i < nentries; i++) { |
| struct page_info *info = new_page_info(section_dir, entries[i]); |
| if (info != NULL) |
| pages[npages++] = info; |
| free(entries[i]); |
| } |
| free(entries); |
| qsort(pages, npages, sizeof (struct page_info *), pagesort); |
| |
| /* Process each unique page */ |
| for (i = 0; i < npages; i++) { |
| struct page_info *page = pages[i]; |
| if (page->inode != prev_inode) { |
| prev_inode = page->inode; |
| process_page(page, section_dir); |
| } |
| free_page_info(page); |
| } |
| free(pages); |
| } |
| |
| /* |
| * Return whether the directory entry is a man page section. |
| */ |
| static int |
| select_sections(const struct dirent *entry) |
| { |
| const char *p = &entry->d_name[3]; |
| |
| if (strncmp(entry->d_name, "man", 3) != 0) |
| return (0); |
| while (*p != '\0') { |
| if (!isalnum(*p++)) |
| return (0); |
| } |
| return (1); |
| } |
| |
| /* |
| * Process a single top-level man directory by finding all the |
| * sub-directories named man* and processing each one in turn. |
| */ |
| void |
| mwpath(char *path) |
| { |
| FILE *fp = NULL; |
| struct dirent **entries; |
| int nsections; |
| int i; |
| |
| (void) signal(SIGINT, trap_signal); |
| (void) signal(SIGHUP, trap_signal); |
| (void) signal(SIGQUIT, trap_signal); |
| (void) signal(SIGTERM, trap_signal); |
| |
| whatis_proto = new_sbuf(); |
| whatis_final = new_sbuf(); |
| |
| nsections = scandir(path, &entries, select_sections, alphasort); |
| if ((fp = open_whatis(path)) == NULL) |
| return; |
| for (i = 0; i < nsections; i++) { |
| char section_dir[MAXPATHLEN]; |
| |
| (void) snprintf(section_dir, MAXPATHLEN, "%s/%s", |
| path, entries[i]->d_name); |
| process_section(section_dir); |
| free(entries[i]); |
| } |
| free(entries); |
| finish_whatis(fp, path); |
| } |