| /* |
| * This file and its contents are supplied under the terms of the |
| * Common Development and Distribution License ("CDDL"), version 1.0. |
| * You may only use this file in accordance with the terms of version |
| * 1.0 of the CDDL. |
| * |
| * A full copy of the text of the CDDL should have accompanied this |
| * source. A copy of the CDDL is also available via the Internet at |
| * http://www.illumos.org/license/CDDL. |
| */ |
| |
| /* |
| * Copyright 2017 Joyent, Inc. |
| */ |
| |
| /* |
| * Support for the signalfd facility, a Linux-borne facility for |
| * file descriptor-based synchronous signal consumption. |
| * |
| * As described on the signalfd(3C) man page, the general idea behind these |
| * file descriptors is that they can be used to synchronously consume signals |
| * via the read(2) syscall. While that capability already exists with the |
| * sigwaitinfo(3C) function, signalfd holds an advantage since it is file |
 * descriptor based: It is able to use the event facilities (poll(2),
 * /dev/poll, event ports) to notify interested parties when consumable
 * signals arrive.
| * |
 * The signalfd lifecycle begins when a process opens /dev/signalfd. A minor
 * number is allocated for it, along with an associated signalfd_state_t
 * struct, which is where the mask of desired signals resides.
| * |
| * Reading from the signalfd is straightforward and mimics the kernel behavior |
 * for sigtimedwait(). Signals continue to live on either the proc's p_sig
 * or the thread's t_sig member. During a read operation, those which match
 * the mask are consumed so they are no longer pending.
| * |
| * The poll side is more complex. Every time a signal is delivered, all of the |
| * signalfds on the process need to be examined in order to pollwake threads |
| * waiting for signal arrival. |
| * |
| * When a thread polling on a signalfd requires a pollhead, several steps must |
| * be taken to safely ensure the proper result. A sigfd_proc_state_t is |
 * created for the calling process if it does not yet exist. It holds the
 * list of sigfd_poll_waiter_t structures which associate pollheads with
 * signalfd_state_t entries. That list is walked to find a
 * sigfd_poll_waiter_t matching the signalfd_state_t which corresponds to the
 * polled resource. If one is found, it is reused. Otherwise a new one is
 * created, incrementing the refcount on the signalfd_state_t, and it is
 * added to the list.
| * |
| * The complications imposed by fork(2) are why the pollhead is stored in the |
| * associated sigfd_poll_waiter_t instead of directly in the signalfd_state_t. |
| * More than one process can hold a reference to the signalfd at a time but |
| * arriving signals should wake only process-local pollers. Additionally, |
| * signalfd_close is called only when the last referencing fd is closed, hiding |
 * occurrences of preceding threads which released their references. This
| * necessitates reference counting on the signalfd_state_t so it is able to |
| * persist after close until all poll references have been cleansed. Doing so |
| * ensures that blocked pollers which hold references to the signalfd_state_t |
| * will be able to do clean-up after the descriptor itself has been closed. |
| * |
| * When a signal arrives in a process polling on signalfd, signalfd_pollwake_cb |
| * is called via the pointer in sigfd_proc_state_t. It will walk over the |
| * sigfd_poll_waiter_t entries present in the list, searching for any |
| * associated with a signalfd_state_t with a matching signal mask. The |
| * approach of keeping the poller list in p_sigfd was chosen because a process |
| * is likely to use few signalfds relative to its total file descriptors. It |
| * reduces the work required for each received signal. |
| * |
| * When matching sigfd_poll_waiter_t entries are encountered in the poller list |
| * during signalfd_pollwake_cb, they are dispatched into signalfd_wakeq to |
| * perform the pollwake. This is due to a lock ordering conflict between |
| * signalfd_poll and signalfd_pollwake_cb. The former acquires |
| * pollcache_t`pc_lock before proc_t`p_lock. The latter (via sigtoproc) |
 * reverses the order. Deferring the pollwake into a taskq means it can be
| * performed without proc_t`p_lock held, avoiding the deadlock. |
| * |
| * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list |
 * will clear out on its own. Any per-process state which remains will be
 * cleaned up by the exit helper (signalfd_exit_helper).
| * |
| * The structures associated with signalfd state are designed to operate |
| * correctly across fork, but there is one caveat that applies. Using |
 * fork-shared signalfd descriptors in conjunction with fork-shared caching
 * poll descriptors (such as /dev/poll or event ports) will result in missed
 * poll wake-ups. This is caused by the pollhead identity of signalfd
 * descriptors being dependent on the process they are polled from. Because
 * it has a thread-local cache, poll(2) is unaffected by this limitation.
| * |
| * Lock ordering: |
| * |
| * 1. signalfd_lock |
| * 2. signalfd_state_t`sfd_lock |
| * |
| * 1. proc_t`p_lock (to walk p_sigfd) |
| * 2. signalfd_state_t`sfd_lock |
| * 2a. signalfd_lock (after sfd_lock is dropped, when sfd_count falls to 0) |
| */ |
| |
| #include <sys/ddi.h> |
| #include <sys/sunddi.h> |
| #include <sys/signalfd.h> |
| #include <sys/conf.h> |
| #include <sys/sysmacros.h> |
| #include <sys/filio.h> |
| #include <sys/stat.h> |
| #include <sys/file.h> |
| #include <sys/schedctl.h> |
| #include <sys/id_space.h> |
| #include <sys/sdt.h> |
| #include <sys/disp.h> |
| #include <sys/taskq_impl.h> |
| |
| typedef struct signalfd_state signalfd_state_t; |
| |
| struct signalfd_state { |
| list_node_t sfd_list; /* node in global list */ |
| kmutex_t sfd_lock; /* protects fields below */ |
| uint_t sfd_count; /* ref count */ |
| boolean_t sfd_valid; /* valid while open */ |
| k_sigset_t sfd_set; /* signals for this fd */ |
| }; |
| |
| typedef struct sigfd_poll_waiter { |
| list_node_t spw_list; |
| signalfd_state_t *spw_state; |
| pollhead_t spw_pollhd; |
| taskq_ent_t spw_taskent; |
| short spw_pollev; |
| } sigfd_poll_waiter_t; |
| |
| /* |
| * Protects global state in signalfd_devi, signalfd_minor, signalfd_softstate, |
| * and signalfd_state (including sfd_list field of members) |
| */ |
| static kmutex_t signalfd_lock; |
| static dev_info_t *signalfd_devi; /* device info */ |
| static id_space_t *signalfd_minor; /* minor number arena */ |
| static void *signalfd_softstate; /* softstate pointer */ |
| static list_t signalfd_state; /* global list of state */ |
| static taskq_t *signalfd_wakeq; /* pollwake event taskq */ |
| |
| |
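/*
 * Take an additional hold on a signalfd_state_t. The caller must hold
 * sfd_lock and already possess a reference, and the state must still be
 * valid (not yet invalidated by close).
 */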
| static void |
| signalfd_state_enter_locked(signalfd_state_t *state) |
| { |
| ASSERT(MUTEX_HELD(&state->sfd_lock)); |
| ASSERT(state->sfd_count > 0); |
| VERIFY(state->sfd_valid == B_TRUE); |
| |
| state->sfd_count++; |
| } |
| |
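/*
 * Release a hold on a signalfd_state_t, freeing it and unlinking it from the
 * global list once the last reference is dropped. signalfd_close performs
 * the final invalidation with force_invalidate set, and must do so with
 * signalfd_lock held.
 */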
| static void |
| signalfd_state_release(signalfd_state_t *state, boolean_t force_invalidate) |
| { |
| mutex_enter(&state->sfd_lock); |
| |
| if (force_invalidate) { |
| state->sfd_valid = B_FALSE; |
| } |
| |
| ASSERT(state->sfd_count > 0); |
| if (state->sfd_count == 1) { |
| VERIFY(state->sfd_valid == B_FALSE); |
| mutex_exit(&state->sfd_lock); |
| if (force_invalidate) { |
| /* |
| * The invalidation performed in signalfd_close is done |
| * while signalfd_lock is held. |
| */ |
| ASSERT(MUTEX_HELD(&signalfd_lock)); |
| list_remove(&signalfd_state, state); |
| } else { |
| ASSERT(MUTEX_NOT_HELD(&signalfd_lock)); |
| mutex_enter(&signalfd_lock); |
| list_remove(&signalfd_state, state); |
| mutex_exit(&signalfd_lock); |
| } |
| kmem_free(state, sizeof (*state)); |
| return; |
| } |
| state->sfd_count--; |
| mutex_exit(&state->sfd_lock); |
| } |
| |
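/*
 * Find the sigfd_poll_waiter_t associated with the given state in this
 * process's list, creating one (and taking a hold on the state) if it does
 * not already exist. Called from signalfd_poll with p_lock held.
 */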
| static sigfd_poll_waiter_t * |
| signalfd_wake_list_add(sigfd_proc_state_t *pstate, signalfd_state_t *state) |
| { |
| list_t *lst = &pstate->sigfd_list; |
| sigfd_poll_waiter_t *pw; |
| |
| for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) { |
| if (pw->spw_state == state) |
| break; |
| } |
| |
| if (pw == NULL) { |
| pw = kmem_zalloc(sizeof (*pw), KM_SLEEP); |
| |
| mutex_enter(&state->sfd_lock); |
| signalfd_state_enter_locked(state); |
| pw->spw_state = state; |
| mutex_exit(&state->sfd_lock); |
| list_insert_head(lst, pw); |
| } |
| return (pw); |
| } |
| |
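/*
 * Remove the sigfd_poll_waiter_t associated with the given state from this
 * process's list, dropping the hold it carried. Returns the removed waiter,
 * or NULL if none was found.
 */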
| static sigfd_poll_waiter_t * |
| signalfd_wake_list_rm(sigfd_proc_state_t *pstate, signalfd_state_t *state) |
| { |
| list_t *lst = &pstate->sigfd_list; |
| sigfd_poll_waiter_t *pw; |
| |
| for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) { |
| if (pw->spw_state == state) { |
| break; |
| } |
| } |
| |
| if (pw != NULL) { |
| list_remove(lst, pw); |
| pw->spw_state = NULL; |
| signalfd_state_release(state, B_FALSE); |
| } |
| |
| return (pw); |
| } |
| |
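/*
 * Tear down all per-process signalfd poll state, issuing a POLLERR wakeup
 * for any waiters which remain. Called with p_lock held.
 */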
| static void |
| signalfd_wake_list_cleanup(proc_t *p) |
| { |
| sigfd_proc_state_t *pstate = p->p_sigfd; |
| sigfd_poll_waiter_t *pw; |
| list_t *lst; |
| |
| ASSERT(MUTEX_HELD(&p->p_lock)); |
| ASSERT(pstate != NULL); |
| |
| lst = &pstate->sigfd_list; |
| while ((pw = list_remove_head(lst)) != NULL) { |
| signalfd_state_t *state = pw->spw_state; |
| |
| pw->spw_state = NULL; |
| signalfd_state_release(state, B_FALSE); |
| |
| pollwakeup(&pw->spw_pollhd, POLLERR); |
| pollhead_clean(&pw->spw_pollhd); |
| kmem_free(pw, sizeof (*pw)); |
| } |
| list_destroy(lst); |
| |
| p->p_sigfd = NULL; |
| kmem_free(pstate, sizeof (*pstate)); |
| } |
| |
| static void |
| signalfd_exit_helper(void) |
| { |
| proc_t *p = curproc; |
| |
| mutex_enter(&p->p_lock); |
| signalfd_wake_list_cleanup(p); |
| mutex_exit(&p->p_lock); |
| } |
| |
| /* |
| * Perform pollwake for a sigfd_poll_waiter_t entry. |
| * Thanks to the strict and conflicting lock orders required for signalfd_poll |
| * (pc_lock before p_lock) and signalfd_pollwake_cb (p_lock before pc_lock), |
| * this is relegated to a taskq to avoid deadlock. |
| */ |
| static void |
| signalfd_wake_task(void *arg) |
| { |
| sigfd_poll_waiter_t *pw = arg; |
| signalfd_state_t *state = pw->spw_state; |
| |
| pw->spw_state = NULL; |
| signalfd_state_release(state, B_FALSE); |
| pollwakeup(&pw->spw_pollhd, pw->spw_pollev); |
| pollhead_clean(&pw->spw_pollhd); |
| kmem_free(pw, sizeof (*pw)); |
| } |
| |
| /* |
| * Called every time a signal is delivered to the process so that we can |
| * see if any signal stream needs a pollwakeup. We maintain a list of |
| * signal state elements so that we don't have to look at every file descriptor |
| * on the process. If necessary, a further optimization would be to maintain a |
| * signal set mask that is a union of all of the sets in the list so that |
| * we don't even traverse the list if the signal is not in one of the elements. |
| * However, since the list is likely to be very short, this is not currently |
| * being done. A more complex data structure might also be used, but it is |
| * unclear what that would be since each signal set needs to be checked for a |
| * match. |
| */ |
| static void |
| signalfd_pollwake_cb(void *arg0, int sig) |
| { |
| proc_t *p = (proc_t *)arg0; |
| sigfd_proc_state_t *pstate = (sigfd_proc_state_t *)p->p_sigfd; |
| list_t *lst; |
| sigfd_poll_waiter_t *pw; |
| |
| ASSERT(MUTEX_HELD(&p->p_lock)); |
| ASSERT(pstate != NULL); |
| |
| lst = &pstate->sigfd_list; |
| pw = list_head(lst); |
| while (pw != NULL) { |
| signalfd_state_t *state = pw->spw_state; |
| sigfd_poll_waiter_t *next; |
| |
| mutex_enter(&state->sfd_lock); |
| if (!state->sfd_valid) { |
| pw->spw_pollev = POLLERR; |
| } else if (sigismember(&state->sfd_set, sig)) { |
| pw->spw_pollev = POLLRDNORM | POLLIN; |
| } else { |
| mutex_exit(&state->sfd_lock); |
| pw = list_next(lst, pw); |
| continue; |
| } |
| mutex_exit(&state->sfd_lock); |
| |
| /* |
| * Pull the sigfd_poll_waiter_t out of the list and dispatch it |
| * to perform a pollwake. This cannot be done synchronously |
| * since signalfd_poll and signalfd_pollwake_cb have |
| * conflicting lock orders which can deadlock. |
| */ |
| next = list_next(lst, pw); |
| list_remove(lst, pw); |
| taskq_dispatch_ent(signalfd_wakeq, signalfd_wake_task, pw, 0, |
| &pw->spw_taskent); |
| pw = next; |
| } |
| } |
| |
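/*
 * Open entry point. Each open of /dev/signalfd is cloned onto a freshly
 * allocated minor with its own signalfd_state_t, which is inserted on the
 * global state list.
 */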
| _NOTE(ARGSUSED(1)) |
| static int |
| signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) |
| { |
| signalfd_state_t *state, **sstate; |
| major_t major = getemajor(*devp); |
| minor_t minor = getminor(*devp); |
| |
| if (minor != SIGNALFDMNRN_SIGNALFD) |
| return (ENXIO); |
| |
| mutex_enter(&signalfd_lock); |
| |
| minor = (minor_t)id_allocff(signalfd_minor); |
| if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) { |
| id_free(signalfd_minor, minor); |
| mutex_exit(&signalfd_lock); |
| return (ENODEV); |
| } |
| |
| state = kmem_zalloc(sizeof (*state), KM_SLEEP); |
| state->sfd_valid = B_TRUE; |
| state->sfd_count = 1; |
| list_insert_head(&signalfd_state, (void *)state); |
| |
| sstate = ddi_get_soft_state(signalfd_softstate, minor); |
| *sstate = state; |
| *devp = makedevice(major, minor); |
| |
| mutex_exit(&signalfd_lock); |
| |
| return (0); |
| } |
| |
| /* |
| * Consume one signal from our set in a manner similar to sigtimedwait(). |
| * The block parameter is used to control whether we wait for a signal or |
| * return immediately if no signal is pending. We use the thread's t_sigwait |
| * member in the same way that it is used by sigtimedwait. |
| * |
| * Return 0 if we successfully consumed a signal or an errno if not. |
| */ |
| static int |
| consume_signal(k_sigset_t set, uio_t *uio, boolean_t block) |
| { |
| k_sigset_t oldmask; |
| kthread_t *t = curthread; |
| klwp_t *lwp = ttolwp(t); |
| proc_t *p = ttoproc(t); |
| timespec_t now; |
| timespec_t *rqtp = NULL; /* null means blocking */ |
| int timecheck = 0; |
| int ret = 0; |
| k_siginfo_t info, *infop; |
| signalfd_siginfo_t ssi, *ssp = &ssi; |
| |
| if (block == B_FALSE) { |
| timecheck = timechanged; |
| gethrestime(&now); |
| rqtp = &now; /* non-blocking check for pending signals */ |
| } |
| |
| t->t_sigwait = set; |
| |
| mutex_enter(&p->p_lock); |
| /* |
	 * Set the thread's signal mask to unmask those signals in the
| * specified set. |
| */ |
| schedctl_finish_sigblock(t); |
| oldmask = t->t_hold; |
| sigdiffset(&t->t_hold, &t->t_sigwait); |
| |
| /* |
| * Based on rqtp, wait indefinitely until we take a signal in our set |
| * or return immediately if there are no signals pending from our set. |
| */ |
| while ((ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock, rqtp, |
| timecheck)) > 0) |
| continue; |
| |
| /* Restore thread's signal mask to its previous value. */ |
| t->t_hold = oldmask; |
| t->t_sig_check = 1; /* so post_syscall sees new t_hold mask */ |
| |
	if (ret == -1) {
		/* no signals pending */
		mutex_exit(&p->p_lock);
		sigemptyset(&t->t_sigwait);
		return (EAGAIN);
	}
| |
	/* Don't bother with the signal if it is not in the request set. */
| if (lwp->lwp_cursig == 0 || |
| !sigismember(&t->t_sigwait, lwp->lwp_cursig)) { |
| mutex_exit(&p->p_lock); |
| /* |
| * lwp_cursig is zero if pokelwps() awakened cv_wait_sig(). |
| * This happens if some other thread in this process called |
| * forkall() or exit(). |
| */ |
| sigemptyset(&t->t_sigwait); |
| return (EINTR); |
| } |
| |
| if (lwp->lwp_curinfo) { |
| infop = &lwp->lwp_curinfo->sq_info; |
| } else { |
| infop = &info; |
| bzero(infop, sizeof (info)); |
| infop->si_signo = lwp->lwp_cursig; |
| infop->si_code = SI_NOINFO; |
| } |
| |
| lwp->lwp_ru.nsignals++; |
| |
| DTRACE_PROC2(signal__clear, int, ret, ksiginfo_t *, infop); |
| lwp->lwp_cursig = 0; |
| lwp->lwp_extsig = 0; |
| mutex_exit(&p->p_lock); |
| |
	/* Convert the k_siginfo into the external, datamodel-independent struct. */
| bzero(ssp, sizeof (*ssp)); |
| ssp->ssi_signo = infop->si_signo; |
| ssp->ssi_errno = infop->si_errno; |
| ssp->ssi_code = infop->si_code; |
| ssp->ssi_pid = infop->si_pid; |
| ssp->ssi_uid = infop->si_uid; |
| ssp->ssi_fd = infop->si_fd; |
| ssp->ssi_band = infop->si_band; |
| ssp->ssi_trapno = infop->si_trapno; |
| ssp->ssi_status = infop->si_status; |
| ssp->ssi_utime = infop->si_utime; |
| ssp->ssi_stime = infop->si_stime; |
| ssp->ssi_addr = (uint64_t)(intptr_t)infop->si_addr; |
| |
| ret = uiomove(ssp, sizeof (*ssp), UIO_READ, uio); |
| |
| if (lwp->lwp_curinfo) { |
| siginfofree(lwp->lwp_curinfo); |
| lwp->lwp_curinfo = NULL; |
| } |
| sigemptyset(&t->t_sigwait); |
| return (ret); |
| } |
| |
| /* |
| * This is similar to sigtimedwait. Based on the fd mode we may wait until a |
| * signal within our specified set is posted. We consume as many available |
| * signals within our set as we can. |
| */ |
| _NOTE(ARGSUSED(2)) |
| static int |
| signalfd_read(dev_t dev, uio_t *uio, cred_t *cr) |
| { |
| signalfd_state_t *state, **sstate; |
| minor_t minor = getminor(dev); |
| boolean_t block = B_TRUE; |
| k_sigset_t set; |
| boolean_t got_one = B_FALSE; |
| int res; |
| |
| if (uio->uio_resid < sizeof (signalfd_siginfo_t)) |
| return (EINVAL); |
| |
| sstate = ddi_get_soft_state(signalfd_softstate, minor); |
| state = *sstate; |
| |
| if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) |
| block = B_FALSE; |
| |
| mutex_enter(&state->sfd_lock); |
| set = state->sfd_set; |
| mutex_exit(&state->sfd_lock); |
| |
| if (sigisempty(&set)) |
| return (set_errno(EINVAL)); |
| |
| do { |
| res = consume_signal(set, uio, block); |
| |
| if (res == 0) { |
| /* |
| * After consuming one signal, do not block while |
| * trying to consume more. |
| */ |
| got_one = B_TRUE; |
| block = B_FALSE; |
| |
| /* |
| * Refresh the matching signal set in case it was |
| * updated during the wait. |
| */ |
| mutex_enter(&state->sfd_lock); |
| set = state->sfd_set; |
| mutex_exit(&state->sfd_lock); |
| if (sigisempty(&set)) |
| break; |
| } |
| } while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t)); |
| |
| if (got_one) |
| res = 0; |
| |
| return (res); |
| } |
| |
| /* |
 * If k_sigset_t were a single word, we would do:
| * return (((p->p_sig | t->t_sig) & set) & fillset); |
| */ |
| static int |
| signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set) |
| { |
| return (((p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) & |
| set.__sigbits[0]) | |
| ((p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) & |
| set.__sigbits[1]) | |
| (((p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) & |
| set.__sigbits[2]) & FILLSET2)); |
| } |
| |
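/*
 * Poll entry point. If a signal matching sfd_set is already pending, report
 * the descriptor readable. Otherwise, register a sigfd_poll_waiter_t (and
 * the process-level callback state, if needed) so that signalfd_pollwake_cb
 * can issue the wakeup when a matching signal arrives.
 */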
| static int |
| signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp, |
| struct pollhead **phpp) |
| { |
| signalfd_state_t *state, **sstate; |
| minor_t minor = getminor(dev); |
| kthread_t *t = curthread; |
| proc_t *p = ttoproc(t); |
| short revents = 0; |
| |
| sstate = ddi_get_soft_state(signalfd_softstate, minor); |
| state = *sstate; |
| |
| mutex_enter(&state->sfd_lock); |
| |
| if (signalfd_sig_pending(p, t, state->sfd_set) != 0) |
| revents |= POLLRDNORM | POLLIN; |
| |
| mutex_exit(&state->sfd_lock); |
| |
| *reventsp = revents & events; |
| if ((*reventsp == 0 && !anyyet) || (events & POLLET)) { |
| sigfd_proc_state_t *pstate; |
| sigfd_poll_waiter_t *pw; |
| |
| /* |
| * Enable pollwakeup handling. |
| */ |
| mutex_enter(&p->p_lock); |
| if ((pstate = (sigfd_proc_state_t *)p->p_sigfd) == NULL) { |
| |
| mutex_exit(&p->p_lock); |
| pstate = kmem_zalloc(sizeof (*pstate), KM_SLEEP); |
| list_create(&pstate->sigfd_list, |
| sizeof (sigfd_poll_waiter_t), |
| offsetof(sigfd_poll_waiter_t, spw_list)); |
| pstate->sigfd_pollwake_cb = signalfd_pollwake_cb; |
| |
| /* Check again, after blocking for the alloc. */ |
| mutex_enter(&p->p_lock); |
| if (p->p_sigfd == NULL) { |
| p->p_sigfd = pstate; |
| } else { |
| /* someone beat us to it */ |
| list_destroy(&pstate->sigfd_list); |
| kmem_free(pstate, sizeof (*pstate)); |
| pstate = p->p_sigfd; |
| } |
| } |
| |
| pw = signalfd_wake_list_add(pstate, state); |
| *phpp = &pw->spw_pollhd; |
| mutex_exit(&p->p_lock); |
| } |
| |
| return (0); |
| } |
| |
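/*
 * Ioctl entry point. For SIGNALFDIOC_MASK, arg points to a sigset_t which
 * replaces the descriptor's signal set wholesale. For illustration only, a
 * sketch of driving the raw device without the signalfd(3C) wrapper (the
 * signals must still be blocked via sigprocmask(2) so they remain pending
 * for consumption):
 *
 *	int fd = open("/dev/signalfd", O_RDWR);
 *	sigset_t mask;
 *
 *	sigemptyset(&mask);
 *	sigaddset(&mask, SIGINT);
 *	(void) sigprocmask(SIG_BLOCK, &mask, NULL);
 *	(void) ioctl(fd, SIGNALFDIOC_MASK, &mask);
 */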
| _NOTE(ARGSUSED(4)) |
| static int |
| signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) |
| { |
| signalfd_state_t *state, **sstate; |
| minor_t minor = getminor(dev); |
| sigset_t mask; |
| |
| sstate = ddi_get_soft_state(signalfd_softstate, minor); |
| state = *sstate; |
| |
| switch (cmd) { |
| case SIGNALFDIOC_MASK: |
| if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t), |
| md) != 0) |
| return (set_errno(EFAULT)); |
| |
| mutex_enter(&state->sfd_lock); |
| sigutok(&mask, &state->sfd_set); |
| mutex_exit(&state->sfd_lock); |
| |
| return (0); |
| |
| default: |
| break; |
| } |
| |
| return (ENOTTY); |
| } |
| |
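/*
 * Close entry point, called when the last fd referencing this minor is
 * closed. The state is removed from the process's pollwake list and
 * invalidated; the final hold may be released later by lingering poll
 * waiters.
 */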
| _NOTE(ARGSUSED(1)) |
| static int |
| signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p) |
| { |
| signalfd_state_t *state, **sstate; |
| sigfd_poll_waiter_t *pw = NULL; |
| minor_t minor = getminor(dev); |
| proc_t *p = curproc; |
| |
| sstate = ddi_get_soft_state(signalfd_softstate, minor); |
| state = *sstate; |
| |
| /* Make sure state is removed from this proc's pollwake list. */ |
| mutex_enter(&p->p_lock); |
| if (p->p_sigfd != NULL) { |
| sigfd_proc_state_t *pstate = p->p_sigfd; |
| |
| pw = signalfd_wake_list_rm(pstate, state); |
| if (list_is_empty(&pstate->sigfd_list)) { |
| signalfd_wake_list_cleanup(p); |
| } |
| } |
| mutex_exit(&p->p_lock); |
| |
| if (pw != NULL) { |
| pollwakeup(&pw->spw_pollhd, POLLERR); |
| pollhead_clean(&pw->spw_pollhd); |
| kmem_free(pw, sizeof (*pw)); |
| } |
| |
| mutex_enter(&signalfd_lock); |
| |
| *sstate = NULL; |
| ddi_soft_state_free(signalfd_softstate, minor); |
| id_free(signalfd_minor, minor); |
| |
| signalfd_state_release(state, B_TRUE); |
| |
| mutex_exit(&signalfd_lock); |
| |
| return (0); |
| } |
| |
| static int |
| signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) |
| { |
| if (cmd != DDI_ATTACH || signalfd_devi != NULL) |
| return (DDI_FAILURE); |
| |
| mutex_enter(&signalfd_lock); |
| |
| signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1); |
| if (signalfd_minor == NULL) { |
| cmn_err(CE_WARN, "signalfd couldn't create id space"); |
| mutex_exit(&signalfd_lock); |
| return (DDI_FAILURE); |
| } |
| |
| if (ddi_soft_state_init(&signalfd_softstate, |
| sizeof (signalfd_state_t *), 0) != 0) { |
| cmn_err(CE_WARN, "signalfd failed to create soft state"); |
| id_space_destroy(signalfd_minor); |
| mutex_exit(&signalfd_lock); |
| return (DDI_FAILURE); |
| } |
| |
| if (ddi_create_minor_node(devi, "signalfd", S_IFCHR, |
| SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, NULL) == DDI_FAILURE) { |
| cmn_err(CE_NOTE, "/dev/signalfd couldn't create minor node"); |
| ddi_soft_state_fini(&signalfd_softstate); |
| id_space_destroy(signalfd_minor); |
| mutex_exit(&signalfd_lock); |
| return (DDI_FAILURE); |
| } |
| |
| ddi_report_dev(devi); |
| signalfd_devi = devi; |
| |
| sigfd_exit_helper = signalfd_exit_helper; |
| |
| list_create(&signalfd_state, sizeof (signalfd_state_t), |
| offsetof(signalfd_state_t, sfd_list)); |
| |
| signalfd_wakeq = taskq_create("signalfd_wake", 1, minclsyspri, |
| 0, INT_MAX, TASKQ_PREPOPULATE); |
| |
| mutex_exit(&signalfd_lock); |
| |
| return (DDI_SUCCESS); |
| } |
| |
| _NOTE(ARGSUSED(0)) |
| static int |
| signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) |
| { |
| switch (cmd) { |
| case DDI_DETACH: |
| break; |
| |
| default: |
| return (DDI_FAILURE); |
| } |
| |
| mutex_enter(&signalfd_lock); |
| |
| if (!list_is_empty(&signalfd_state)) { |
| /* |
| * There are dangling poll waiters holding signalfd_state_t |
| * entries on the global list. Detach is not possible until |
| * they purge themselves. |
| */ |
| mutex_exit(&signalfd_lock); |
| return (DDI_FAILURE); |
| } |
| list_destroy(&signalfd_state); |
| |
| /* |
| * With no remaining entries in the signalfd_state list, the wake taskq |
| * should be empty with no possibility for new entries. |
| */ |
| taskq_destroy(signalfd_wakeq); |
| |
| id_space_destroy(signalfd_minor); |
| |
| ddi_remove_minor_node(signalfd_devi, NULL); |
| signalfd_devi = NULL; |
| sigfd_exit_helper = NULL; |
| |
| ddi_soft_state_fini(&signalfd_softstate); |
| mutex_exit(&signalfd_lock); |
| |
| return (DDI_SUCCESS); |
| } |
| |
| _NOTE(ARGSUSED(0)) |
| static int |
| signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) |
| { |
| int error; |
| |
| switch (infocmd) { |
| case DDI_INFO_DEVT2DEVINFO: |
| *result = (void *)signalfd_devi; |
| error = DDI_SUCCESS; |
| break; |
| case DDI_INFO_DEVT2INSTANCE: |
| *result = (void *)0; |
| error = DDI_SUCCESS; |
| break; |
| default: |
| error = DDI_FAILURE; |
| } |
| return (error); |
| } |
| |
| static struct cb_ops signalfd_cb_ops = { |
| signalfd_open, /* open */ |
| signalfd_close, /* close */ |
| nulldev, /* strategy */ |
| nulldev, /* print */ |
| nodev, /* dump */ |
| signalfd_read, /* read */ |
| nodev, /* write */ |
| signalfd_ioctl, /* ioctl */ |
| nodev, /* devmap */ |
| nodev, /* mmap */ |
| nodev, /* segmap */ |
| signalfd_poll, /* poll */ |
| ddi_prop_op, /* cb_prop_op */ |
| 0, /* streamtab */ |
| D_NEW | D_MP /* Driver compatibility flag */ |
| }; |
| |
| static struct dev_ops signalfd_ops = { |
| DEVO_REV, /* devo_rev */ |
| 0, /* refcnt */ |
| signalfd_info, /* get_dev_info */ |
| nulldev, /* identify */ |
| nulldev, /* probe */ |
| signalfd_attach, /* attach */ |
| signalfd_detach, /* detach */ |
| nodev, /* reset */ |
| &signalfd_cb_ops, /* driver operations */ |
| NULL, /* bus operations */ |
| nodev, /* dev power */ |
| ddi_quiesce_not_needed, /* quiesce */ |
| }; |
| |
| static struct modldrv modldrv = { |
| &mod_driverops, /* module type (this is a pseudo driver) */ |
| "signalfd support", /* name of module */ |
| &signalfd_ops, /* driver ops */ |
| }; |
| |
| static struct modlinkage modlinkage = { |
| MODREV_1, |
| (void *)&modldrv, |
| NULL |
| }; |
| |
| int |
| _init(void) |
| { |
| return (mod_install(&modlinkage)); |
| } |
| |
| int |
| _info(struct modinfo *modinfop) |
| { |
| return (mod_info(&modlinkage, modinfop)); |
| } |
| |
| int |
| _fini(void) |
| { |
| return (mod_remove(&modlinkage)); |
| } |