blob: 6416d6d45a9fab1df0017f0c6f3998713e73acb1 [file] [log] [blame]
Jerry Jelinek3d729ae2015-10-15 16:26:52 -07001/*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12/*
Patrick Mooney80d56892017-09-22 23:43:19 +000013 * Copyright 2017 Joyent, Inc.
Jerry Jelinek3d729ae2015-10-15 16:26:52 -070014 */
15
16/*
17 * Support for the signalfd facility, a Linux-borne facility for
18 * file descriptor-based synchronous signal consumption.
19 *
20 * As described on the signalfd(3C) man page, the general idea behind these
21 * file descriptors is that they can be used to synchronously consume signals
Patrick Mooneyabf99a02016-09-24 09:17:04 -070022 * via the read(2) syscall. While that capability already exists with the
23 * sigwaitinfo(3C) function, signalfd holds an advantage since it is file
 * descriptor based: It is able to use the event facilities (poll(2), /dev/poll,
25 * event ports) to notify interested parties when consumable signals arrive.
Jerry Jelinek3d729ae2015-10-15 16:26:52 -070026 *
 * The signalfd lifecycle begins when a process opens /dev/signalfd. A minor
28 * will be allocated for them along with an associated signalfd_state_t struct.
29 * It is there where the mask of desired signals resides.
Jerry Jelinek3d729ae2015-10-15 16:26:52 -070030 *
Patrick Mooneyabf99a02016-09-24 09:17:04 -070031 * Reading from the signalfd is straightforward and mimics the kernel behavior
32 * for sigtimedwait(). Signals continue to live on either the proc's p_sig, or
33 * thread's t_sig, member. During a read operation, those which match the mask
34 * are consumed so they are no longer pending.
Jerry Jelinek3d729ae2015-10-15 16:26:52 -070035 *
Patrick Mooneyabf99a02016-09-24 09:17:04 -070036 * The poll side is more complex. Every time a signal is delivered, all of the
37 * signalfds on the process need to be examined in order to pollwake threads
38 * waiting for signal arrival.
Jerry Jelinek3d729ae2015-10-15 16:26:52 -070039 *
Patrick Mooneyabf99a02016-09-24 09:17:04 -070040 * When a thread polling on a signalfd requires a pollhead, several steps must
41 * be taken to safely ensure the proper result. A sigfd_proc_state_t is
42 * created for the calling process if it does not yet exist. It is there where
43 * a list of sigfd_poll_waiter_t structures reside which associate pollheads to
44 * signalfd_state_t entries. The sigfd_proc_state_t list is walked to find a
45 * sigfd_poll_waiter_t matching the signalfd_state_t which corresponds to the
46 * polled resource. If one is found, it is reused. Otherwise a new one is
47 * created, incrementing the refcount on the signalfd_state_t, and it is added
48 * to the sigfd_poll_waiter_t list.
Jerry Jelinek3d729ae2015-10-15 16:26:52 -070049 *
Patrick Mooneyabf99a02016-09-24 09:17:04 -070050 * The complications imposed by fork(2) are why the pollhead is stored in the
51 * associated sigfd_poll_waiter_t instead of directly in the signalfd_state_t.
52 * More than one process can hold a reference to the signalfd at a time but
53 * arriving signals should wake only process-local pollers. Additionally,
54 * signalfd_close is called only when the last referencing fd is closed, hiding
 * occurrences of preceding threads which released their references. This
56 * necessitates reference counting on the signalfd_state_t so it is able to
57 * persist after close until all poll references have been cleansed. Doing so
58 * ensures that blocked pollers which hold references to the signalfd_state_t
59 * will be able to do clean-up after the descriptor itself has been closed.
60 *
61 * When a signal arrives in a process polling on signalfd, signalfd_pollwake_cb
62 * is called via the pointer in sigfd_proc_state_t. It will walk over the
63 * sigfd_poll_waiter_t entries present in the list, searching for any
64 * associated with a signalfd_state_t with a matching signal mask. The
65 * approach of keeping the poller list in p_sigfd was chosen because a process
66 * is likely to use few signalfds relative to its total file descriptors. It
67 * reduces the work required for each received signal.
68 *
69 * When matching sigfd_poll_waiter_t entries are encountered in the poller list
70 * during signalfd_pollwake_cb, they are dispatched into signalfd_wakeq to
71 * perform the pollwake. This is due to a lock ordering conflict between
72 * signalfd_poll and signalfd_pollwake_cb. The former acquires
73 * pollcache_t`pc_lock before proc_t`p_lock. The latter (via sigtoproc)
 * reverses the order. Deferring the pollwake into a taskq means it can be
75 * performed without proc_t`p_lock held, avoiding the deadlock.
Jerry Jelinek3d729ae2015-10-15 16:26:52 -070076 *
77 * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list
Patrick Mooneyabf99a02016-09-24 09:17:04 -070078 * will clear out on its own. Any remaining per-process state which remains
79 * will be cleaned up by the exit helper (signalfd_exit_helper).
Jerry Jelinek3d729ae2015-10-15 16:26:52 -070080 *
Patrick Mooneyabf99a02016-09-24 09:17:04 -070081 * The structures associated with signalfd state are designed to operate
82 * correctly across fork, but there is one caveat that applies. Using
 * fork-shared signalfd descriptors in conjunction with fork-shared caching poll
84 * descriptors (such as /dev/poll or event ports) will result in missed poll
85 * wake-ups. This is caused by the pollhead identity of signalfd descriptors
86 * being dependent on the process they are polled from. Because it has a
87 * thread-local cache, poll(2) is unaffected by this limitation.
Jerry Jelinek3d729ae2015-10-15 16:26:52 -070088 *
Patrick Mooneyabf99a02016-09-24 09:17:04 -070089 * Lock ordering:
Jerry Jelinek3d729ae2015-10-15 16:26:52 -070090 *
Patrick Mooneyabf99a02016-09-24 09:17:04 -070091 * 1. signalfd_lock
92 * 2. signalfd_state_t`sfd_lock
Jerry Jelinek3d729ae2015-10-15 16:26:52 -070093 *
Patrick Mooneyabf99a02016-09-24 09:17:04 -070094 * 1. proc_t`p_lock (to walk p_sigfd)
95 * 2. signalfd_state_t`sfd_lock
96 * 2a. signalfd_lock (after sfd_lock is dropped, when sfd_count falls to 0)
Jerry Jelinek3d729ae2015-10-15 16:26:52 -070097 */
98
99#include <sys/ddi.h>
100#include <sys/sunddi.h>
101#include <sys/signalfd.h>
102#include <sys/conf.h>
103#include <sys/sysmacros.h>
104#include <sys/filio.h>
105#include <sys/stat.h>
106#include <sys/file.h>
107#include <sys/schedctl.h>
108#include <sys/id_space.h>
109#include <sys/sdt.h>
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700110#include <sys/disp.h>
111#include <sys/taskq_impl.h>
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700112
113typedef struct signalfd_state signalfd_state_t;
114
115struct signalfd_state {
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700116 list_node_t sfd_list; /* node in global list */
117 kmutex_t sfd_lock; /* protects fields below */
118 uint_t sfd_count; /* ref count */
119 boolean_t sfd_valid; /* valid while open */
120 k_sigset_t sfd_set; /* signals for this fd */
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700121};
122
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700123typedef struct sigfd_poll_waiter {
124 list_node_t spw_list;
125 signalfd_state_t *spw_state;
126 pollhead_t spw_pollhd;
127 taskq_ent_t spw_taskent;
128 short spw_pollev;
129} sigfd_poll_waiter_t;
130
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700131/*
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700132 * Protects global state in signalfd_devi, signalfd_minor, signalfd_softstate,
133 * and signalfd_state (including sfd_list field of members)
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700134 */
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700135static kmutex_t signalfd_lock;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700136static dev_info_t *signalfd_devi; /* device info */
137static id_space_t *signalfd_minor; /* minor number arena */
138static void *signalfd_softstate; /* softstate pointer */
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700139static list_t signalfd_state; /* global list of state */
140static taskq_t *signalfd_wakeq; /* pollwake event taskq */
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700141
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700142
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700143static void
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700144signalfd_state_enter_locked(signalfd_state_t *state)
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700145{
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700146 ASSERT(MUTEX_HELD(&state->sfd_lock));
147 ASSERT(state->sfd_count > 0);
148 VERIFY(state->sfd_valid == B_TRUE);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700149
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700150 state->sfd_count++;
151}
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700152
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700153static void
154signalfd_state_release(signalfd_state_t *state, boolean_t force_invalidate)
155{
156 mutex_enter(&state->sfd_lock);
157
158 if (force_invalidate) {
159 state->sfd_valid = B_FALSE;
160 }
161
162 ASSERT(state->sfd_count > 0);
163 if (state->sfd_count == 1) {
164 VERIFY(state->sfd_valid == B_FALSE);
165 mutex_exit(&state->sfd_lock);
166 if (force_invalidate) {
167 /*
168 * The invalidation performed in signalfd_close is done
169 * while signalfd_lock is held.
170 */
171 ASSERT(MUTEX_HELD(&signalfd_lock));
172 list_remove(&signalfd_state, state);
173 } else {
174 ASSERT(MUTEX_NOT_HELD(&signalfd_lock));
175 mutex_enter(&signalfd_lock);
176 list_remove(&signalfd_state, state);
177 mutex_exit(&signalfd_lock);
178 }
179 kmem_free(state, sizeof (*state));
180 return;
181 }
182 state->sfd_count--;
183 mutex_exit(&state->sfd_lock);
184}
185
186static sigfd_poll_waiter_t *
187signalfd_wake_list_add(sigfd_proc_state_t *pstate, signalfd_state_t *state)
188{
189 list_t *lst = &pstate->sigfd_list;
190 sigfd_poll_waiter_t *pw;
191
192 for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) {
193 if (pw->spw_state == state)
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700194 break;
195 }
196
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700197 if (pw == NULL) {
198 pw = kmem_zalloc(sizeof (*pw), KM_SLEEP);
199
200 mutex_enter(&state->sfd_lock);
201 signalfd_state_enter_locked(state);
202 pw->spw_state = state;
203 mutex_exit(&state->sfd_lock);
204 list_insert_head(lst, pw);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700205 }
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700206 return (pw);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700207}
208
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700209static sigfd_poll_waiter_t *
210signalfd_wake_list_rm(sigfd_proc_state_t *pstate, signalfd_state_t *state)
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700211{
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700212 list_t *lst = &pstate->sigfd_list;
213 sigfd_poll_waiter_t *pw;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700214
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700215 for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) {
216 if (pw->spw_state == state) {
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700217 break;
218 }
219 }
220
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700221 if (pw != NULL) {
222 list_remove(lst, pw);
223 pw->spw_state = NULL;
224 signalfd_state_release(state, B_FALSE);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700225 }
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700226
227 return (pw);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700228}
229
230static void
231signalfd_wake_list_cleanup(proc_t *p)
232{
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700233 sigfd_proc_state_t *pstate = p->p_sigfd;
234 sigfd_poll_waiter_t *pw;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700235 list_t *lst;
236
237 ASSERT(MUTEX_HELD(&p->p_lock));
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700238 ASSERT(pstate != NULL);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700239
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700240 lst = &pstate->sigfd_list;
241 while ((pw = list_remove_head(lst)) != NULL) {
242 signalfd_state_t *state = pw->spw_state;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700243
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700244 pw->spw_state = NULL;
245 signalfd_state_release(state, B_FALSE);
246
247 pollwakeup(&pw->spw_pollhd, POLLERR);
248 pollhead_clean(&pw->spw_pollhd);
249 kmem_free(pw, sizeof (*pw));
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700250 }
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700251 list_destroy(lst);
252
253 p->p_sigfd = NULL;
254 kmem_free(pstate, sizeof (*pstate));
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700255}
256
257static void
258signalfd_exit_helper(void)
259{
260 proc_t *p = curproc;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700261
262 mutex_enter(&p->p_lock);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700263 signalfd_wake_list_cleanup(p);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700264 mutex_exit(&p->p_lock);
265}
266
267/*
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700268 * Perform pollwake for a sigfd_poll_waiter_t entry.
269 * Thanks to the strict and conflicting lock orders required for signalfd_poll
270 * (pc_lock before p_lock) and signalfd_pollwake_cb (p_lock before pc_lock),
271 * this is relegated to a taskq to avoid deadlock.
272 */
273static void
274signalfd_wake_task(void *arg)
275{
276 sigfd_poll_waiter_t *pw = arg;
277 signalfd_state_t *state = pw->spw_state;
278
279 pw->spw_state = NULL;
280 signalfd_state_release(state, B_FALSE);
281 pollwakeup(&pw->spw_pollhd, pw->spw_pollev);
282 pollhead_clean(&pw->spw_pollhd);
283 kmem_free(pw, sizeof (*pw));
284}
285
286/*
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700287 * Called every time a signal is delivered to the process so that we can
288 * see if any signal stream needs a pollwakeup. We maintain a list of
289 * signal state elements so that we don't have to look at every file descriptor
290 * on the process. If necessary, a further optimization would be to maintain a
291 * signal set mask that is a union of all of the sets in the list so that
292 * we don't even traverse the list if the signal is not in one of the elements.
293 * However, since the list is likely to be very short, this is not currently
294 * being done. A more complex data structure might also be used, but it is
295 * unclear what that would be since each signal set needs to be checked for a
296 * match.
297 */
298static void
299signalfd_pollwake_cb(void *arg0, int sig)
300{
301 proc_t *p = (proc_t *)arg0;
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700302 sigfd_proc_state_t *pstate = (sigfd_proc_state_t *)p->p_sigfd;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700303 list_t *lst;
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700304 sigfd_poll_waiter_t *pw;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700305
306 ASSERT(MUTEX_HELD(&p->p_lock));
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700307 ASSERT(pstate != NULL);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700308
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700309 lst = &pstate->sigfd_list;
310 pw = list_head(lst);
311 while (pw != NULL) {
312 signalfd_state_t *state = pw->spw_state;
313 sigfd_poll_waiter_t *next;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700314
315 mutex_enter(&state->sfd_lock);
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700316 if (!state->sfd_valid) {
317 pw->spw_pollev = POLLERR;
318 } else if (sigismember(&state->sfd_set, sig)) {
319 pw->spw_pollev = POLLRDNORM | POLLIN;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700320 } else {
321 mutex_exit(&state->sfd_lock);
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700322 pw = list_next(lst, pw);
323 continue;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700324 }
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700325 mutex_exit(&state->sfd_lock);
326
327 /*
328 * Pull the sigfd_poll_waiter_t out of the list and dispatch it
329 * to perform a pollwake. This cannot be done synchronously
330 * since signalfd_poll and signalfd_pollwake_cb have
331 * conflicting lock orders which can deadlock.
332 */
333 next = list_next(lst, pw);
334 list_remove(lst, pw);
335 taskq_dispatch_ent(signalfd_wakeq, signalfd_wake_task, pw, 0,
336 &pw->spw_taskent);
337 pw = next;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700338 }
339}
340
341_NOTE(ARGSUSED(1))
342static int
343signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
344{
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700345 signalfd_state_t *state, **sstate;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700346 major_t major = getemajor(*devp);
347 minor_t minor = getminor(*devp);
348
349 if (minor != SIGNALFDMNRN_SIGNALFD)
350 return (ENXIO);
351
352 mutex_enter(&signalfd_lock);
353
354 minor = (minor_t)id_allocff(signalfd_minor);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700355 if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
356 id_free(signalfd_minor, minor);
357 mutex_exit(&signalfd_lock);
358 return (ENODEV);
359 }
360
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700361 state = kmem_zalloc(sizeof (*state), KM_SLEEP);
362 state->sfd_valid = B_TRUE;
363 state->sfd_count = 1;
364 list_insert_head(&signalfd_state, (void *)state);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700365
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700366 sstate = ddi_get_soft_state(signalfd_softstate, minor);
367 *sstate = state;
368 *devp = makedevice(major, minor);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700369
370 mutex_exit(&signalfd_lock);
371
372 return (0);
373}
374
375/*
376 * Consume one signal from our set in a manner similar to sigtimedwait().
377 * The block parameter is used to control whether we wait for a signal or
378 * return immediately if no signal is pending. We use the thread's t_sigwait
379 * member in the same way that it is used by sigtimedwait.
380 *
381 * Return 0 if we successfully consumed a signal or an errno if not.
382 */
383static int
384consume_signal(k_sigset_t set, uio_t *uio, boolean_t block)
385{
386 k_sigset_t oldmask;
387 kthread_t *t = curthread;
388 klwp_t *lwp = ttolwp(t);
389 proc_t *p = ttoproc(t);
390 timespec_t now;
391 timespec_t *rqtp = NULL; /* null means blocking */
392 int timecheck = 0;
393 int ret = 0;
394 k_siginfo_t info, *infop;
395 signalfd_siginfo_t ssi, *ssp = &ssi;
396
397 if (block == B_FALSE) {
398 timecheck = timechanged;
399 gethrestime(&now);
400 rqtp = &now; /* non-blocking check for pending signals */
401 }
402
403 t->t_sigwait = set;
404
405 mutex_enter(&p->p_lock);
406 /*
407 * set the thread's signal mask to unmask those signals in the
408 * specified set.
409 */
410 schedctl_finish_sigblock(t);
411 oldmask = t->t_hold;
412 sigdiffset(&t->t_hold, &t->t_sigwait);
413
414 /*
415 * Based on rqtp, wait indefinitely until we take a signal in our set
416 * or return immediately if there are no signals pending from our set.
417 */
418 while ((ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock, rqtp,
419 timecheck)) > 0)
420 continue;
421
422 /* Restore thread's signal mask to its previous value. */
423 t->t_hold = oldmask;
424 t->t_sig_check = 1; /* so post_syscall sees new t_hold mask */
425
426 if (ret == -1) {
427 /* no signals pending */
428 mutex_exit(&p->p_lock);
429 sigemptyset(&t->t_sigwait);
430 return (EAGAIN); /* no signals pending */
431 }
432
433 /* Don't bother with signal if it is not in request set. */
434 if (lwp->lwp_cursig == 0 ||
435 !sigismember(&t->t_sigwait, lwp->lwp_cursig)) {
436 mutex_exit(&p->p_lock);
437 /*
438 * lwp_cursig is zero if pokelwps() awakened cv_wait_sig().
439 * This happens if some other thread in this process called
440 * forkall() or exit().
441 */
442 sigemptyset(&t->t_sigwait);
443 return (EINTR);
444 }
445
446 if (lwp->lwp_curinfo) {
447 infop = &lwp->lwp_curinfo->sq_info;
448 } else {
449 infop = &info;
450 bzero(infop, sizeof (info));
451 infop->si_signo = lwp->lwp_cursig;
452 infop->si_code = SI_NOINFO;
453 }
454
455 lwp->lwp_ru.nsignals++;
456
457 DTRACE_PROC2(signal__clear, int, ret, ksiginfo_t *, infop);
458 lwp->lwp_cursig = 0;
459 lwp->lwp_extsig = 0;
460 mutex_exit(&p->p_lock);
461
462 /* Convert k_siginfo into external, datamodel independent, struct. */
463 bzero(ssp, sizeof (*ssp));
464 ssp->ssi_signo = infop->si_signo;
465 ssp->ssi_errno = infop->si_errno;
466 ssp->ssi_code = infop->si_code;
467 ssp->ssi_pid = infop->si_pid;
468 ssp->ssi_uid = infop->si_uid;
469 ssp->ssi_fd = infop->si_fd;
470 ssp->ssi_band = infop->si_band;
471 ssp->ssi_trapno = infop->si_trapno;
472 ssp->ssi_status = infop->si_status;
473 ssp->ssi_utime = infop->si_utime;
474 ssp->ssi_stime = infop->si_stime;
475 ssp->ssi_addr = (uint64_t)(intptr_t)infop->si_addr;
476
477 ret = uiomove(ssp, sizeof (*ssp), UIO_READ, uio);
478
479 if (lwp->lwp_curinfo) {
480 siginfofree(lwp->lwp_curinfo);
481 lwp->lwp_curinfo = NULL;
482 }
483 sigemptyset(&t->t_sigwait);
484 return (ret);
485}
486
487/*
488 * This is similar to sigtimedwait. Based on the fd mode we may wait until a
489 * signal within our specified set is posted. We consume as many available
490 * signals within our set as we can.
491 */
492_NOTE(ARGSUSED(2))
493static int
494signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
495{
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700496 signalfd_state_t *state, **sstate;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700497 minor_t minor = getminor(dev);
498 boolean_t block = B_TRUE;
499 k_sigset_t set;
500 boolean_t got_one = B_FALSE;
501 int res;
502
503 if (uio->uio_resid < sizeof (signalfd_siginfo_t))
504 return (EINVAL);
505
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700506 sstate = ddi_get_soft_state(signalfd_softstate, minor);
507 state = *sstate;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700508
509 if (uio->uio_fmode & (FNDELAY|FNONBLOCK))
510 block = B_FALSE;
511
512 mutex_enter(&state->sfd_lock);
513 set = state->sfd_set;
514 mutex_exit(&state->sfd_lock);
515
516 if (sigisempty(&set))
517 return (set_errno(EINVAL));
518
519 do {
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700520 res = consume_signal(set, uio, block);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700521
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700522 if (res == 0) {
523 /*
524 * After consuming one signal, do not block while
525 * trying to consume more.
526 */
527 got_one = B_TRUE;
528 block = B_FALSE;
529
530 /*
531 * Refresh the matching signal set in case it was
532 * updated during the wait.
533 */
534 mutex_enter(&state->sfd_lock);
535 set = state->sfd_set;
536 mutex_exit(&state->sfd_lock);
537 if (sigisempty(&set))
538 break;
539 }
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700540 } while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));
541
542 if (got_one)
543 res = 0;
544
545 return (res);
546}
547
548/*
549 * If ksigset_t's were a single word, we would do:
550 * return (((p->p_sig | t->t_sig) & set) & fillset);
551 */
552static int
553signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
554{
555 return (((p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) &
556 set.__sigbits[0]) |
557 ((p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) &
558 set.__sigbits[1]) |
559 (((p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) &
560 set.__sigbits[2]) & FILLSET2));
561}
562
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700563static int
564signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
565 struct pollhead **phpp)
566{
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700567 signalfd_state_t *state, **sstate;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700568 minor_t minor = getminor(dev);
569 kthread_t *t = curthread;
570 proc_t *p = ttoproc(t);
571 short revents = 0;
572
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700573 sstate = ddi_get_soft_state(signalfd_softstate, minor);
574 state = *sstate;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700575
576 mutex_enter(&state->sfd_lock);
577
578 if (signalfd_sig_pending(p, t, state->sfd_set) != 0)
579 revents |= POLLRDNORM | POLLIN;
580
581 mutex_exit(&state->sfd_lock);
582
Patrick Mooney80d56892017-09-22 23:43:19 +0000583 *reventsp = revents & events;
584 if ((*reventsp == 0 && !anyyet) || (events & POLLET)) {
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700585 sigfd_proc_state_t *pstate;
586 sigfd_poll_waiter_t *pw;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700587
588 /*
589 * Enable pollwakeup handling.
590 */
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700591 mutex_enter(&p->p_lock);
592 if ((pstate = (sigfd_proc_state_t *)p->p_sigfd) == NULL) {
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700593
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700594 mutex_exit(&p->p_lock);
595 pstate = kmem_zalloc(sizeof (*pstate), KM_SLEEP);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700596 list_create(&pstate->sigfd_list,
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700597 sizeof (sigfd_poll_waiter_t),
598 offsetof(sigfd_poll_waiter_t, spw_list));
599 pstate->sigfd_pollwake_cb = signalfd_pollwake_cb;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700600
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700601 /* Check again, after blocking for the alloc. */
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700602 mutex_enter(&p->p_lock);
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700603 if (p->p_sigfd == NULL) {
604 p->p_sigfd = pstate;
605 } else {
606 /* someone beat us to it */
607 list_destroy(&pstate->sigfd_list);
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700608 kmem_free(pstate, sizeof (*pstate));
609 pstate = p->p_sigfd;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700610 }
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700611 }
612
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700613 pw = signalfd_wake_list_add(pstate, state);
614 *phpp = &pw->spw_pollhd;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700615 mutex_exit(&p->p_lock);
616 }
617
618 return (0);
619}
620
621_NOTE(ARGSUSED(4))
622static int
623signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
624{
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700625 signalfd_state_t *state, **sstate;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700626 minor_t minor = getminor(dev);
627 sigset_t mask;
628
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700629 sstate = ddi_get_soft_state(signalfd_softstate, minor);
630 state = *sstate;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700631
632 switch (cmd) {
633 case SIGNALFDIOC_MASK:
634 if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t),
635 md) != 0)
636 return (set_errno(EFAULT));
637
638 mutex_enter(&state->sfd_lock);
639 sigutok(&mask, &state->sfd_set);
640 mutex_exit(&state->sfd_lock);
641
642 return (0);
643
644 default:
645 break;
646 }
647
648 return (ENOTTY);
649}
650
651_NOTE(ARGSUSED(1))
652static int
653signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
654{
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700655 signalfd_state_t *state, **sstate;
656 sigfd_poll_waiter_t *pw = NULL;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700657 minor_t minor = getminor(dev);
658 proc_t *p = curproc;
659
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700660 sstate = ddi_get_soft_state(signalfd_softstate, minor);
661 state = *sstate;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700662
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700663 /* Make sure state is removed from this proc's pollwake list. */
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700664 mutex_enter(&p->p_lock);
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700665 if (p->p_sigfd != NULL) {
666 sigfd_proc_state_t *pstate = p->p_sigfd;
667
668 pw = signalfd_wake_list_rm(pstate, state);
669 if (list_is_empty(&pstate->sigfd_list)) {
670 signalfd_wake_list_cleanup(p);
671 }
672 }
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700673 mutex_exit(&p->p_lock);
674
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700675 if (pw != NULL) {
676 pollwakeup(&pw->spw_pollhd, POLLERR);
677 pollhead_clean(&pw->spw_pollhd);
678 kmem_free(pw, sizeof (*pw));
679 }
680
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700681 mutex_enter(&signalfd_lock);
682
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700683 *sstate = NULL;
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700684 ddi_soft_state_free(signalfd_softstate, minor);
685 id_free(signalfd_minor, minor);
686
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700687 signalfd_state_release(state, B_TRUE);
688
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700689 mutex_exit(&signalfd_lock);
690
691 return (0);
692}
693
694static int
695signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
696{
697 if (cmd != DDI_ATTACH || signalfd_devi != NULL)
698 return (DDI_FAILURE);
699
700 mutex_enter(&signalfd_lock);
701
702 signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1);
703 if (signalfd_minor == NULL) {
704 cmn_err(CE_WARN, "signalfd couldn't create id space");
705 mutex_exit(&signalfd_lock);
706 return (DDI_FAILURE);
707 }
708
709 if (ddi_soft_state_init(&signalfd_softstate,
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700710 sizeof (signalfd_state_t *), 0) != 0) {
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700711 cmn_err(CE_WARN, "signalfd failed to create soft state");
712 id_space_destroy(signalfd_minor);
713 mutex_exit(&signalfd_lock);
714 return (DDI_FAILURE);
715 }
716
717 if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
718 SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
719 cmn_err(CE_NOTE, "/dev/signalfd couldn't create minor node");
720 ddi_soft_state_fini(&signalfd_softstate);
721 id_space_destroy(signalfd_minor);
722 mutex_exit(&signalfd_lock);
723 return (DDI_FAILURE);
724 }
725
726 ddi_report_dev(devi);
727 signalfd_devi = devi;
728
729 sigfd_exit_helper = signalfd_exit_helper;
730
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700731 list_create(&signalfd_state, sizeof (signalfd_state_t),
732 offsetof(signalfd_state_t, sfd_list));
733
734 signalfd_wakeq = taskq_create("signalfd_wake", 1, minclsyspri,
735 0, INT_MAX, TASKQ_PREPOPULATE);
736
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700737 mutex_exit(&signalfd_lock);
738
739 return (DDI_SUCCESS);
740}
741
742_NOTE(ARGSUSED(0))
743static int
744signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
745{
746 switch (cmd) {
747 case DDI_DETACH:
748 break;
749
750 default:
751 return (DDI_FAILURE);
752 }
753
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700754 mutex_enter(&signalfd_lock);
Patrick Mooneyabf99a02016-09-24 09:17:04 -0700755
756 if (!list_is_empty(&signalfd_state)) {
757 /*
758 * There are dangling poll waiters holding signalfd_state_t
759 * entries on the global list. Detach is not possible until
760 * they purge themselves.
761 */
762 mutex_exit(&signalfd_lock);
763 return (DDI_FAILURE);
764 }
765 list_destroy(&signalfd_state);
766
767 /*
768 * With no remaining entries in the signalfd_state list, the wake taskq
769 * should be empty with no possibility for new entries.
770 */
771 taskq_destroy(signalfd_wakeq);
772
Jerry Jelinek3d729ae2015-10-15 16:26:52 -0700773 id_space_destroy(signalfd_minor);
774
775 ddi_remove_minor_node(signalfd_devi, NULL);
776 signalfd_devi = NULL;
777 sigfd_exit_helper = NULL;
778
779 ddi_soft_state_fini(&signalfd_softstate);
780 mutex_exit(&signalfd_lock);
781
782 return (DDI_SUCCESS);
783}
784
785_NOTE(ARGSUSED(0))
786static int
787signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
788{
789 int error;
790
791 switch (infocmd) {
792 case DDI_INFO_DEVT2DEVINFO:
793 *result = (void *)signalfd_devi;
794 error = DDI_SUCCESS;
795 break;
796 case DDI_INFO_DEVT2INSTANCE:
797 *result = (void *)0;
798 error = DDI_SUCCESS;
799 break;
800 default:
801 error = DDI_FAILURE;
802 }
803 return (error);
804}
805
806static struct cb_ops signalfd_cb_ops = {
807 signalfd_open, /* open */
808 signalfd_close, /* close */
809 nulldev, /* strategy */
810 nulldev, /* print */
811 nodev, /* dump */
812 signalfd_read, /* read */
813 nodev, /* write */
814 signalfd_ioctl, /* ioctl */
815 nodev, /* devmap */
816 nodev, /* mmap */
817 nodev, /* segmap */
818 signalfd_poll, /* poll */
819 ddi_prop_op, /* cb_prop_op */
820 0, /* streamtab */
821 D_NEW | D_MP /* Driver compatibility flag */
822};
823
824static struct dev_ops signalfd_ops = {
825 DEVO_REV, /* devo_rev */
826 0, /* refcnt */
827 signalfd_info, /* get_dev_info */
828 nulldev, /* identify */
829 nulldev, /* probe */
830 signalfd_attach, /* attach */
831 signalfd_detach, /* detach */
832 nodev, /* reset */
833 &signalfd_cb_ops, /* driver operations */
834 NULL, /* bus operations */
835 nodev, /* dev power */
836 ddi_quiesce_not_needed, /* quiesce */
837};
838
839static struct modldrv modldrv = {
840 &mod_driverops, /* module type (this is a pseudo driver) */
841 "signalfd support", /* name of module */
842 &signalfd_ops, /* driver ops */
843};
844
845static struct modlinkage modlinkage = {
846 MODREV_1,
847 (void *)&modldrv,
848 NULL
849};
850
851int
852_init(void)
853{
854 return (mod_install(&modlinkage));
855}
856
857int
858_info(struct modinfo *modinfop)
859{
860 return (mod_info(&modlinkage, modinfop));
861}
862
863int
864_fini(void)
865{
866 return (mod_remove(&modlinkage));
867}