blob: 29eea9b24a080d28262ec4f83fb48ca69f766857 [file] [log] [blame]
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
11
/*
 * Copyright (c) 2015 Joyent, Inc. All rights reserved.
 */
15
/*
 * Support for the timerfd facility, a Linux-borne facility that allows
 * POSIX.1b timers to be created and manipulated via a file descriptor
 * interface.
 */
21
22#include <sys/ddi.h>
23#include <sys/sunddi.h>
24#include <sys/timerfd.h>
25#include <sys/conf.h>
26#include <sys/vmem.h>
27#include <sys/sysmacros.h>
28#include <sys/filio.h>
29#include <sys/stat.h>
30#include <sys/file.h>
31#include <sys/timer.h>
32
33struct timerfd_state;
34typedef struct timerfd_state timerfd_state_t;
35
36struct timerfd_state {
37 kmutex_t tfd_lock; /* lock protecting state */
38 kcondvar_t tfd_cv; /* condvar */
39 pollhead_t tfd_pollhd; /* poll head */
40 uint64_t tfd_fired; /* # of times fired */
41 itimer_t tfd_itimer; /* underlying itimer */
42 timerfd_state_t *tfd_next; /* next state on global list */
43};
44
45/*
46 * Internal global variables.
47 */
48static kmutex_t timerfd_lock; /* lock protecting state */
49static dev_info_t *timerfd_devi; /* device info */
50static vmem_t *timerfd_minor; /* minor number arena */
51static void *timerfd_softstate; /* softstate pointer */
52static timerfd_state_t *timerfd_state; /* global list of state */
53
54static itimer_t *
55timerfd_itimer_lock(timerfd_state_t *state)
56{
57 itimer_t *it = &state->tfd_itimer;
58
59 mutex_enter(&state->tfd_lock);
60
61 while (it->it_lock & ITLK_LOCKED) {
62 it->it_blockers++;
63 cv_wait(&it->it_cv, &state->tfd_lock);
64 it->it_blockers--;
65 }
66
67 it->it_lock |= ITLK_LOCKED;
68
69 mutex_exit(&state->tfd_lock);
70
71 return (it);
72}
73
74static void
75timerfd_itimer_unlock(timerfd_state_t *state, itimer_t *it)
76{
77 VERIFY(it == &state->tfd_itimer);
78 VERIFY(it->it_lock & ITLK_LOCKED);
79
80 mutex_enter(&state->tfd_lock);
81
82 it->it_lock &= ~ITLK_LOCKED;
83
84 if (it->it_blockers)
85 cv_signal(&it->it_cv);
86
87 mutex_exit(&state->tfd_lock);
88}
89
90static void
91timerfd_fire(itimer_t *it)
92{
93 timerfd_state_t *state = it->it_frontend;
94 uint64_t oval;
95
96 mutex_enter(&state->tfd_lock);
97 oval = state->tfd_fired++;
98 mutex_exit(&state->tfd_lock);
99
100 if (oval == 0) {
101 cv_broadcast(&state->tfd_cv);
102 pollwakeup(&state->tfd_pollhd, POLLRDNORM | POLLIN);
103 }
104}
105
106/*ARGSUSED*/
107static int
108timerfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
109{
110 timerfd_state_t *state;
111 major_t major = getemajor(*devp);
112 minor_t minor = getminor(*devp);
113
114 if (minor != TIMERFDMNRN_TIMERFD)
115 return (ENXIO);
116
117 mutex_enter(&timerfd_lock);
118
119 minor = (minor_t)(uintptr_t)vmem_alloc(timerfd_minor, 1,
120 VM_BESTFIT | VM_SLEEP);
121
122 if (ddi_soft_state_zalloc(timerfd_softstate, minor) != DDI_SUCCESS) {
123 vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1);
124 mutex_exit(&timerfd_lock);
125 return (NULL);
126 }
127
128 state = ddi_get_soft_state(timerfd_softstate, minor);
129 *devp = makedevice(major, minor);
130
131 state->tfd_next = timerfd_state;
132 timerfd_state = state;
133
134 mutex_exit(&timerfd_lock);
135
136 return (0);
137}
138
139/*ARGSUSED*/
140static int
141timerfd_read(dev_t dev, uio_t *uio, cred_t *cr)
142{
143 timerfd_state_t *state;
144 minor_t minor = getminor(dev);
145 uint64_t val;
146 int err;
147
148 if (uio->uio_resid < sizeof (val))
149 return (EINVAL);
150
151 state = ddi_get_soft_state(timerfd_softstate, minor);
152
153 mutex_enter(&state->tfd_lock);
154
155 while (state->tfd_fired == 0) {
156 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
157 mutex_exit(&state->tfd_lock);
158 return (EAGAIN);
159 }
160
161 if (!cv_wait_sig_swap(&state->tfd_cv, &state->tfd_lock)) {
162 mutex_exit(&state->tfd_lock);
163 return (EINTR);
164 }
165 }
166
167 /*
168 * Our tfd_fired is non-zero; slurp its value and then clear it.
169 */
170 val = state->tfd_fired;
171 state->tfd_fired = 0;
172 mutex_exit(&state->tfd_lock);
173
174 err = uiomove(&val, sizeof (val), UIO_READ, uio);
175
176 return (err);
177}
178
179/*ARGSUSED*/
180static int
181timerfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
182 struct pollhead **phpp)
183{
184 timerfd_state_t *state;
185 minor_t minor = getminor(dev);
186 short revents = 0;
187
188 state = ddi_get_soft_state(timerfd_softstate, minor);
189
190 mutex_enter(&state->tfd_lock);
191
192 if (state->tfd_fired > 0)
193 revents |= POLLRDNORM | POLLIN;
194
195 if (!(*reventsp = revents & events) && !anyyet)
196 *phpp = &state->tfd_pollhd;
197
198 mutex_exit(&state->tfd_lock);
199
200 return (0);
201}
202
203static int
204timerfd_copyin(uintptr_t addr, itimerspec_t *dest)
205{
206 if (get_udatamodel() == DATAMODEL_NATIVE) {
207 if (copyin((void *)addr, dest, sizeof (itimerspec_t)) != 0)
208 return (EFAULT);
209 } else {
210 itimerspec32_t dest32;
211
212 if (copyin((void *)addr, &dest32, sizeof (itimerspec32_t)) != 0)
213 return (EFAULT);
214
215 ITIMERSPEC32_TO_ITIMERSPEC(dest, &dest32);
216 }
217
218 if (itimerspecfix(&dest->it_value) ||
219 (itimerspecfix(&dest->it_interval) &&
220 timerspecisset(&dest->it_value))) {
221 return (EINVAL);
222 }
223
224 return (0);
225}
226
227static int
228timerfd_copyout(itimerspec_t *src, uintptr_t addr)
229{
230 if (get_udatamodel() == DATAMODEL_NATIVE) {
231 if (copyout(src, (void *)addr, sizeof (itimerspec_t)) != 0)
232 return (EFAULT);
233 } else {
234 itimerspec32_t src32;
235
236 if (ITIMERSPEC_OVERFLOW(src))
237 return (EOVERFLOW);
238
239 ITIMERSPEC_TO_ITIMERSPEC32(&src32, src);
240
241 if (copyout(&src32, (void *)addr, sizeof (itimerspec32_t)) != 0)
242 return (EFAULT);
243 }
244
245 return (0);
246}
247
248/*ARGSUSED*/
249static int
250timerfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
251{
252 itimerspec_t when, oval;
253 timerfd_state_t *state;
254 minor_t minor = getminor(dev);
255 int err;
256 itimer_t *it;
257
258 state = ddi_get_soft_state(timerfd_softstate, minor);
259
260 switch (cmd) {
261 case TIMERFDIOC_CREATE: {
262 if (arg == TIMERFD_MONOTONIC)
263 arg = CLOCK_MONOTONIC;
264
265 it = timerfd_itimer_lock(state);
266
267 if (it->it_backend != NULL) {
268 timerfd_itimer_unlock(state, it);
269 return (EEXIST);
270 }
271
272 if ((it->it_backend = clock_get_backend(arg)) == NULL) {
273 timerfd_itimer_unlock(state, it);
274 return (EINVAL);
275 }
276
277 /*
278 * We need to provide a proc structure only for purposes
279 * of locking CLOCK_REALTIME-based timers -- it is safe to
280 * provide p0 here.
281 */
282 it->it_proc = &p0;
283
284 err = it->it_backend->clk_timer_create(it, timerfd_fire);
285
286 if (err != 0) {
287 it->it_backend = NULL;
288 timerfd_itimer_unlock(state, it);
289 return (err);
290 }
291
292 it->it_frontend = state;
293 timerfd_itimer_unlock(state, it);
294
295 return (0);
296 }
297
298 case TIMERFDIOC_GETTIME: {
299 it = timerfd_itimer_lock(state);
300
301 if (it->it_backend == NULL) {
302 timerfd_itimer_unlock(state, it);
303 return (ENODEV);
304 }
305
306 err = it->it_backend->clk_timer_gettime(it, &when);
307 timerfd_itimer_unlock(state, it);
308
309 if (err != 0)
310 return (err);
311
312 if ((err = timerfd_copyout(&when, arg)) != 0)
313 return (err);
314
315 return (0);
316 }
317
318 case TIMERFDIOC_SETTIME: {
319 timerfd_settime_t st;
320
321 if (copyin((void *)arg, &st, sizeof (st)) != 0)
322 return (EFAULT);
323
324 if ((err = timerfd_copyin(st.tfd_settime_value, &when)) != 0)
325 return (err);
326
327 it = timerfd_itimer_lock(state);
328
329 if (it->it_backend == NULL) {
330 timerfd_itimer_unlock(state, it);
331 return (ENODEV);
332 }
333
334 if (st.tfd_settime_ovalue != NULL) {
335 err = it->it_backend->clk_timer_gettime(it, &oval);
336
337 if (err != 0) {
338 timerfd_itimer_unlock(state, it);
339 return (err);
340 }
341 }
342
343 /*
344 * Before we set the time, we're going to clear tfd_fired.
345 * This can potentially race with the (old) timer firing, but
346 * the window is deceptively difficult to close: if we were
347 * to simply clear tfd_fired after the call to the backend
348 * returned, we would run the risk of plowing a firing of the
349 * new timer. Ultimately, the race can only be resolved by
350 * the backend, which would likely need to be extended with a
351 * function to call back into when the timer is between states
352 * (that is, after the timer can no longer fire with the old
353 * timer value, but before it can fire with the new one).
354 * This is straightforward enough for backends that set a
355 * timer's value by deleting the old one and adding the new
356 * one, but for those that modify the timer value in place
357 * (e.g., cyclics), the required serialization is necessarily
358 * delicate: the function would have to be callable from
359 * arbitrary interrupt context. While implementing all of
360 * this is possible, it does not (for the moment) seem worth
361 * it: if the timer is firing at essentially the same moment
362 * that it's being reprogrammed, there is a higher-level race
363 * with respect to timerfd usage that the progam itself will
364 * have to properly resolve -- and it seems reasonable to
365 * simply allow the program to resolve it in this case.
366 */
367 mutex_enter(&state->tfd_lock);
368 state->tfd_fired = 0;
369 mutex_exit(&state->tfd_lock);
370
371 err = it->it_backend->clk_timer_settime(it,
372 st.tfd_settime_flags & TFD_TIMER_ABSTIME ?
373 TIMER_ABSTIME : TIMER_RELTIME, &when);
374 timerfd_itimer_unlock(state, it);
375
376 if (err != 0 || st.tfd_settime_ovalue == NULL)
377 return (err);
378
379 if ((err = timerfd_copyout(&oval, st.tfd_settime_ovalue)) != 0)
380 return (err);
381
382 return (0);
383 }
384
385 default:
386 break;
387 }
388
389 return (ENOTTY);
390}
391
392/*ARGSUSED*/
393static int
394timerfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
395{
396 timerfd_state_t *state, **sp;
397 itimer_t *it;
398 minor_t minor = getminor(dev);
399
400 state = ddi_get_soft_state(timerfd_softstate, minor);
401
402 if (state->tfd_pollhd.ph_list != NULL) {
403 pollwakeup(&state->tfd_pollhd, POLLERR);
404 pollhead_clean(&state->tfd_pollhd);
405 }
406
407 /*
408 * No one can get to this timer; we don't need to lock it -- we can
409 * just call on the backend to delete it.
410 */
411 it = &state->tfd_itimer;
412
413 if (it->it_backend != NULL)
414 it->it_backend->clk_timer_delete(it);
415
416 mutex_enter(&timerfd_lock);
417
418 /*
419 * Remove our state from our global list.
420 */
421 for (sp = &timerfd_state; *sp != state; sp = &((*sp)->tfd_next))
422 VERIFY(*sp != NULL);
423
424 *sp = (*sp)->tfd_next;
425
426 ddi_soft_state_free(timerfd_softstate, minor);
427 vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1);
428
429 mutex_exit(&timerfd_lock);
430
431 return (0);
432}
433
434static int
435timerfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
436{
437 switch (cmd) {
438 case DDI_ATTACH:
439 break;
440
441 case DDI_RESUME:
442 return (DDI_SUCCESS);
443
444 default:
445 return (DDI_FAILURE);
446 }
447
448 mutex_enter(&timerfd_lock);
449
450 if (ddi_soft_state_init(&timerfd_softstate,
451 sizeof (timerfd_state_t), 0) != 0) {
452 cmn_err(CE_NOTE, "/dev/timerfd failed to create soft state");
453 mutex_exit(&timerfd_lock);
454 return (DDI_FAILURE);
455 }
456
457 if (ddi_create_minor_node(devi, "timerfd", S_IFCHR,
458 TIMERFDMNRN_TIMERFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
459 cmn_err(CE_NOTE, "/dev/timerfd couldn't create minor node");
460 ddi_soft_state_fini(&timerfd_softstate);
461 mutex_exit(&timerfd_lock);
462 return (DDI_FAILURE);
463 }
464
465 ddi_report_dev(devi);
466 timerfd_devi = devi;
467
468 timerfd_minor = vmem_create("timerfd_minor", (void *)TIMERFDMNRN_CLONE,
469 UINT32_MAX - TIMERFDMNRN_CLONE, 1, NULL, NULL, NULL, 0,
470 VM_SLEEP | VMC_IDENTIFIER);
471
472 mutex_exit(&timerfd_lock);
473
474 return (DDI_SUCCESS);
475}
476
477/*ARGSUSED*/
478static int
479timerfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
480{
481 switch (cmd) {
482 case DDI_DETACH:
483 break;
484
485 case DDI_SUSPEND:
486 return (DDI_SUCCESS);
487
488 default:
489 return (DDI_FAILURE);
490 }
491
492 mutex_enter(&timerfd_lock);
493 vmem_destroy(timerfd_minor);
494
495 ddi_remove_minor_node(timerfd_devi, NULL);
496 timerfd_devi = NULL;
497
498 ddi_soft_state_fini(&timerfd_softstate);
499 mutex_exit(&timerfd_lock);
500
501 return (DDI_SUCCESS);
502}
503
504/*ARGSUSED*/
505static int
506timerfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
507{
508 int error;
509
510 switch (infocmd) {
511 case DDI_INFO_DEVT2DEVINFO:
512 *result = (void *)timerfd_devi;
513 error = DDI_SUCCESS;
514 break;
515 case DDI_INFO_DEVT2INSTANCE:
516 *result = (void *)0;
517 error = DDI_SUCCESS;
518 break;
519 default:
520 error = DDI_FAILURE;
521 }
522 return (error);
523}
524
525static struct cb_ops timerfd_cb_ops = {
526 timerfd_open, /* open */
527 timerfd_close, /* close */
528 nulldev, /* strategy */
529 nulldev, /* print */
530 nodev, /* dump */
531 timerfd_read, /* read */
532 nodev, /* write */
533 timerfd_ioctl, /* ioctl */
534 nodev, /* devmap */
535 nodev, /* mmap */
536 nodev, /* segmap */
537 timerfd_poll, /* poll */
538 ddi_prop_op, /* cb_prop_op */
539 0, /* streamtab */
540 D_NEW | D_MP /* Driver compatibility flag */
541};
542
543static struct dev_ops timerfd_ops = {
544 DEVO_REV, /* devo_rev */
545 0, /* refcnt */
546 timerfd_info, /* get_dev_info */
547 nulldev, /* identify */
548 nulldev, /* probe */
549 timerfd_attach, /* attach */
550 timerfd_detach, /* detach */
551 nodev, /* reset */
552 &timerfd_cb_ops, /* driver operations */
553 NULL, /* bus operations */
554 nodev, /* dev power */
555 ddi_quiesce_not_needed, /* quiesce */
556};
557
558static struct modldrv modldrv = {
559 &mod_driverops, /* module type (this is a pseudo driver) */
560 "timerfd support", /* name of module */
561 &timerfd_ops, /* driver ops */
562};
563
564static struct modlinkage modlinkage = {
565 MODREV_1,
566 (void *)&modldrv,
567 NULL
568};
569
570int
571_init(void)
572{
573 return (mod_install(&modlinkage));
574}
575
576int
577_info(struct modinfo *modinfop)
578{
579 return (mod_info(&modlinkage, modinfop));
580}
581
582int
583_fini(void)
584{
585 return (mod_remove(&modlinkage));
586}