stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1 | /* |
| 2 | * CDDL HEADER START |
| 3 | * |
| 4 | * The contents of this file are subject to the terms of the |
Sherry Moore | 1939740 | 2008-09-22 16:30:26 -0700 | [diff] [blame] | 5 | * Common Development and Distribution License (the "License"). |
| 6 | * You may not use this file except in compliance with the License. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 7 | * |
| 8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| 9 | * or http://www.opensolaris.org/os/licensing. |
| 10 | * See the License for the specific language governing permissions |
| 11 | * and limitations under the License. |
| 12 | * |
| 13 | * When distributing Covered Code, include this CDDL HEADER in each |
| 14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| 15 | * If applicable, add the following below this CDDL HEADER, with the |
| 16 | * fields enclosed by brackets "[]" replaced with your own identifying |
| 17 | * information: Portions Copyright [yyyy] [name of copyright owner] |
| 18 | * |
| 19 | * CDDL HEADER END |
| 20 | */ |
| 21 | /* |
Rafael Vanoni | d3d5073 | 2009-11-13 01:32:32 -0800 | [diff] [blame] | 22 | * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 23 | * Use is subject to license terms. |
| 24 | */ |
| 25 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 26 | |
| 27 | /* |
| 28 | * This is the lock device driver. |
| 29 | * |
| 30 | * The lock driver provides a variation of inter-process mutexes with the |
| 31 | * following twist in semantics: |
| 32 | * A waiter for a lock after a set timeout can "break" the lock and |
| 33 | * grab it from the current owner (without informing the owner). |
| 34 | * |
 * These semantics result in temporarily multiple processes thinking they
 * own the lock. This usually does not make sense for cases where locks are
 * used to protect a critical region and it is important to serialize access
 * to data structures, as breaking the lock will also lose the serialization
 * and result in corrupt data structures.
| 40 | * |
| 41 | * The usage for winlock driver is primarily driven by the graphics system |
| 42 | * when doing DGA (direct graphics access) graphics. The locks are used to |
| 43 | * protect access to the frame buffer (presumably reflects back to the screen) |
| 44 | * between competing processes that directly write to the screen as opposed |
| 45 | * to going through the window server etc. |
| 46 | * In this case, the result of breaking the lock at worst causes the screen |
| 47 | * image to be distorted and is easily fixed by doing a "refresh" |
| 48 | * |
| 49 | * In well-behaved applications, the lock is held for a very short time and |
| 50 | * the breaking semantics do not come into play. Not having this feature and |
| 51 | * using normal inter-process mutexes will result in a misbehaved application |
| 52 | * from grabbing the screen writing capability from the window manager and |
| 53 | * effectively make the system look like it is hung (mouse pointer does not |
| 54 | * move). |
| 55 | * |
| 56 | * A secondary aspect of the winlock driver is that it allows for extremely |
| 57 | * fast lock acquire/release in cases where there is low contention. A memory |
| 58 | * write is all that is needed (not even a function call). And the window |
 * manager is the only DGA writer usually and this is optimized for.
 * Occasionally some processes might do DGA graphics and cause kernel faults
 * to handle the contention/locking (and that has got to be slow!).
| 62 | * |
| 63 | * The following IOCTLs are supported: |
| 64 | * |
| 65 | * GRABPAGEALLOC: |
| 66 | * Compatibility with old cgsix device driver lockpage ioctls. |
| 67 | * Lockpages created this way must be an entire page for compatibility with |
| 68 | * older software. This ioctl allocates a lock context with its own |
| 69 | * private lock page. The unique "ident" that identifies this lock is |
| 70 | * returned. |
| 71 | * |
| 72 | * GRABPAGEFREE: |
| 73 | * Compatibility with cgsix device driver lockpage ioctls. This |
| 74 | * ioctl releases the lock context allocated by GRABPAGEALLOC. |
| 75 | * |
| 76 | * GRABLOCKINFO: |
| 77 | * Returns a one-word flag. '1' means that multiple clients may |
| 78 | * access this lock page. Older device drivers returned '0', |
| 79 | * meaning that only two clients could access a lock page. |
| 80 | * |
| 81 | * GRABATTACH: |
| 82 | * Not supported. This ioctl would have grabbed all lock pages |
| 83 | * on behalf of the calling program. |
| 84 | * |
| 85 | * WINLOCKALLOC: |
 *	Allocate a lock context. This ioctl accepts a key value as
 *	its argument. If the key is zero, a new lock context is
 *	created, and its "ident" is returned. If the key is nonzero,
 *	all existing contexts are checked to see if they match the
 *	key. If a match is found, its reference count is incremented
 *	and its ident is returned, otherwise a new context is created
 *	and its ident is returned.
| 93 | * |
| 94 | * WINLOCKFREE: |
| 95 | * Free a lock context. This ioctl accepts the ident of a lock |
| 96 | * context and decrements its reference count. Once the reference |
| 97 | * count reaches zero *and* all mappings are released, the lock |
| 98 | * context is freed. When all the lock context in the lock page are |
| 99 | * freed, the lock page is freed as well. |
| 100 | * |
| 101 | * WINLOCKSETTIMEOUT: |
| 102 | * Set lock timeout for a context. This ioctl accepts the ident |
| 103 | * of a lock context and a timeout value in milliseconds. |
| 104 | * Whenever lock contention occurs, the timer is started and the lock is |
| 105 | * broken after the timeout expires. If timeout value is zero, lock does |
| 106 | * not timeout. This value will be rounded to the nearest clock |
| 107 | * tick, so don't try to use it for real-time control or something. |
| 108 | * |
| 109 | * WINLOCKGETTIMEOUT: |
| 110 | * Get lock timeout from a context. |
| 111 | * |
| 112 | * WINLOCKDUMP: |
| 113 | * Dump state of this device. |
| 114 | * |
| 115 | * |
| 116 | * How /dev/winlock works: |
| 117 | * |
| 118 | * Every lock context consists of two mappings for the client to the lock |
| 119 | * page. These mappings are known as the "lock page" and "unlock page" |
| 120 | * to the client. The first mmap to the lock context (identified by the |
| 121 | * sy_ident field returns during alloc) allocates mapping to the lock page, |
| 122 | * the second mmap allocates a mapping to the unlock page. |
 * The mappings don't have to be ordered in virtual address space, but do
 * need to be ordered in time. Mapping and unmapping of these lock and unlock
 * pages should happen in pairs. Doing them one at a time or unmapping one
 * and leaving one mapped etc. causes undefined behavior.
| 127 | * The mappings are always of length PAGESIZE, and type MAP_SHARED. |
| 128 | * |
| 129 | * The first ioctl is to ALLOC a lock, either based on a key (if trying to |
| 130 | * grab a preexisting lock) or 0 (gets a default new one) |
| 131 | * This ioctl returns a value in sy_ident which is needed to do the |
| 132 | * later mmaps and FREE/other ioctls. |
| 133 | * |
| 134 | * The "page number" portion of the sy_ident needs to be passed as the |
| 135 | * file offset when doing an mmap for both the lock page and unlock page |
| 136 | * |
| 137 | * The value returned by mmap ( a user virtual address) needs to be |
| 138 | * incremented by the "page offset" portion of sy_ident to obtain the |
| 139 | * pointer to the actual lock. (Skipping this step, does not cause any |
| 140 | * visible error, but the process will be using the wrong lock!) |
| 141 | * |
| 142 | * On a fork(), the child process will inherit the mappings for free, but |
| 143 | * will not inherit the parent's lock ownership if any. The child should NOT |
| 144 | * do an explicit FREE on the lock context unless it did an explicit ALLOC. |
| 145 | * Only one process at a time is allowed to have a valid hat |
| 146 | * mapping to a lock page. This is enforced by this driver. |
 * A client acquires a lock by writing a '1' to the lock page.
 * Note that it is not necessary to read and verify that the lock is '0'
 * prior to writing a '1' in it.
| 150 | * If it does not already have a valid mapping to that page, the driver |
| 151 | * takes a fault (devmap_access), loads the client mapping |
| 152 | * and allows the client to continue. The client releases the lock by |
| 153 | * writing a '0' to the unlock page. Again, if it does not have a valid |
| 154 | * mapping to the unlock page, the segment driver takes a fault, |
| 155 | * loads the mapping, and lets the client continue. From this point |
| 156 | * forward, the client can make as many locks and unlocks as it |
| 157 | * wants, without any more faults into the kernel. |
| 158 | * |
| 159 | * If a different process wants to acquire a lock, it takes a page fault |
| 160 | * when it writes the '1' to the lock page. If the segment driver sees |
| 161 | * that the lock page contained a zero, then it invalidates the owner's |
| 162 | * mappings and gives the mappings to this process. |
| 163 | * |
| 164 | * If there is already a '1' in the lock page when the second client |
| 165 | * tries to access the lock page, then a lock exists. The segment |
| 166 | * driver sleeps the second client and, if applicable, starts the |
| 167 | * timeout on the lock. The owner's mapping to the unlock page |
| 168 | * is invalidated so that the driver will be woken again when the owner |
| 169 | * releases the lock. |
| 170 | * |
| 171 | * When the locking client finally writes a '0' to the unlock page, the |
| 172 | * segment driver takes another fault. The client is given a valid |
| 173 | * mapping, not to the unlock page, but to the "trash page", and allowed |
| 174 | * to continue. Meanwhile, the sleeping client is given a valid mapping |
| 175 | * to the lock/unlock pages and allowed to continue as well. |
| 176 | * |
| 177 | * RFE: There is a leak if process exits before freeing allocated locks |
| 178 | * But currently not tracking which locks were allocated by which |
| 179 | * process and we do not have a clean entry point into the driver |
| 180 | * to do garbage collection. If the interface used a file descriptor for each |
| 181 | * lock it allocs, then the driver can free up stuff in the _close routine |
| 182 | */ |
| 183 | |
| 184 | #include <sys/types.h> /* various type defn's */ |
| 185 | #include <sys/debug.h> |
| 186 | #include <sys/param.h> /* various kernel limits */ |
| 187 | #include <sys/time.h> |
| 188 | #include <sys/errno.h> |
| 189 | #include <sys/kmem.h> /* defines kmem_alloc() */ |
| 190 | #include <sys/conf.h> /* defines cdevsw */ |
| 191 | #include <sys/file.h> /* various file modes, etc. */ |
| 192 | #include <sys/uio.h> /* UIO stuff */ |
| 193 | #include <sys/ioctl.h> |
| 194 | #include <sys/cred.h> /* defines cred struct */ |
| 195 | #include <sys/mman.h> /* defines mmap(2) parameters */ |
| 196 | #include <sys/stat.h> /* defines S_IFCHR */ |
| 197 | #include <sys/cmn_err.h> /* use cmn_err */ |
| 198 | #include <sys/ddi.h> /* ddi stuff */ |
| 199 | #include <sys/sunddi.h> /* ddi stuff */ |
| 200 | #include <sys/ddi_impldefs.h> /* ddi stuff */ |
| 201 | #include <sys/winlockio.h> /* defines ioctls, flags, data structs */ |
| 202 | |
| 203 | static int winlock_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); |
| 204 | static int winlock_devmap(dev_t, devmap_cookie_t, offset_t, size_t, |
| 205 | size_t *, uint_t); |
| 206 | static int winlocksegmap(dev_t, off_t, struct as *, caddr_t *, off_t, |
| 207 | uint_t, uint_t, uint_t, cred_t *); |
| 208 | |
/*
 * Character device entry points.  Only ioctl, devmap and segmap do real
 * work; the driver keeps no per-open state, so open/close are nulldev.
 */
static struct cb_ops	winlock_cb_ops = {
	nulldev,		/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	winlock_ioctl,		/* ioctl */
	winlock_devmap,		/* devmap */
	nodev,			/* mmap */
	winlocksegmap,		/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,			/* rev */
	nodev,			/* aread */
	nodev			/* awrite */
};
| 229 | |
| 230 | static int winlock_info(dev_info_t *, ddi_info_cmd_t, void *, void **); |
| 231 | static int winlock_attach(dev_info_t *, ddi_attach_cmd_t); |
| 232 | static int winlock_detach(dev_info_t *, ddi_detach_cmd_t); |
| 233 | |
/*
 * Autoconfiguration operations for the pseudo device.  No quiesce
 * handling is needed for fast reboot (ddi_quiesce_not_needed).
 */
static struct dev_ops	winlock_ops = {
	DEVO_REV,
	0,			/* refcount */
	winlock_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	winlock_attach,		/* attach */
	winlock_detach,		/* detach */
	nodev,			/* reset */
	&winlock_cb_ops,	/* driver ops */
	NULL,			/* bus ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
| 248 | |
| 249 | static int winlockmap_map(devmap_cookie_t, dev_t, uint_t, offset_t, size_t, |
| 250 | void **); |
| 251 | static void winlockmap_unmap(devmap_cookie_t, void *, offset_t, size_t, |
| 252 | devmap_cookie_t, void **, devmap_cookie_t, void **); |
| 253 | static int winlockmap_dup(devmap_cookie_t, void *, |
| 254 | devmap_cookie_t, void **); |
| 255 | static int winlockmap_access(devmap_cookie_t, void *, offset_t, size_t, |
| 256 | uint_t, uint_t); |
| 257 | |
/*
 * devmap framework callbacks.  winlockmap_access() handles the faults
 * that implement the lock hand-off; map/dup/unmap maintain the
 * per-process SegProc state attached to each mapping.
 */
static
struct devmap_callback_ctl winlockmap_ops = {
	DEVMAP_OPS_REV,
	winlockmap_map,
	winlockmap_access,
	winlockmap_dup,
	winlockmap_unmap,
};
| 266 | |
/*
 * Debug tracing.  lock_debug sets the verbosity threshold; DEBUGF()
 * compiles away entirely in non-DEBUG builds.
 * Use #ifdef (not #if) so this block agrees with the "#ifdef DEBUG"
 * guard around the (WIOC|255) ioctl in winlock_ioctl(); with "#if",
 * a build defining DEBUG as 0 would compile that ioctl case while
 * leaving lock_debug undeclared.
 */
#ifdef DEBUG
static	int	lock_debug = 0;
#define	DEBUGF(level, args)	{ if (lock_debug >= (level)) cmn_err args; }
#else
#define	DEBUGF(level, args)
#endif
| 273 | |
| 274 | /* Driver supports two styles of locks */ |
| 275 | enum winlock_style { NEWSTYLE_LOCK, OLDSTYLE_LOCK }; |
| 276 | |
| 277 | /* |
| 278 | * These structures describe a lock context. We permit multiple |
| 279 | * clients (not just two) to access a lock page |
| 280 | * |
| 281 | * The "cookie" identifies the lock context. It is the page number portion |
| 282 | * sy_ident returned on lock allocation. Cookie is used in later ioctls. |
| 283 | * "cookie" is lockid * PAGESIZE |
| 284 | * "lockptr" is the kernel virtual address to the lock itself |
| 285 | * The page offset portion of lockptr is the page offset portion of sy_ident |
| 286 | */ |
| 287 | |
| 288 | /* |
| 289 | * per-process information about locks. This is the private field of |
| 290 | * a devmap mapping. Note that usually *two* mappings point to this. |
| 291 | */ |
| 292 | |
| 293 | /* |
| 294 | * Each process using winlock is associated with a segproc structure |
| 295 | * In various driver entry points, we need to search to find the right |
| 296 | * segproc structure (If we were using file handles for each lock this |
| 297 | * would not have been necessary). |
| 298 | * It would have been simple to use the process pid (and ddi_get_pid) |
| 299 | * However, during fork devmap_dup is called in the parent process context |
| 300 | * and using the pid complicates the code by introducing orphans. |
| 301 | * Instead we use the as pointer for the process as a cookie |
| 302 | * which requires delving into various non-DDI kosher structs |
| 303 | */ |
/*
 * Per-process, per-lock client state.  Allocated the first time a process
 * maps a given lock context and linked on that lock's 'clients' list.
 * Note that usually *two* devmap mappings (lockseg and unlockseg) point
 * at one of these.
 */
typedef struct segproc {
	struct segproc	*next;		/* next client of this lock */
	struct seglock	*lp;		/* associated lock context */
	devmap_cookie_t	lockseg;	/* lock mapping, if any */
	devmap_cookie_t	unlockseg;	/* unlock mapping, if any */
	void		*tag;		/* process "as" pointer, used as id */
	uint_t		flag;		/* see "flag bits" in winlockio.h */
} SegProc;

/* Client identity: the address-space pointer of the owning process */
#define	ID(sdp)		((sdp)->tag)
#define	CURPROC_ID	(void *)(curproc->p_as)
| 315 | |
/*
 * Per lock context information.  One of these exists for every lock
 * created by the alloc ioctls; protected by its own 'mutex' (see the
 * locking notes below).
 */
typedef struct seglock {
	struct seglock	*next;		/* next lock */
	uint_t		sleepers;	/* nthreads sleeping on this lock */
	uint_t		alloccount;	/* how many times created? */
	uint_t		cookie;		/* mmap() offset (page #) into device */
	uint_t		key;		/* key, if any */
	enum winlock_style	style;	/* style of lock - OLDSTYLE, NEWSTYLE */
	clock_t		timeout;	/* sleep time in ticks */
	ddi_umem_cookie_t umem_cookie;	/* cookie for umem allocated memory */
	int		*lockptr;	/* kernel virtual addr of lock */
	struct segproc	*clients;	/* list of clients of this lock */
	struct segproc	*owner;		/* current owner of lock */
	kmutex_t	mutex;		/* mutex for lock */
	kcondvar_t	locksleep;	/* for sleeping on lock */
} SegLock;

/* Current value of the lock word ('1' written to acquire, '0' to release) */
#define	LOCK(lp)	(*((lp)->lockptr))
| 335 | |
| 336 | /* |
 * Number of locks that can fit in a page. Driver can support only that many.
 * For oldstyle locks, it is relatively easy to increase the limit as each
 * is in a separate page (MAX_LOCKS mostly serves to prevent runaway
 * allocation).
 * For newstyle locks, this is trickier as the code needs to allow for mapping
 * into the second or third page of the cookie for some locks.
| 342 | */ |
| 343 | #define MAX_LOCKS (PAGESIZE/sizeof (int)) |
| 344 | |
| 345 | #define LOCKTIME 3 /* Default lock timeout in seconds */ |
| 346 | |
| 347 | |
| 348 | /* Protections setting for winlock user mappings */ |
| 349 | #define WINLOCK_PROT (PROT_READ|PROT_WRITE|PROT_USER) |
| 350 | |
| 351 | /* |
| 352 | * The trash page is where unwanted writes go |
| 353 | * when a process is releasing a lock. |
| 354 | */ |
| 355 | static ddi_umem_cookie_t trashpage_cookie = NULL; |
| 356 | |
| 357 | /* For newstyle allocations a common page of locks is used */ |
| 358 | static caddr_t lockpage = NULL; |
| 359 | static ddi_umem_cookie_t lockpage_cookie = NULL; |
| 360 | |
| 361 | static dev_info_t *winlock_dip = NULL; |
| 362 | static kmutex_t winlock_mutex; |
| 363 | |
| 364 | /* |
| 365 | * winlock_mutex protects |
| 366 | * lock_list |
| 367 | * lock_free_list |
| 368 | * "next" field in SegLock |
| 369 | * next_lock |
| 370 | * trashpage_cookie |
| 371 | * lockpage & lockpage_cookie |
| 372 | * |
| 373 | * SegLock_mutex protects |
| 374 | * rest of fields in SegLock |
| 375 | * All fields in list of SegProc (lp->clients) |
| 376 | * |
| 377 | * Lock ordering is winlock_mutex->SegLock_mutex |
| 378 | * During devmap/seg operations SegLock_mutex acquired without winlock_mutex |
| 379 | * |
| 380 | * During devmap callbacks, the pointer to SegProc is stored as the private |
| 381 | * data in the devmap handle. This pointer will not go stale (i.e., the |
| 382 | * SegProc getting deleted) as the SegProc is not deleted until both the |
| 383 | * lockseg and unlockseg have been unmapped and the pointers stored in |
| 384 | * the devmap handles have been NULL'ed. |
| 385 | * But before this pointer is used to access any fields (other than the 'lp') |
| 386 | * lp->mutex must be held. |
| 387 | */ |
| 388 | |
| 389 | /* |
| 390 | * The allocation code tries to allocate from lock_free_list |
| 391 | * first, otherwise it uses kmem_zalloc. When lock list is idle, all |
| 392 | * locks in lock_free_list are kmem_freed |
| 393 | */ |
| 394 | static SegLock *lock_list = NULL; /* in-use locks */ |
| 395 | static SegLock *lock_free_list = NULL; /* free locks */ |
| 396 | static int next_lock = 0; /* next lock cookie */ |
| 397 | |
| 398 | /* Routines to find a lock in lock_list based on offset or key */ |
| 399 | static SegLock *seglock_findlock(uint_t); |
| 400 | static SegLock *seglock_findkey(uint_t); |
| 401 | |
| 402 | /* Routines to find and allocate SegProc structures */ |
| 403 | static SegProc *seglock_find_specific(SegLock *, void *); |
| 404 | static SegProc *seglock_alloc_specific(SegLock *, void *); |
| 405 | #define seglock_findclient(lp) seglock_find_specific((lp), CURPROC_ID) |
| 406 | #define seglock_allocclient(lp) seglock_alloc_specific((lp), CURPROC_ID) |
| 407 | |
| 408 | /* Delete client from lock's client list */ |
| 409 | static void seglock_deleteclient(SegLock *, SegProc *); |
| 410 | static void garbage_collect_lock(SegLock *, SegProc *); |
| 411 | |
| 412 | /* Create a new lock */ |
| 413 | static SegLock *seglock_createlock(enum winlock_style); |
| 414 | /* Destroy lock */ |
| 415 | static void seglock_destroylock(SegLock *); |
| 416 | static void lock_destroyall(void); |
| 417 | |
| 418 | /* Helper functions in winlockmap_access */ |
| 419 | static int give_mapping(SegLock *, SegProc *, uint_t); |
| 420 | static int lock_giveup(SegLock *, int); |
| 421 | static int seglock_lockfault(devmap_cookie_t, SegProc *, SegLock *, uint_t); |
| 422 | |
| 423 | /* routines called from ioctl */ |
| 424 | static int seglock_graballoc(intptr_t, enum winlock_style, int); |
| 425 | static int seglock_grabinfo(intptr_t, int); |
| 426 | static int seglock_grabfree(intptr_t, int); |
| 427 | static int seglock_gettimeout(intptr_t, int); |
| 428 | static int seglock_settimeout(intptr_t, int); |
| 429 | static void seglock_dump_all(void); |
| 430 | |
| 431 | static int |
| 432 | winlock_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) |
| 433 | { |
| 434 | DEBUGF(1, (CE_CONT, "winlock_attach, devi=%p, cmd=%d\n", |
Sherry Moore | 1939740 | 2008-09-22 16:30:26 -0700 | [diff] [blame] | 435 | (void *)devi, (int)cmd)); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 436 | if (cmd != DDI_ATTACH) |
| 437 | return (DDI_FAILURE); |
| 438 | if (ddi_create_minor_node(devi, "winlock", S_IFCHR, 0, DDI_PSEUDO, 0) |
| 439 | == DDI_FAILURE) { |
| 440 | return (DDI_FAILURE); |
| 441 | } |
| 442 | winlock_dip = devi; |
| 443 | ddi_report_dev(devi); |
| 444 | return (DDI_SUCCESS); |
| 445 | } |
| 446 | |
| 447 | /*ARGSUSED*/ |
| 448 | static int |
| 449 | winlock_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) |
| 450 | { |
| 451 | DEBUGF(1, (CE_CONT, "winlock_detach, devi=%p, cmd=%d\n", |
Sherry Moore | 1939740 | 2008-09-22 16:30:26 -0700 | [diff] [blame] | 452 | (void *)devi, (int)cmd)); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 453 | if (cmd != DDI_DETACH) |
| 454 | return (DDI_FAILURE); |
| 455 | |
| 456 | mutex_enter(&winlock_mutex); |
| 457 | if (lock_list != NULL) { |
| 458 | mutex_exit(&winlock_mutex); |
| 459 | return (DDI_FAILURE); |
| 460 | } |
| 461 | ASSERT(lock_free_list == NULL); |
| 462 | |
| 463 | DEBUGF(1, (CE_CONT, "detach freeing trashpage and lockpage\n")); |
| 464 | /* destroy any common stuff created */ |
| 465 | if (trashpage_cookie != NULL) { |
| 466 | ddi_umem_free(trashpage_cookie); |
| 467 | trashpage_cookie = NULL; |
| 468 | } |
| 469 | if (lockpage != NULL) { |
| 470 | ddi_umem_free(lockpage_cookie); |
| 471 | lockpage = NULL; |
| 472 | lockpage_cookie = NULL; |
| 473 | } |
| 474 | winlock_dip = NULL; |
| 475 | mutex_exit(&winlock_mutex); |
| 476 | return (DDI_SUCCESS); |
| 477 | } |
| 478 | |
| 479 | /*ARGSUSED*/ |
| 480 | static int |
| 481 | winlock_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) |
| 482 | { |
| 483 | register int error; |
| 484 | |
| 485 | /* initialize result */ |
| 486 | *result = NULL; |
| 487 | |
| 488 | /* only valid instance (i.e., getminor) is 0 */ |
| 489 | if (getminor((dev_t)arg) >= 1) |
| 490 | return (DDI_FAILURE); |
| 491 | |
| 492 | switch (infocmd) { |
| 493 | case DDI_INFO_DEVT2DEVINFO: |
| 494 | if (winlock_dip == NULL) |
| 495 | error = DDI_FAILURE; |
| 496 | else { |
| 497 | *result = (void *)winlock_dip; |
| 498 | error = DDI_SUCCESS; |
| 499 | } |
| 500 | break; |
| 501 | case DDI_INFO_DEVT2INSTANCE: |
| 502 | *result = (void *)0; |
| 503 | error = DDI_SUCCESS; |
| 504 | break; |
| 505 | default: |
| 506 | error = DDI_FAILURE; |
| 507 | } |
| 508 | return (error); |
| 509 | } |
| 510 | |
| 511 | |
| 512 | /*ARGSUSED*/ |
| 513 | int |
| 514 | winlock_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, |
| 515 | cred_t *cred, int *rval) |
| 516 | { |
| 517 | DEBUGF(1, (CE_CONT, "winlockioctl: cmd=%d, arg=0x%p\n", |
Sherry Moore | 1939740 | 2008-09-22 16:30:26 -0700 | [diff] [blame] | 518 | cmd, (void *)arg)); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 519 | |
| 520 | switch (cmd) { |
| 521 | /* |
| 522 | * ioctls that used to be handled by framebuffers (defined in fbio.h) |
| 523 | * RFE: No code really calls the GRAB* ioctls now. Should EOL. |
| 524 | */ |
| 525 | |
| 526 | case GRABPAGEALLOC: |
| 527 | return (seglock_graballoc(arg, OLDSTYLE_LOCK, mode)); |
| 528 | case GRABPAGEFREE: |
| 529 | return (seglock_grabfree(arg, mode)); |
| 530 | case GRABLOCKINFO: |
| 531 | return (seglock_grabinfo(arg, mode)); |
| 532 | case GRABATTACH: |
| 533 | return (EINVAL); /* GRABATTACH is not supported (never was) */ |
| 534 | |
| 535 | case WINLOCKALLOC: |
| 536 | return (seglock_graballoc(arg, NEWSTYLE_LOCK, mode)); |
| 537 | case WINLOCKFREE: |
| 538 | return (seglock_grabfree(arg, mode)); |
| 539 | case WINLOCKSETTIMEOUT: |
| 540 | return (seglock_settimeout(arg, mode)); |
| 541 | case WINLOCKGETTIMEOUT: |
| 542 | return (seglock_gettimeout(arg, mode)); |
| 543 | case WINLOCKDUMP: |
| 544 | seglock_dump_all(); |
| 545 | return (0); |
| 546 | |
| 547 | #ifdef DEBUG |
| 548 | case (WIOC|255): |
| 549 | lock_debug = arg; |
| 550 | return (0); |
| 551 | #endif |
| 552 | |
| 553 | default: |
| 554 | return (ENOTTY); /* Why is this not EINVAL */ |
| 555 | } |
| 556 | } |
| 557 | |
| 558 | int |
| 559 | winlocksegmap( |
| 560 | dev_t dev, /* major:minor */ |
| 561 | off_t off, /* device offset from mmap(2) */ |
| 562 | struct as *as, /* user's address space. */ |
| 563 | caddr_t *addr, /* address from mmap(2) */ |
| 564 | off_t len, /* length from mmap(2) */ |
| 565 | uint_t prot, /* user wants this access */ |
| 566 | uint_t maxprot, /* this is the maximum the user can have */ |
| 567 | uint_t flags, /* flags from mmap(2) */ |
| 568 | cred_t *cred) |
| 569 | { |
| 570 | DEBUGF(1, (CE_CONT, "winlock_segmap off=%lx, len=0x%lx\n", off, len)); |
| 571 | |
| 572 | /* Only MAP_SHARED mappings are supported */ |
| 573 | if ((flags & MAP_TYPE) == MAP_PRIVATE) { |
| 574 | return (EINVAL); |
| 575 | } |
| 576 | |
| 577 | /* Use devmap_setup to setup the mapping */ |
| 578 | return (devmap_setup(dev, (offset_t)off, as, addr, (size_t)len, prot, |
Sherry Moore | 1939740 | 2008-09-22 16:30:26 -0700 | [diff] [blame] | 579 | maxprot, flags, cred)); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 580 | } |
| 581 | |
/*
 * devmap(9E) entry point: set up a PAGESIZE, MAP_SHARED mapping to the
 * page backing the lock context identified by 'off' (the sy_ident value
 * returned by the alloc ioctl).  Whether this becomes the lock or the
 * unlock mapping is decided later, by mapping order, in winlockmap_map().
 * Returns 0, ENXIO (no such lock), EINVAL (bad offset/length), or the
 * error from devmap_umem_setup().
 */
/*ARGSUSED*/
int
winlock_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
	size_t *maplen, uint_t model)
{
	SegLock *lp;
	int err;

	DEBUGF(1, (CE_CONT, "winlock devmap: off=%llx, len=%lx, dhp=%p\n",
	    off, len, (void *)dhp));

	*maplen = 0;

	/* Check if the lock exists, i.e., has been created by alloc */
	/* off is the sy_ident returned in the alloc ioctl */
	if ((lp = seglock_findlock((uint_t)off)) == NULL) {
		return (ENXIO);
	}

	/*
	 * The offset bits in mmap(2) offset has to be same as in lockptr
	 * OR the offset should be 0 (i.e. masked off)
	 */
	if (((off & PAGEOFFSET) != 0) &&
	    ((off ^ (uintptr_t)(lp->lockptr)) & (offset_t)PAGEOFFSET) != 0) {
		DEBUGF(2, (CE_CONT,
		    "mmap offset %llx mismatch with lockptr %p\n",
		    off, (void *)lp->lockptr));
		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
		return (EINVAL);
	}

	/* Only supports PAGESIZE length mappings */
	if (len != PAGESIZE) {
		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
		return (EINVAL);
	}

	/*
	 * Set up devmap to point at page associated with lock
	 * RFE: At this point we dont know if this is a lockpage or unlockpage
	 * a lockpage would not need DEVMAP_ALLOW_REMAP setting
	 * We could have kept track of the mapping order here,
	 * but devmap framework does not support storing any state in this
	 * devmap callback as it does not callback for error cleanup if some
	 * other error happens in the framework.
	 * RFE: We should modify the winlock mmap interface so that the
	 * user process marks in the offset passed in whether this is for a
	 * lock or unlock mapping instead of guessing based on order of maps
	 * This would cleanup other things (such as in fork)
	 */
	/*
	 * NOTE(review): the "< 0" test assumes devmap_umem_setup() failures
	 * are negative; devmap_umem_setup(9F) only documents a nonzero
	 * error return -- confirm.
	 */
	if ((err = devmap_umem_setup(dhp, winlock_dip, &winlockmap_ops,
	    lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT,
	    DEVMAP_ALLOW_REMAP, 0)) < 0) {
		mutex_exit(&lp->mutex);	/* held by seglock_findlock */
		return (err);
	}
	/*
	 * No mappings are loaded to those segments yet. The correctness
	 * of the winlock semantics depends on the devmap framework/seg_dev NOT
	 * loading the translations without calling _access callback.
	 */

	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
	*maplen = PAGESIZE;
	return (0);
}
| 649 | |
| 650 | /* |
| 651 | * This routine is called by the devmap framework after the devmap entry point |
| 652 | * above and the mapping is setup in seg_dev. |
| 653 | * We store the pointer to the per-process context in the devmap private data. |
| 654 | */ |
| 655 | /*ARGSUSED*/ |
| 656 | static int |
| 657 | winlockmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off, |
| 658 | size_t len, void **pvtp) |
| 659 | { |
| 660 | SegLock *lp = seglock_findlock((uint_t)off); /* returns w/ mutex held */ |
| 661 | SegProc *sdp; |
| 662 | |
| 663 | ASSERT(len == PAGESIZE); |
| 664 | |
| 665 | /* Find the per-process context for this lock, alloc one if not found */ |
| 666 | sdp = seglock_allocclient(lp); |
| 667 | |
| 668 | /* |
| 669 | * RFE: Determining which is a lock vs unlock seg is based on order |
| 670 | * of mmaps, we should change that to be derivable from off |
| 671 | */ |
| 672 | if (sdp->lockseg == NULL) { |
| 673 | sdp->lockseg = dhp; |
| 674 | } else if (sdp->unlockseg == NULL) { |
| 675 | sdp->unlockseg = dhp; |
| 676 | } else { |
| 677 | /* attempting to map lock more than twice */ |
| 678 | mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */ |
| 679 | return (ENOMEM); |
| 680 | } |
| 681 | |
| 682 | *pvtp = sdp; |
| 683 | mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */ |
| 684 | return (DDI_SUCCESS); |
| 685 | } |
| 686 | |
| 687 | /* |
| 688 | * duplicate a segment, as in fork() |
| 689 | * On fork, the child inherits the mappings to the lock |
| 690 | * lp->alloccount is NOT incremented, so child should not do a free(). |
| 691 | * Semantics same as if done an alloc(), map(), map(). |
| 692 | * This way it would work fine if doing an exec() variant later |
| 693 | * Child does not inherit any UFLAGS set in parent |
| 694 | * The lock and unlock pages are started off unmapped, i.e., child does not |
| 695 | * own the lock. |
| 696 | * The code assumes that the child process has a valid pid at this point |
| 697 | * RFE: This semantics depends on fork not duplicating the hat mappings |
| 698 | * (which is the current implementation). To enforce it would need to |
| 699 | * call devmap_unload from here - not clear if that is allowed. |
| 700 | */ |
| 701 | |
static int
winlockmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
    void **newpvt)
{
	SegProc *sdp = (SegProc *)oldpvt;	/* parent's per-process context */
	SegProc *ndp;				/* child's per-process context */
	SegLock *lp = sdp->lp;

	mutex_enter(&lp->mutex);
	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));

	/*
	 * Note: At this point, the child process does have a pid, but
	 * the arguments passed to as_dup and hence to devmap_dup dont pass it
	 * down. So we cannot use normal seglock_findclient - which finds the
	 * parent sdp itself!
	 * Instead we allocate the child's SegProc by using the child's "as"
	 * pointer as the identifying tag.
	 * RFE: we are using the as structure which means peeking into the
	 * devmap_cookie. This is not DDI-compliant. Need a compliant way of
	 * getting at either the as or, better, a way to get the child's new pid
	 */
	ndp = seglock_alloc_specific(lp,
	    (void *)((devmap_handle_t *)new_dhp)->dh_seg->s_as);
	ASSERT(ndp != sdp);

	/* Attach the new handle to the same slot it occupied in the parent */
	if (sdp->lockseg == dhp) {
		ASSERT(ndp->lockseg == NULL);
		ndp->lockseg = new_dhp;
	} else {
		ASSERT(sdp->unlockseg == dhp);
		ASSERT(ndp->unlockseg == NULL);
		ndp->unlockseg = new_dhp;
		/* child inherits the trash-page state of the unlock segment */
		if (sdp->flag & TRASHPAGE) {
			ndp->flag |= TRASHPAGE;
		}
	}
	mutex_exit(&lp->mutex);
	*newpvt = (void *)ndp;
	return (0);
}
| 742 | |
| 743 | |
/*ARGSUSED*/
static void
winlockmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
    devmap_cookie_t new_dhp1, void **newpvtp1,
    devmap_cookie_t new_dhp2, void **newpvtp2)
{
	SegProc *sdp = (SegProc *)pvtp;
	SegLock *lp = sdp->lp;

	/*
	 * We always create PAGESIZE length mappings, so there should never
	 * be a partial unmapping case
	 */
	ASSERT((new_dhp1 == NULL) && (new_dhp2 == NULL));

	mutex_enter(&lp->mutex);
	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
	/* make sure this process doesn't own the lock */
	if (sdp == lp->owner) {
		/*
		 * Not handling errors - i.e., errors in unloading mapping
		 * As part of unmapping hat/seg structure get torn down anyway
		 */
		(void) lock_giveup(lp, 0);
	}

	ASSERT(sdp != lp->owner);
	/* detach the handle from whichever slot it occupied */
	if (sdp->lockseg == dhp) {
		sdp->lockseg = NULL;
	} else {
		ASSERT(sdp->unlockseg == dhp);
		sdp->unlockseg = NULL;
		sdp->flag &= ~TRASHPAGE;	/* clear flag if set */
	}

	/* reap idle client/lock structures; this drops lp->mutex */
	garbage_collect_lock(lp, sdp);
}
| 781 | |
| 782 | /*ARGSUSED*/ |
| 783 | static int |
| 784 | winlockmap_access(devmap_cookie_t dhp, void *pvt, offset_t off, size_t len, |
| 785 | uint_t type, uint_t rw) |
| 786 | { |
| 787 | SegProc *sdp = (SegProc *)pvt; |
| 788 | SegLock *lp = sdp->lp; |
| 789 | int err; |
| 790 | |
| 791 | /* Driver handles only DEVMAP_ACCESS type of faults */ |
| 792 | if (type != DEVMAP_ACCESS) |
| 793 | return (-1); |
| 794 | |
| 795 | mutex_enter(&lp->mutex); |
| 796 | ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg)); |
| 797 | |
| 798 | /* should be using a SegProc that corresponds to current process */ |
| 799 | ASSERT(ID(sdp) == CURPROC_ID); |
| 800 | |
| 801 | /* |
| 802 | * If process is faulting but does not have both segments mapped |
| 803 | * return error (should cause a segv). |
| 804 | * RFE: could give it a permanent trashpage |
| 805 | */ |
| 806 | if ((sdp->lockseg == NULL) || (sdp->unlockseg == NULL)) { |
| 807 | err = -1; |
| 808 | } else { |
| 809 | err = seglock_lockfault(dhp, sdp, lp, rw); |
| 810 | } |
| 811 | mutex_exit(&lp->mutex); |
| 812 | return (err); |
| 813 | } |
| 814 | |
| 815 | /* INTERNAL ROUTINES START HERE */ |
| 816 | |
| 817 | |
| 818 | |
| 819 | /* |
| 820 | * search the lock_list list for the specified cookie |
 * The cookie is the sy_ident field returned by the ALLOC ioctl.
| 822 | * This has two parts: |
| 823 | * the pageoffset bits contain offset into the lock page. |
| 824 | * the pagenumber bits contain the lock id. |
| 825 | * The user code is supposed to pass in only the pagenumber portion |
| 826 | * (i.e. mask off the pageoffset bits). However the code below |
| 827 | * does the mask in case the users are not diligent |
| 828 | * if found, returns with mutex for SegLock structure held |
| 829 | */ |
| 830 | static SegLock * |
| 831 | seglock_findlock(uint_t cookie) |
| 832 | { |
| 833 | SegLock *lp; |
| 834 | |
| 835 | cookie &= (uint_t)PAGEMASK; /* remove pageoffset bits to get cookie */ |
| 836 | mutex_enter(&winlock_mutex); |
| 837 | for (lp = lock_list; lp != NULL; lp = lp->next) { |
| 838 | mutex_enter(&lp->mutex); |
| 839 | if (cookie == lp->cookie) { |
| 840 | break; /* return with lp->mutex held */ |
| 841 | } |
| 842 | mutex_exit(&lp->mutex); |
| 843 | } |
| 844 | mutex_exit(&winlock_mutex); |
| 845 | return (lp); |
| 846 | } |
| 847 | |
| 848 | /* |
| 849 | * search the lock_list list for the specified non-zero key |
| 850 | * if found, returns with lock for SegLock structure held |
| 851 | */ |
| 852 | static SegLock * |
| 853 | seglock_findkey(uint_t key) |
| 854 | { |
| 855 | SegLock *lp; |
| 856 | |
| 857 | ASSERT(MUTEX_HELD(&winlock_mutex)); |
| 858 | /* The driver allows multiple locks with key 0, dont search */ |
| 859 | if (key == 0) |
| 860 | return (NULL); |
| 861 | for (lp = lock_list; lp != NULL; lp = lp->next) { |
| 862 | mutex_enter(&lp->mutex); |
| 863 | if (key == lp->key) |
| 864 | break; |
| 865 | mutex_exit(&lp->mutex); |
| 866 | } |
| 867 | return (lp); |
| 868 | } |
| 869 | |
| 870 | /* |
| 871 | * Create a new lock context. |
| 872 | * Returns with SegLock mutex held |
| 873 | */ |
| 874 | |
static SegLock *
seglock_createlock(enum winlock_style style)
{
	SegLock *lp;

	DEBUGF(3, (CE_CONT, "seglock_createlock: free_list=%p, next_lock %d\n",
	    (void *)lock_free_list, next_lock));

	ASSERT(MUTEX_HELD(&winlock_mutex));
	/*
	 * Prefer recycling a previously destroyed lock from the free list;
	 * otherwise carve out a new one, up to the MAX_LOCKS limit.
	 * A new lock's cookie is its 1-based index scaled by PAGESIZE,
	 * which keeps every cookie page-aligned and unique.
	 */
	if (lock_free_list != NULL) {
		lp = lock_free_list;
		lock_free_list = lp->next;
	} else if (next_lock >= MAX_LOCKS) {
		return (NULL);
	} else {
		lp = kmem_zalloc(sizeof (SegLock), KM_SLEEP);
		lp->cookie = (next_lock + 1) * (uint_t)PAGESIZE;
		mutex_init(&lp->mutex, NULL, MUTEX_DEFAULT, NULL);
		cv_init(&lp->locksleep, NULL, CV_DEFAULT, NULL);
		++next_lock;
	}

	mutex_enter(&lp->mutex);
	ASSERT((lp->cookie/PAGESIZE) <= next_lock);

	/*
	 * OLDSTYLE locks get their own private umem page; NEWSTYLE locks
	 * share the global lockpage, with lockptr addressing this lock's
	 * word within it.
	 */
	if (style == OLDSTYLE_LOCK) {
		lp->lockptr = (int *)ddi_umem_alloc(PAGESIZE,
		    DDI_UMEM_SLEEP, &(lp->umem_cookie));
	} else {
		lp->lockptr = ((int *)lockpage) + ((lp->cookie/PAGESIZE) - 1);
		lp->umem_cookie = lockpage_cookie;
	}

	ASSERT(lp->lockptr != NULL);
	lp->style = style;
	lp->sleepers = 0;
	lp->alloccount = 1;		/* caller is the first allocation */
	lp->timeout = LOCKTIME*hz;	/* default lock-break timeout */
	lp->clients = NULL;
	lp->owner = NULL;
	LOCK(lp) = 0;			/* lock word starts out free */
	lp->next = lock_list;
	lock_list = lp;
	return (lp);			/* returned with lp->mutex held */
}
| 920 | |
| 921 | /* |
 * Routine to destroy a lock structure.
| 923 | * This routine is called while holding the lp->mutex but not the |
| 924 | * winlock_mutex. |
| 925 | */ |
| 926 | |
static void
seglock_destroylock(SegLock *lp)
{
	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(!MUTEX_HELD(&winlock_mutex));

	DEBUGF(3, (CE_CONT, "destroying lock cookie %d key %d\n",
	    lp->cookie, lp->key));

	/* lock must be fully idle: no allocs, clients, owner, or waiters */
	ASSERT(lp->alloccount == 0);
	ASSERT(lp->clients == NULL);
	ASSERT(lp->owner == NULL);
	ASSERT(lp->sleepers == 0);

	/* clean up/release fields in lp */
	if (lp->style == OLDSTYLE_LOCK) {
		/* only OLDSTYLE locks own their umem page (see createlock) */
		ddi_umem_free(lp->umem_cookie);
	}
	lp->umem_cookie = NULL;
	lp->lockptr = NULL;
	lp->key = 0;

	/*
	 * Reduce cookie by 1, makes it non page-aligned and invalid
	 * This prevents any valid lookup from finding this lock
	 * so when we drop the lock and regrab it it will still
	 * be there and nobody else would have attached to it
	 */
	lp->cookie--;

	/* Drop and reacquire mutexes in right order */
	mutex_exit(&lp->mutex);
	mutex_enter(&winlock_mutex);
	mutex_enter(&lp->mutex);

	/* reincrement the cookie to get the original valid cookie */
	lp->cookie++;
	ASSERT((lp->cookie & PAGEOFFSET) == 0);
	/* re-verify idleness now that the mutex was briefly dropped */
	ASSERT(lp->alloccount == 0);
	ASSERT(lp->clients == NULL);
	ASSERT(lp->owner == NULL);
	ASSERT(lp->sleepers == 0);

	/* Remove lp from lock_list */
	if (lock_list == lp) {
		lock_list = lp->next;
	} else {
		SegLock *tmp = lock_list;
		while (tmp->next != lp) {
			tmp = tmp->next;
			ASSERT(tmp != NULL);
		}
		tmp->next = lp->next;
	}

	/* Add to lock_free_list for later reuse by seglock_createlock */
	lp->next = lock_free_list;
	lock_free_list = lp;
	mutex_exit(&lp->mutex);

	/* Check if all locks deleted and cleanup */
	if (lock_list == NULL) {
		lock_destroyall();
	}

	mutex_exit(&winlock_mutex);
}
| 994 | |
| 995 | /* Routine to find a SegProc corresponding to the tag */ |
| 996 | |
| 997 | static SegProc * |
| 998 | seglock_find_specific(SegLock *lp, void *tag) |
| 999 | { |
| 1000 | SegProc *sdp; |
| 1001 | |
| 1002 | ASSERT(MUTEX_HELD(&lp->mutex)); |
| 1003 | ASSERT(tag != NULL); |
| 1004 | for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) { |
| 1005 | if (ID(sdp) == tag) |
| 1006 | break; |
| 1007 | } |
| 1008 | return (sdp); |
| 1009 | } |
| 1010 | |
| 1011 | /* Routine to find (and if needed allocate) a SegProc corresponding to tag */ |
| 1012 | |
| 1013 | static SegProc * |
| 1014 | seglock_alloc_specific(SegLock *lp, void *tag) |
| 1015 | { |
| 1016 | SegProc *sdp; |
| 1017 | |
| 1018 | ASSERT(MUTEX_HELD(&lp->mutex)); |
| 1019 | ASSERT(tag != NULL); |
| 1020 | |
| 1021 | /* Search and return if existing one found */ |
| 1022 | sdp = seglock_find_specific(lp, tag); |
| 1023 | if (sdp != NULL) |
| 1024 | return (sdp); |
| 1025 | |
| 1026 | DEBUGF(3, (CE_CONT, "Allocating segproc structure for tag %p lock %d\n", |
Sherry Moore | 1939740 | 2008-09-22 16:30:26 -0700 | [diff] [blame] | 1027 | tag, lp->cookie)); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1028 | |
| 1029 | /* Allocate a new SegProc */ |
| 1030 | sdp = kmem_zalloc(sizeof (SegProc), KM_SLEEP); |
| 1031 | sdp->next = lp->clients; |
| 1032 | lp->clients = sdp; |
| 1033 | sdp->lp = lp; |
| 1034 | ID(sdp) = tag; |
| 1035 | return (sdp); |
| 1036 | } |
| 1037 | |
| 1038 | /* |
| 1039 | * search a context's client list for the given client and delete |
| 1040 | */ |
| 1041 | |
| 1042 | static void |
| 1043 | seglock_deleteclient(SegLock *lp, SegProc *sdp) |
| 1044 | { |
| 1045 | ASSERT(MUTEX_HELD(&lp->mutex)); |
| 1046 | ASSERT(lp->owner != sdp); /* Not current owner of lock */ |
| 1047 | ASSERT(sdp->lockseg == NULL); /* Mappings torn down */ |
| 1048 | ASSERT(sdp->unlockseg == NULL); |
| 1049 | |
| 1050 | DEBUGF(3, (CE_CONT, "Deleting segproc structure for pid %d lock %d\n", |
Sherry Moore | 1939740 | 2008-09-22 16:30:26 -0700 | [diff] [blame] | 1051 | ddi_get_pid(), lp->cookie)); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1052 | if (lp->clients == sdp) { |
| 1053 | lp->clients = sdp->next; |
| 1054 | } else { |
| 1055 | SegProc *tmp = lp->clients; |
| 1056 | while (tmp->next != sdp) { |
| 1057 | tmp = tmp->next; |
| 1058 | ASSERT(tmp != NULL); |
| 1059 | } |
| 1060 | tmp->next = sdp->next; |
| 1061 | } |
| 1062 | kmem_free(sdp, sizeof (SegProc)); |
| 1063 | } |
| 1064 | |
| 1065 | /* |
| 1066 | * Routine to verify if a SegProc and SegLock |
| 1067 | * structures are empty/idle. |
| 1068 | * Destroys the structures if they are ready |
| 1069 | * Can be called with sdp == NULL if want to verify only the lock state |
| 1070 | * caller should hold the lp->mutex |
| 1071 | * and this routine drops the mutex |
| 1072 | */ |
| 1073 | static void |
| 1074 | garbage_collect_lock(SegLock *lp, SegProc *sdp) |
| 1075 | { |
| 1076 | ASSERT(MUTEX_HELD(&lp->mutex)); |
| 1077 | /* see if both segments unmapped from client structure */ |
| 1078 | if ((sdp != NULL) && (sdp->lockseg == NULL) && (sdp->unlockseg == NULL)) |
| 1079 | seglock_deleteclient(lp, sdp); |
| 1080 | |
| 1081 | /* see if this is last client in the entire lock context */ |
| 1082 | if ((lp->clients == NULL) && (lp->alloccount == 0)) { |
| 1083 | seglock_destroylock(lp); |
| 1084 | } else { |
| 1085 | mutex_exit(&lp->mutex); |
| 1086 | } |
| 1087 | } |
| 1088 | |
| 1089 | |
| 1090 | /* IOCTLS START HERE */ |
| 1091 | |
| 1092 | static int |
| 1093 | seglock_grabinfo(intptr_t arg, int mode) |
| 1094 | { |
| 1095 | int i = 1; |
| 1096 | |
| 1097 | /* multiple clients per lock supported - see comments up top */ |
| 1098 | if (ddi_copyout((caddr_t)&i, (caddr_t)arg, sizeof (int), mode) != 0) |
| 1099 | return (EFAULT); |
| 1100 | return (0); |
| 1101 | } |
| 1102 | |
static int
seglock_graballoc(intptr_t arg, enum winlock_style style, int mode) /* IOCTL */
{
	struct seglock *lp;
	uint_t key;
	struct winlockalloc wla;
	int err;

	/*
	 * OLDSTYLE allocations carry no user argument and always use key 0
	 * (never matched by seglock_findkey, so each gets a private lock).
	 * NEWSTYLE allocations pass a winlockalloc with a user-chosen key.
	 */
	if (style == OLDSTYLE_LOCK) {
		key = 0;
	} else {
		if (ddi_copyin((caddr_t)arg, (caddr_t)&wla, sizeof (wla),
		    mode)) {
			return (EFAULT);
		}
		key = wla.sy_key;
	}

	DEBUGF(3, (CE_CONT,
	    "seglock_graballoc: key=%u, style=%d\n", key, style));

	mutex_enter(&winlock_mutex);
	/* Allocate lockpage on first new style alloc */
	if ((lockpage == NULL) && (style == NEWSTYLE_LOCK)) {
		lockpage = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
		    &lockpage_cookie);
	}

	/* Allocate trashpage on first alloc (any style) */
	if (trashpage_cookie == NULL) {
		(void) ddi_umem_alloc(PAGESIZE, DDI_UMEM_TRASH | DDI_UMEM_SLEEP,
		    &trashpage_cookie);
	}

	/* Share an existing lock with a matching key, else create one */
	if ((lp = seglock_findkey(key)) != NULL) {
		DEBUGF(2, (CE_CONT, "alloc: found lock key %d cookie %d\n",
		    key, lp->cookie));
		++lp->alloccount;
	} else if ((lp = seglock_createlock(style)) != NULL) {
		DEBUGF(2, (CE_CONT, "alloc: created lock key %d cookie %d\n",
		    key, lp->cookie));
		lp->key = key;
	} else {
		DEBUGF(2, (CE_CONT, "alloc: cannot create lock key %d\n", key));
		mutex_exit(&winlock_mutex);
		return (ENOMEM);
	}
	/* findkey/createlock both return with lp->mutex held */
	ASSERT((lp != NULL) && MUTEX_HELD(&lp->mutex));

	mutex_exit(&winlock_mutex);

	/*
	 * Hand the identifying cookie back to the caller.  For NEWSTYLE
	 * locks, the pageoffset bits of sy_ident carry the lock word's
	 * offset within the shared lockpage.
	 */
	if (style == OLDSTYLE_LOCK) {
		err = ddi_copyout((caddr_t)&lp->cookie, (caddr_t)arg,
		    sizeof (lp->cookie), mode);
	} else {
		wla.sy_ident = lp->cookie +
		    (uint_t)((uintptr_t)(lp->lockptr) & PAGEOFFSET);
		err = ddi_copyout((caddr_t)&wla, (caddr_t)arg,
		    sizeof (wla), mode);
	}

	if (err) {
		/* On error, should undo allocation */
		lp->alloccount--;

		/* Verify and delete if lock is unused now; drops lp->mutex */
		garbage_collect_lock(lp, NULL);
		return (EFAULT);
	}

	mutex_exit(&lp->mutex);
	return (0);
}
| 1176 | |
| 1177 | static int |
| 1178 | seglock_grabfree(intptr_t arg, int mode) /* IOCTL */ |
| 1179 | { |
| 1180 | struct seglock *lp; |
| 1181 | uint_t offset; |
| 1182 | |
| 1183 | if (ddi_copyin((caddr_t)arg, &offset, sizeof (offset), mode) |
| 1184 | != 0) { |
| 1185 | return (EFAULT); |
| 1186 | } |
| 1187 | DEBUGF(2, (CE_CONT, "seglock_grabfree: offset=%u", offset)); |
| 1188 | |
| 1189 | if ((lp = seglock_findlock(offset)) == NULL) { |
| 1190 | DEBUGF(2, (CE_CONT, "did not find lock\n")); |
| 1191 | return (EINVAL); |
| 1192 | } |
| 1193 | DEBUGF(3, (CE_CONT, " lock key %d, cookie %d, alloccount %d\n", |
Sherry Moore | 1939740 | 2008-09-22 16:30:26 -0700 | [diff] [blame] | 1194 | lp->key, lp->cookie, lp->alloccount)); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1195 | |
| 1196 | if (lp->alloccount > 0) |
| 1197 | lp->alloccount--; |
| 1198 | |
| 1199 | /* Verify and delete if lock is unused now */ |
| 1200 | garbage_collect_lock(lp, NULL); |
| 1201 | return (0); |
| 1202 | } |
| 1203 | |
| 1204 | |
| 1205 | /* |
| 1206 | * Sets timeout in lock and UFLAGS in client |
| 1207 | * the UFLAGS are stored in the client structure and persistent only |
| 1208 | * till the unmap of the lock pages. If the process sets UFLAGS |
| 1209 | * does a map of the lock/unlock pages and unmaps them, the client |
| 1210 | * structure will get deleted and the UFLAGS will be lost. The process |
| 1211 | * will need to resetup the flags. |
| 1212 | */ |
| 1213 | static int |
| 1214 | seglock_settimeout(intptr_t arg, int mode) /* IOCTL */ |
| 1215 | { |
| 1216 | SegLock *lp; |
| 1217 | SegProc *sdp; |
| 1218 | struct winlocktimeout wlt; |
| 1219 | |
| 1220 | if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0) { |
| 1221 | return (EFAULT); |
| 1222 | } |
| 1223 | |
| 1224 | if ((lp = seglock_findlock(wlt.sy_ident)) == NULL) |
| 1225 | return (EINVAL); |
| 1226 | |
| 1227 | lp->timeout = MSEC_TO_TICK_ROUNDUP(wlt.sy_timeout); |
| 1228 | /* if timeout modified, wake up any sleepers */ |
| 1229 | if (lp->sleepers > 0) { |
| 1230 | cv_broadcast(&lp->locksleep); |
| 1231 | } |
| 1232 | |
| 1233 | /* |
| 1234 | * If the process is trying to set UFLAGS, |
| 1235 | * Find the client segproc and allocate one if needed |
| 1236 | * Set the flags preserving the kernel flags |
| 1237 | * If the process is clearing UFLAGS |
| 1238 | * Find the client segproc but dont allocate one if does not exist |
| 1239 | */ |
| 1240 | if (wlt.sy_flags & UFLAGS) { |
| 1241 | sdp = seglock_allocclient(lp); |
| 1242 | sdp->flag = sdp->flag & KFLAGS | wlt.sy_flags & UFLAGS; |
| 1243 | } else if ((sdp = seglock_findclient(lp)) != NULL) { |
| 1244 | sdp->flag = sdp->flag & KFLAGS; |
| 1245 | /* If clearing UFLAGS leaves the segment or lock idle, delete */ |
| 1246 | garbage_collect_lock(lp, sdp); |
| 1247 | return (0); |
| 1248 | } |
| 1249 | mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */ |
| 1250 | return (0); |
| 1251 | } |
| 1252 | |
| 1253 | static int |
| 1254 | seglock_gettimeout(intptr_t arg, int mode) |
| 1255 | { |
| 1256 | SegLock *lp; |
| 1257 | SegProc *sdp; |
| 1258 | struct winlocktimeout wlt; |
| 1259 | |
| 1260 | if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0) |
| 1261 | return (EFAULT); |
| 1262 | |
| 1263 | if ((lp = seglock_findlock(wlt.sy_ident)) == NULL) |
| 1264 | return (EINVAL); |
| 1265 | |
| 1266 | wlt.sy_timeout = TICK_TO_MSEC(lp->timeout); |
| 1267 | /* |
| 1268 | * If this process has an active allocated lock return those flags |
| 1269 | * Dont allocate a client structure on gettimeout |
| 1270 | * If not, return 0. |
| 1271 | */ |
| 1272 | if ((sdp = seglock_findclient(lp)) != NULL) { |
| 1273 | wlt.sy_flags = sdp->flag & UFLAGS; |
| 1274 | } else { |
| 1275 | wlt.sy_flags = 0; |
| 1276 | } |
| 1277 | mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */ |
| 1278 | |
| 1279 | if (ddi_copyout(&wlt, (caddr_t)arg, sizeof (wlt), mode) != 0) |
| 1280 | return (EFAULT); |
| 1281 | |
| 1282 | return (0); |
| 1283 | } |
| 1284 | |
| 1285 | /* |
| 1286 | * Handle lock segment faults here... |
| 1287 | * |
| 1288 | * This is where the magic happens. |
| 1289 | */ |
| 1290 | |
/* ARGSUSED */
static int
seglock_lockfault(devmap_cookie_t dhp, SegProc *sdp, SegLock *lp, uint_t rw)
{
	SegProc *owner = lp->owner;
	int err;

	ASSERT(MUTEX_HELD(&lp->mutex));
	DEBUGF(3, (CE_CONT,
	    "seglock_lockfault: hdl=%p, sdp=%p, lp=%p owner=%p\n",
	    (void *)dhp, (void *)sdp, (void *)lp, (void *)owner));

	/* lockfault is always called with sdp in current process context */
	ASSERT(ID(sdp) == CURPROC_ID);

	/* If Lock has no current owner, give the mapping to new owner */
	if (owner == NULL) {
		DEBUGF(4, (CE_CONT, " lock has no current owner\n"));
		return (give_mapping(lp, sdp, rw));
	}

	if (owner == sdp) {
		/*
		 * Current owner is faulting on owned lock segment OR
		 * Current owner is faulting on unlock page and has no waiters
		 * Then can give the mapping to current owner
		 */
		if ((sdp->lockseg == dhp) || (lp->sleepers == 0)) {
			DEBUGF(4, (CE_CONT, "lock owner faulting\n"));
			return (give_mapping(lp, sdp, rw));
		} else {
		/*
		 * Owner must be writing to unlock page and there are waiters.
		 * other cases have been checked earlier.
		 * Release the lock, owner, and owners mappings
		 * As the owner is trying to write to the unlock page, leave
		 * it with a trashpage mapping and wake up the sleepers
		 */
			ASSERT((dhp == sdp->unlockseg) && (lp->sleepers != 0));
			DEBUGF(4, (CE_CONT, " owner fault on unlock seg w/ sleeper\n"));
			return (lock_giveup(lp, 1));
		}
	}

	ASSERT(owner != sdp);

	/*
	 * If old owner faulting on trash unlock mapping,
	 * load hat mappings to trash page
	 * RFE: non-owners should NOT be faulting on unlock mapping as they
	 * as first supposed to fault on the lock seg. We could give them
	 * a trash page or return error.
	 */
	if ((sdp->unlockseg == dhp) && (sdp->flag & TRASHPAGE)) {
		DEBUGF(4, (CE_CONT, " old owner reloads trash mapping\n"));
		return (devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
		    DEVMAP_ACCESS, rw));
	}

	/*
	 * Non-owner faulting. Need to check current LOCK state.
	 *
	 * Before reading lock value in LOCK(lp), we must make sure that
	 * the owner cannot change its value before we change mappings
	 * or else we could end up either with a hung process
	 * or more than one process thinking they have the lock.
	 * We do that by unloading the owner's mappings
	 */
	DEBUGF(4, (CE_CONT, " owner loses mappings to check lock state\n"));
	err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
	err |= devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
	if (err != 0)
		return (err);	/* unable to remove owner mapping */

	/*
	 * If lock is not held, then current owner mappings were
	 * unloaded above and we can give the lock to the new owner
	 */
	if (LOCK(lp) == 0) {
		DEBUGF(4, (CE_CONT,
		    "Free lock (%p): Giving mapping to new owner %d\n",
		    (void *)lp, ddi_get_pid()));
		return (give_mapping(lp, sdp, rw));
	}

	DEBUGF(4, (CE_CONT, " lock held, sleeping\n"));

	/*
	 * A non-owning process tried to write (presumably to the lockpage,
	 * but it doesn't matter) but the lock is held; we need to sleep for
	 * the lock while there is an owner.
	 */

	lp->sleepers++;
	while ((owner = lp->owner) != NULL) {
		int rval;

		if ((lp->timeout == 0) || (owner->flag & SY_NOTIMEOUT)) {
			/*
			 * No timeout has been specified for this lock;
			 * we'll simply sleep on the condition variable.
			 */
			rval = cv_wait_sig(&lp->locksleep, &lp->mutex);
		} else {
			/*
			 * A timeout _has_ been specified for this lock. We need
			 * to wake up and possibly steal this lock if the owner
			 * does not let it go. Note that all sleepers on a lock
			 * with a timeout wait; the sleeper with the earliest
			 * timeout will wakeup, and potentially steal the lock
			 * Stealing the lock will cause a broadcast on the
			 * locksleep cv and thus kick the other timed waiters
			 * and cause everyone to restart in a new timedwait
			 */
			rval = cv_reltimedwait_sig(&lp->locksleep,
			    &lp->mutex, lp->timeout, TR_CLOCK_TICK);
		}

		/*
		 * Timeout and still old owner - steal lock
		 * Force-Release lock and give old owner a trashpage mapping
		 */
		if ((rval == -1) && (lp->owner == owner)) {
			/*
			 * if any errors in lock_giveup, go back and sleep/retry
			 * If successful, will break out of loop
			 */
			cmn_err(CE_NOTE, "Process %d timed out on lock %d\n",
			    ddi_get_pid(), lp->cookie);
			(void) lock_giveup(lp, 1);
		} else if (rval == 0) { /* signal pending */
			cmn_err(CE_NOTE,
			    "Process %d signalled while waiting on lock %d\n",
			    ddi_get_pid(), lp->cookie);
			lp->sleepers--;
			return (FC_MAKE_ERR(EINTR));
		}
	}

	lp->sleepers--;
	/*
	 * Give mapping to this process and save a fault later
	 */
	return (give_mapping(lp, sdp, rw));
}
| 1436 | |
| 1437 | /* |
| 1438 | * Utility: give a valid mapping to lock and unlock pages to current process. |
| 1439 | * Caller responsible for unloading old owner's mappings |
| 1440 | */ |
| 1441 | |
static int
give_mapping(SegLock *lp, SegProc *sdp, uint_t rw)
{
	int err = 0;

	ASSERT(MUTEX_HELD(&lp->mutex));
	/* a held lock word implies some process owns it */
	ASSERT(!((lp->owner == NULL) && (LOCK(lp) != 0)));
	/* give_mapping is always called with sdp in current process context */
	ASSERT(ID(sdp) == CURPROC_ID);

	/* remap any old trash mappings */
	if (sdp->flag & TRASHPAGE) {
		/* current owner should not have a trash mapping */
		ASSERT(sdp != lp->owner);

		DEBUGF(4, (CE_CONT,
		    "new owner %d remapping old trash mapping\n",
		    ddi_get_pid()));
		if ((err = devmap_umem_remap(sdp->unlockseg, winlock_dip,
		    lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
			/*
			 * unable to remap old trash page,
			 * abort before changing owner
			 */
			DEBUGF(4, (CE_CONT,
			    "aborting: error in umem_remap %d\n", err));
			return (err);
		}
		sdp->flag &= ~TRASHPAGE;
	}

	/* we have a new owner now */
	lp->owner = sdp;

	/* load a valid translation to the lock page */
	if ((err = devmap_load(sdp->lockseg, lp->cookie, PAGESIZE,
	    DEVMAP_ACCESS, rw)) != 0) {
		return (err);
	}
	DEBUGF(4, (CE_CONT, "new owner %d gets lock mapping", ddi_get_pid()));

	if (lp->sleepers) {
		/* Force unload unlock mapping if there are waiters */
		DEBUGF(4, (CE_CONT,
		    " lock has %d sleepers => remove unlock mapping\n",
		    lp->sleepers));
		err = devmap_unload(sdp->unlockseg, lp->cookie, PAGESIZE);
	} else {
		/*
		 * while here, give new owner a valid mapping to unlock
		 * page so we don't get called again.
		 */
		DEBUGF(4, (CE_CONT, " and unlock mapping\n"));
		err = devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
		    DEVMAP_ACCESS, PROT_WRITE);
	}
	return (err);
}
| 1499 | |
| 1500 | /* |
| 1501 | * Unload owner's mappings, release the lock and wakeup any sleepers |
| 1502 | * If trash, then the old owner is given a trash mapping |
| 1503 | * => old owner held lock too long and caused a timeout |
| 1504 | */ |
/*
 * lock_giveup: strip the current owner's mappings and release the lock.
 *
 * Caller must hold lp->mutex (asserted below) and lp->owner must be
 * non-NULL.  Returns 0 on success; on failure in the trash path the
 * devmap error is returned and ownership is left unchanged so the
 * caller can retry or abort.
 *
 * trash == 0: owner is releasing voluntarily (unmap/exit) — unload both
 * mappings, errors ignored since the process is going away anyway.
 * trash != 0: owner timed out — it keeps running against a trash page
 * instead of the real lock page.
 */
static int
lock_giveup(SegLock *lp, int trash)
{
	SegProc *owner = lp->owner;

	DEBUGF(4, (CE_CONT, "winlock_giveup: lp=%p, owner=%p, trash %d\n",
	    (void *)lp, (void *)ID(lp->owner), trash));

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(owner != NULL);

	/*
	 * owner loses lockpage/unlockpage mappings and gains a
	 * trashpage mapping, if needed.
	 */
	if (!trash) {
		/*
		 * We do not handle errors in devmap_unload in the !trash case,
		 * as the process is attempting to unmap/exit or otherwise
		 * release the lock. Errors in unloading the mapping are not
		 * going to affect that (unmap does not take error return).
		 */
		(void) devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
		(void) devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
	} else {
		int err;

		if (err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE)) {
			/* error unloading lockseg mapping. abort giveup */
			return (err);
		}

		/*
		 * old owner gets mapping to trash page so it can continue
		 * devmap_umem_remap does a hat_unload (and does it holding
		 * the right locks), so no need to devmap_unload on unlockseg
		 */
		if ((err = devmap_umem_remap(owner->unlockseg, winlock_dip,
		    trashpage_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
			/* error remapping to trash page, abort giveup */
			return (err);
		}
		/* mark so a later give_mapping can restore a real mapping */
		owner->flag |= TRASHPAGE;
		/*
		 * Preload mapping to trash page by calling devmap_load
		 * However, devmap_load can only be called on the faulting
		 * process context and not on the owner's process context
		 * we preload only if we happen to be in owner process context
		 * Other processes will fault on the unlock mapping
		 * and be given a trash mapping at that time.
		 */
		if (ID(owner) == CURPROC_ID) {
			(void) devmap_load(owner->unlockseg, lp->cookie,
			    PAGESIZE, DEVMAP_ACCESS, PROT_WRITE);
		}
	}

	/* lock is now ownerless; a faulting waiter may claim it */
	lp->owner = NULL;

	/* Clear the lock value in underlying page so new owner can grab it */
	LOCK(lp) = 0;

	/* wake any threads blocked in the lock-fault path waiting for us */
	if (lp->sleepers) {
		DEBUGF(4, (CE_CONT, " waking up, lp=%p\n", (void *)lp));
		cv_broadcast(&lp->locksleep);
	}
	return (0);
}
| 1573 | |
| 1574 | /* |
| 1575 | * destroy all allocated memory. |
| 1576 | */ |
| 1577 | |
| 1578 | static void |
| 1579 | lock_destroyall(void) |
| 1580 | { |
| 1581 | SegLock *lp, *lpnext; |
| 1582 | |
| 1583 | ASSERT(MUTEX_HELD(&winlock_mutex)); |
| 1584 | ASSERT(lock_list == NULL); |
| 1585 | |
| 1586 | DEBUGF(1, (CE_CONT, "Lock list empty. Releasing free list\n")); |
| 1587 | for (lp = lock_free_list; lp != NULL; lp = lpnext) { |
| 1588 | mutex_enter(&lp->mutex); |
| 1589 | lpnext = lp->next; |
| 1590 | ASSERT(lp->clients == NULL); |
| 1591 | ASSERT(lp->owner == NULL); |
| 1592 | ASSERT(lp->alloccount == 0); |
| 1593 | mutex_destroy(&lp->mutex); |
| 1594 | cv_destroy(&lp->locksleep); |
| 1595 | kmem_free(lp, sizeof (SegLock)); |
| 1596 | } |
| 1597 | lock_free_list = NULL; |
| 1598 | next_lock = 0; |
| 1599 | } |
| 1600 | |
| 1601 | |
| 1602 | /* RFE: create mdb walkers instead of dump routines? */ |
| 1603 | static void |
| 1604 | seglock_dump_all(void) |
| 1605 | { |
| 1606 | SegLock *lp; |
| 1607 | |
| 1608 | mutex_enter(&winlock_mutex); |
| 1609 | cmn_err(CE_CONT, "ID\tKEY\tNALLOC\tATTCH\tOWNED\tLOCK\tWAITER\n"); |
| 1610 | |
| 1611 | cmn_err(CE_CONT, "Lock List:\n"); |
| 1612 | for (lp = lock_list; lp != NULL; lp = lp->next) { |
| 1613 | mutex_enter(&lp->mutex); |
| 1614 | cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n", |
| 1615 | lp->cookie, lp->key, lp->alloccount, |
| 1616 | lp->clients ? 'Y' : 'N', |
| 1617 | lp->owner ? 'Y' : 'N', |
| 1618 | lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N', |
| 1619 | lp->sleepers); |
| 1620 | mutex_exit(&lp->mutex); |
| 1621 | } |
| 1622 | cmn_err(CE_CONT, "Free Lock List:\n"); |
| 1623 | for (lp = lock_free_list; lp != NULL; lp = lp->next) { |
| 1624 | mutex_enter(&lp->mutex); |
| 1625 | cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n", |
| 1626 | lp->cookie, lp->key, lp->alloccount, |
| 1627 | lp->clients ? 'Y' : 'N', |
| 1628 | lp->owner ? 'Y' : 'N', |
| 1629 | lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N', |
| 1630 | lp->sleepers); |
| 1631 | mutex_exit(&lp->mutex); |
| 1632 | } |
| 1633 | |
| 1634 | #ifdef DEBUG |
| 1635 | if (lock_debug < 3) { |
| 1636 | mutex_exit(&winlock_mutex); |
| 1637 | return; |
| 1638 | } |
| 1639 | |
| 1640 | for (lp = lock_list; lp != NULL; lp = lp->next) { |
| 1641 | SegProc *sdp; |
| 1642 | |
| 1643 | mutex_enter(&lp->mutex); |
| 1644 | cmn_err(CE_CONT, |
| 1645 | "lock %p, key=%d, cookie=%d, nalloc=%u, lock=%d, wait=%d\n", |
| 1646 | (void *)lp, lp->key, lp->cookie, lp->alloccount, |
| 1647 | lp->lockptr != 0 ? LOCK(lp) : -1, lp->sleepers); |
| 1648 | |
| 1649 | cmn_err(CE_CONT, |
| 1650 | "style=%d, lockptr=%p, timeout=%ld, clients=%p, owner=%p\n", |
| 1651 | lp->style, (void *)lp->lockptr, lp->timeout, |
| 1652 | (void *)lp->clients, (void *)lp->owner); |
| 1653 | |
| 1654 | |
| 1655 | for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) { |
| 1656 | cmn_err(CE_CONT, " client %p%s, lp=%p, flag=%x, " |
| 1657 | "process tag=%p, lockseg=%p, unlockseg=%p\n", |
| 1658 | (void *)sdp, sdp == lp->owner ? " (owner)" : "", |
| 1659 | (void *)sdp->lp, sdp->flag, (void *)ID(sdp), |
| 1660 | (void *)sdp->lockseg, (void *)sdp->unlockseg); |
| 1661 | } |
| 1662 | mutex_exit(&lp->mutex); |
| 1663 | } |
| 1664 | #endif |
| 1665 | mutex_exit(&winlock_mutex); |
| 1666 | } |
| 1667 | |
| 1668 | #include <sys/modctl.h> |
| 1669 | |
/*
 * Loadable-module linkage: this module is a device driver whose
 * entry points are supplied by winlock_ops.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	"Winlock Driver",	/* Name of the module */
	&winlock_ops,		/* driver ops */
};
| 1675 | |
static struct modlinkage modlinkage = {
	MODREV_1,		/* revision of the modlinkage interface */
	(void *)&modldrv,	/* the one and only linkage element */
	0,			/* remaining slots unused */
	0,
	0
};
| 1683 | |
| 1684 | int |
| 1685 | _init(void) |
| 1686 | { |
| 1687 | int e; |
| 1688 | |
| 1689 | mutex_init(&winlock_mutex, NULL, MUTEX_DEFAULT, NULL); |
| 1690 | e = mod_install(&modlinkage); |
| 1691 | if (e) { |
| 1692 | mutex_destroy(&winlock_mutex); |
| 1693 | } |
| 1694 | return (e); |
| 1695 | } |
| 1696 | |
| 1697 | |
| 1698 | int |
| 1699 | _info(struct modinfo *modinfop) |
| 1700 | { |
| 1701 | return (mod_info(&modlinkage, modinfop)); |
| 1702 | } |
| 1703 | |
| 1704 | int |
| 1705 | _fini(void) |
| 1706 | { |
| 1707 | int e; |
| 1708 | |
| 1709 | e = mod_remove(&modlinkage); |
| 1710 | if (e == 0) { |
| 1711 | mutex_destroy(&winlock_mutex); |
| 1712 | } |
| 1713 | return (e); |
| 1714 | } |