| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| */ |
| |
| |
| #include <sys/types.h> |
| #include <sys/modctl.h> |
| #include <sys/conf.h> |
| #include <sys/ddi.h> |
| #include <sys/sunddi.h> |
| #include <sys/devops.h> |
| #include <sys/stat.h> |
| #include <sys/file.h> |
| #include <sys/cred.h> |
| #include <sys/policy.h> |
| #include <sys/errno.h> |
| #include <vm/seg_dev.h> |
| #include <vm/seg_vn.h> |
| #include <vm/page.h> |
| #include <sys/fs/swapnode.h> |
| #include <sys/sysmacros.h> |
| #include <sys/fcntl.h> |
| #include <sys/vmsystm.h> |
| #include <sys/physmem.h> |
| #include <sys/vfs_opreg.h> |
| |
| static dev_info_t *physmem_dip = NULL; |
| |
| /* |
| * Linked list element hanging off physmem_proc_hash below, which holds all |
| * the information for a given segment that has been set up for this |
| * process.  This is a simple linked list, as we assume that for a given |
| * process the setup ioctl will only be called a handful of times.  If this |
| * assumption changes in the future, a data structure that is quicker to |
| * traverse should be used. |
| */ |
| struct physmem_hash { |
| struct physmem_hash *ph_next; |
| uint64_t ph_base_pa; |
| caddr_t ph_base_va; |
| size_t ph_seg_len; |
| struct vnode *ph_vnode; |
| }; |
| |
| /* |
| * Hash of all of the processes which have set up mappings with the driver, |
| * with pointers to per-process data. |
| */ |
| struct physmem_proc_hash { |
| struct proc *pph_proc; |
| struct physmem_hash *pph_hash; |
| struct physmem_proc_hash *pph_next; |
| }; |
| |
| |
| /* Needs to be a power of two for simple hash algorithm */ |
| #define PPH_SIZE 8 |
| struct physmem_proc_hash *pph[PPH_SIZE]; |
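| |
| /* |
| * For illustration, the two structures above form a two-level lookup: |
| * |
| *   pph[i] --> physmem_proc_hash --> physmem_proc_hash --> ... --> NULL |
| *                  | |
| *                  v |
| *             physmem_hash --> physmem_hash --> ... --> NULL |
| *             (one entry per successful setup ioctl by that process) |
| */ |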
| |
| /* |
| * Lock which protects the pph hash above. To add an element (either a new |
| * process or a new segment) the WRITE lock must be held. To traverse the |
| * list, only a READ lock is needed. |
| */ |
| krwlock_t pph_rwlock; |
| |
| #define PHYSMEM_HASH(procp) ((int)((((uintptr_t)procp) >> 8) & (PPH_SIZE - 1))) |
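| |
| /* |
| * For example, with PPH_SIZE of 8 a (hypothetical) proc_t address of |
| * 0x30001234500 hashes to ((0x30001234500 >> 8) & 7), i.e. bucket 5. |
| * The shift discards low-order bits, which are presumably alike for all |
| * proc_t structures since they come from the same kmem cache; the mask |
| * acts as a cheap modulo only because PPH_SIZE is a power of two. |
| */ |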
| |
| /* |
| * Need to keep a count of how many outstanding physmem vnodes exist to |
| * prevent the driver from being unloaded while they are in use. |
| */ |
| uint64_t physmem_vnodecnt; |
| kmutex_t physmem_mutex; /* protects physmem_vnodecnt */ |
| |
| static int physmem_getpage(struct vnode *vp, offset_t off, size_t len, |
| uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, |
| enum seg_rw rw, struct cred *cr, caller_context_t *ct); |
| |
| static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as, |
| caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, |
| struct cred *cred, caller_context_t *ct); |
| |
| static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as, |
| caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags, |
| struct cred *cred, caller_context_t *ct); |
| |
| static void physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct); |
| |
| const fs_operation_def_t physmem_vnodeops_template[] = { |
| VOPNAME_GETPAGE, { .vop_getpage = physmem_getpage }, |
| VOPNAME_ADDMAP, { .vop_addmap = physmem_addmap }, |
| VOPNAME_DELMAP, { .vop_delmap = physmem_delmap }, |
| VOPNAME_INACTIVE, { .vop_inactive = physmem_inactive }, |
| NULL, NULL |
| }; |
| |
| vnodeops_t *physmem_vnodeops = NULL; |
| |
| /* |
| * Removes the current process from the hash if the process has no more |
| * physmem segments active. |
| */ |
| void |
| physmem_remove_hash_proc() |
| { |
| int index; |
| struct physmem_proc_hash **walker; |
| struct physmem_proc_hash *victim = NULL; |
| |
| index = PHYSMEM_HASH(curproc); |
| rw_enter(&pph_rwlock, RW_WRITER); |
| walker = &pph[index]; |
| while (*walker != NULL) { |
| if ((*walker)->pph_proc == curproc && |
| (*walker)->pph_hash == NULL) { |
| victim = *walker; |
| *walker = victim->pph_next; |
| break; |
| } |
| walker = &((*walker)->pph_next); |
| } |
| rw_exit(&pph_rwlock); |
| if (victim != NULL) |
| kmem_free(victim, sizeof (struct physmem_proc_hash)); |
| } |
| |
| /* |
| * Add a new entry to the hash for the given process to cache the |
| * address ranges that it is working on. If this is the first hash |
| * item to be added for this process, we will create the head pointer |
| * for this process. |
| * Returns 0 on success, ERANGE when the requested physical address range |
| * overlaps an entry already in the hash. |
| */ |
| int |
| physmem_add_hash(struct physmem_hash *php) |
| { |
| int index; |
| struct physmem_proc_hash *iterator; |
| struct physmem_proc_hash *newp = NULL; |
| struct physmem_hash *temp; |
| int ret = 0; |
| |
| index = PHYSMEM_HASH(curproc); |
| |
| insert: |
| rw_enter(&pph_rwlock, RW_WRITER); |
| iterator = pph[index]; |
| while (iterator != NULL) { |
| if (iterator->pph_proc == curproc) { |
| /* |
| * check to make sure a single process does not try to |
| * map the same region twice. |
| */ |
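| /* |
| * Two ranges [a, a + alen) and [b, b + blen) overlap exactly when |
| * one base address falls inside the other range, which is what the |
| * test below checks in both directions.  For example, a base of |
| * 0x2000 with length 0x2000 overlaps a base of 0x3000, since 0x3000 |
| * lies in [0x2000, 0x4000). |
| */ |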
| for (temp = iterator->pph_hash; temp != NULL; |
| temp = temp->ph_next) { |
| if ((php->ph_base_pa >= temp->ph_base_pa && |
| php->ph_base_pa < temp->ph_base_pa + |
| temp->ph_seg_len) || |
| (temp->ph_base_pa >= php->ph_base_pa && |
| temp->ph_base_pa < php->ph_base_pa + |
| php->ph_seg_len)) { |
| ret = ERANGE; |
| break; |
| } |
| } |
| if (ret == 0) { |
| php->ph_next = iterator->pph_hash; |
| iterator->pph_hash = php; |
| } |
| rw_exit(&pph_rwlock); |
| /* Another thread may have added this proc while the lock was dropped */ |
| if (newp != NULL) |
| kmem_free(newp, sizeof (*newp)); |
| return (ret); |
| } |
| iterator = iterator->pph_next; |
| } |
| |
| if (newp != NULL) { |
| newp->pph_proc = curproc; |
| newp->pph_next = pph[index]; |
| newp->pph_hash = php; |
| php->ph_next = NULL; |
| pph[index] = newp; |
| rw_exit(&pph_rwlock); |
| return (0); |
| } |
| |
| rw_exit(&pph_rwlock); |
| /* Dropped the lock so we could use KM_SLEEP */ |
| newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP); |
| goto insert; |
| } |
| |
| /* |
| * Returns a pointer to the physmem_hash struct if the setup routine has |
| * previously been called for this memory range. |
| * Returns NULL if no matching range is found. |
| */ |
| struct physmem_hash * |
| physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp) |
| { |
| int index; |
| struct physmem_proc_hash *proc_hp; |
| struct physmem_hash *php; |
| |
| ASSERT(rw_lock_held(&pph_rwlock)); |
| |
| index = PHYSMEM_HASH(procp); |
| proc_hp = pph[index]; |
| while (proc_hp != NULL) { |
| if (proc_hp->pph_proc == procp) { |
| php = proc_hp->pph_hash; |
| while (php != NULL) { |
| if ((req_paddr >= php->ph_base_pa) && |
| (req_paddr + len <= |
| php->ph_base_pa + php->ph_seg_len)) { |
| return (php); |
| } |
| php = php->ph_next; |
| } |
| } |
| proc_hp = proc_hp->pph_next; |
| } |
| return (NULL); |
| } |
| |
| int |
| physmem_validate_cookie(uint64_t p_cookie) |
| { |
| int index; |
| struct physmem_proc_hash *proc_hp; |
| struct physmem_hash *php; |
| |
| ASSERT(rw_lock_held(&pph_rwlock)); |
| |
| index = PHYSMEM_HASH(curproc); |
| proc_hp = pph[index]; |
| while (proc_hp != NULL) { |
| if (proc_hp->pph_proc == curproc) { |
| php = proc_hp->pph_hash; |
| while (php != NULL) { |
| if ((uint64_t)(uintptr_t)php == p_cookie) { |
| return (1); |
| } |
| php = php->ph_next; |
| } |
| } |
| proc_hp = proc_hp->pph_next; |
| } |
| return (0); |
| } |
| |
| /* |
| * Remove the given vnode from the pph hash.  If the vnode is in the hash, |
| * the owning process must still be around, since a physmem vnode is only |
| * hashed in while its process exists.  If the vnode is not in the hash, |
| * the setup ioctl must have failed; return 0 in that instance and 1 when |
| * the vnode is found in the hash. |
| */ |
| int |
| physmem_remove_vnode_hash(vnode_t *vp) |
| { |
| int index; |
| struct physmem_proc_hash *proc_hp; |
| struct physmem_hash **phpp; |
| struct physmem_hash *victim; |
| |
| index = PHYSMEM_HASH(curproc); |
| /* synchronize with the map routine */ |
| rw_enter(&pph_rwlock, RW_WRITER); |
| proc_hp = pph[index]; |
| while (proc_hp != NULL) { |
| if (proc_hp->pph_proc == curproc) { |
| phpp = &proc_hp->pph_hash; |
| while (*phpp != NULL) { |
| if ((*phpp)->ph_vnode == vp) { |
| victim = *phpp; |
| *phpp = victim->ph_next; |
| |
| rw_exit(&pph_rwlock); |
| kmem_free(victim, sizeof (*victim)); |
| return (1); |
| } |
| phpp = &(*phpp)->ph_next; |
| } |
| } |
| proc_hp = proc_hp->pph_next; |
| } |
| rw_exit(&pph_rwlock); |
| |
| /* not found */ |
| return (0); |
| } |
| |
| int |
| physmem_setup_vnops() |
| { |
| int error; |
| char *name = "physmem"; |
| if (physmem_vnodeops != NULL) |
| cmn_err(CE_PANIC, "physmem vnodeops already set\n"); |
| error = vn_make_ops(name, physmem_vnodeops_template, &physmem_vnodeops); |
| if (error != 0) { |
| cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template"); |
| } |
| return (error); |
| } |
| |
| /* |
| * The guts of the PHYSMEM_SETUP ioctl. |
| * Create a segment in the address space with the specified parameters. |
| * If pspp->user_va is NULL, map_addr will be used to pick an appropriate VA; |
| * otherwise as_gap is used to verify that the requested VA range is free. |
| * We do not do bounds checking on the requested physical addresses; if they |
| * do not exist in the system, they will not be mappable. |
| * Returns 0 on success with the following error codes on failure: |
| * ENOMEM - The VA range requested was already mapped if pspp->user_va is |
| * non-NULL, or the system was unable to find enough VA space for |
| * the desired length if user_va was NULL. |
| * EINVAL - The requested PA, VA, or length was not PAGESIZE aligned. |
| */ |
| int |
| physmem_setup_addrs(struct physmem_setup_param *pspp) |
| { |
| struct as *as = curproc->p_as; |
| struct segvn_crargs vn_a; |
| int ret = 0; |
| uint64_t base_pa; |
| size_t len; |
| caddr_t uvaddr; |
| struct vnode *vp; |
| struct physmem_hash *php; |
| |
| ASSERT(pspp != NULL); |
| base_pa = pspp->req_paddr; |
| len = pspp->len; |
| uvaddr = (caddr_t)(uintptr_t)pspp->user_va; |
| |
| /* Sanity checking */ |
| if (!IS_P2ALIGNED(base_pa, PAGESIZE)) |
| return (EINVAL); |
| if (!IS_P2ALIGNED(len, PAGESIZE)) |
| return (EINVAL); |
| if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE)) |
| return (EINVAL); |
| |
| php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP); |
| |
| /* Need to bump the vnode count so that the driver cannot be unloaded */ |
| mutex_enter(&physmem_mutex); |
| physmem_vnodecnt++; |
| mutex_exit(&physmem_mutex); |
| |
| vp = vn_alloc(KM_SLEEP); |
| ASSERT(vp != NULL); /* SLEEP can't return NULL */ |
| vn_setops(vp, physmem_vnodeops); |
| |
| php->ph_vnode = vp; |
| |
| vn_a.vp = vp; |
| vn_a.offset = (u_offset_t)base_pa; |
| vn_a.type = MAP_SHARED; |
| vn_a.prot = PROT_ALL; |
| vn_a.maxprot = PROT_ALL; |
| vn_a.flags = 0; |
| vn_a.cred = NULL; |
| vn_a.amp = NULL; |
| vn_a.szc = 0; |
| vn_a.lgrp_mem_policy_flags = 0; |
| |
| as_rangelock(as); |
| if (uvaddr != NULL) { |
| if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) { |
| ret = ENOMEM; |
| fail: |
| as_rangeunlock(as); |
| vn_free(vp); |
| kmem_free(php, sizeof (*php)); |
| mutex_enter(&physmem_mutex); |
| physmem_vnodecnt--; |
| mutex_exit(&physmem_mutex); |
| return (ret); |
| } |
| } else { |
| /* We pick the address for the user */ |
| map_addr(&uvaddr, len, 0, 1, 0); |
| if (uvaddr == NULL) { |
| ret = ENOMEM; |
| goto fail; |
| } |
| } |
| ret = as_map(as, uvaddr, len, segvn_create, &vn_a); |
| |
| if (ret == 0) { |
| as_rangeunlock(as); |
| php->ph_base_pa = base_pa; |
| php->ph_base_va = uvaddr; |
| php->ph_seg_len = len; |
| pspp->user_va = (uint64_t)(uintptr_t)uvaddr; |
| pspp->cookie = (uint64_t)(uintptr_t)php; |
| ret = physmem_add_hash(php); |
| if (ret == 0) |
| return (0); |
| |
| /* Note that the call to as_unmap will free the vnode */ |
| (void) as_unmap(as, uvaddr, len); |
| kmem_free(php, sizeof (*php)); |
| return (ret); |
| } |
| |
| goto fail; |
| /*NOTREACHED*/ |
| } |
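| |
| /* |
| * For illustration only, a minimal user-level sketch of the setup step. |
| * It assumes the structure layout and ioctl values exported through |
| * <sys/physmem.h> and is not itself part of the driver; pa stands for a |
| * hypothetical PAGESIZE-aligned physical address of interest: |
| * |
| *	struct physmem_setup_param psp; |
| *	int fd = open("/dev/physmem", O_RDWR); |
| * |
| *	psp.req_paddr = pa; |
| *	psp.len = PAGESIZE; |
| *	psp.user_va = 0;		(let the system pick the VA) |
| *	if (ioctl(fd, PHYSMEM_SETUP, &psp) == 0) { |
| *		va = psp.user_va;	(start of the new segment) |
| *		cookie = psp.cookie;	(saved for PHYSMEM_DESTROY) |
| *	} |
| */ |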
| |
| /* |
| * The guts of the PHYSMEM_MAP ioctl. |
| * Map the given PA to the appropriate VA if PHYSMEM_SETUP ioctl has already |
| * been called for this PA range. |
| * Returns 0 on success with the following error codes on failure: |
| * EPERM - The requested page is long term locked, and thus repeated |
| * requests to allocate this page will likely fail. |
| * EAGAIN - The requested page could not be allocated, but it is believed |
| * that future attempts could succeed. |
| * ENOMEM - There was not enough free memory in the system to safely |
| * map the requested page. |
| * EINVAL - The requested paddr was not PAGESIZE aligned or the |
| * PHYSMEM_SETUP ioctl was not called for this page. |
| * ENOENT - The requested page was inside the kernel cage, and the |
| * PHYSMEM_CAGE flag was not set. |
| * EBUSY - The requested page is retired and the PHYSMEM_RETIRED flag |
| * was not set. |
| */ |
| static int |
| physmem_map_addrs(struct physmem_map_param *pmpp) |
| { |
| caddr_t uvaddr; |
| page_t *pp; |
| uint64_t req_paddr; |
| struct vnode *vp; |
| int ret = 0; |
| struct physmem_hash *php; |
| uint_t flags = 0; |
| |
| ASSERT(pmpp != NULL); |
| req_paddr = pmpp->req_paddr; |
| |
| if (!IS_P2ALIGNED(req_paddr, PAGESIZE)) |
| return (EINVAL); |
| /* Find the vnode for this map request */ |
| rw_enter(&pph_rwlock, RW_READER); |
| php = physmem_get_hash(req_paddr, PAGESIZE, curproc); |
| if (php == NULL) { |
| rw_exit(&pph_rwlock); |
| return (EINVAL); |
| } |
| vp = php->ph_vnode; |
| uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa); |
| rw_exit(&pph_rwlock); |
| |
| pp = page_numtopp_nolock(btop((size_t)req_paddr)); |
| if (pp == NULL) { |
| pmpp->ret_va = 0; |
| return (EPERM); |
| } |
| |
| /* |
| * Check to see if the page is already mapped correctly.  This can happen |
| * when we failed to capture a page previously and it was captured |
| * asynchronously for us. Return success in this case. |
| */ |
| if (pp->p_vnode == vp) { |
| ASSERT(pp->p_offset == (u_offset_t)req_paddr); |
| pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr; |
| return (0); |
| } |
| |
| /* |
| * physmem should be responsible for checking for cage |
| * and prom pages. |
| */ |
| if (pmpp->flags & PHYSMEM_CAGE) |
| flags = CAPTURE_GET_CAGE; |
| if (pmpp->flags & PHYSMEM_RETIRED) |
| flags |= CAPTURE_GET_RETIRED; |
| |
| ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc); |
| |
| if (ret != 0) { |
| pmpp->ret_va = 0; |
| return (ret); |
| } else { |
| pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr; |
| return (0); |
| } |
| } |
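| |
| /* |
| * Continuing the sketch above, a hedged user-level example of the map |
| * step (illustrative only; names from <sys/physmem.h>): |
| * |
| *	struct physmem_map_param pmp; |
| * |
| *	pmp.req_paddr = pa;	(must lie within a setup PA range) |
| *	pmp.flags = 0;		(or PHYSMEM_CAGE and/or PHYSMEM_RETIRED) |
| *	if (ioctl(fd, PHYSMEM_MAP, &pmp) == 0) |
| *		the page is now accessible at pmp.ret_va |
| *	else if (errno == EAGAIN) |
| *		the capture failed transiently and may be retried |
| */ |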
| |
| /* |
| * Map the given page into the process's address space if possible. |
| * We actually only hash the page in on the correct vnode, as the page |
| * will be mapped in via segvn_pagefault. |
| * returns 0 on success |
| * returns 1 if there is no longer a need to map this page (e.g. the |
| * process exited or no valid request remains outstanding) |
| * returns -1 if we failed to map the page. |
| */ |
| int |
| map_page_proc(page_t *pp, void *arg, uint_t flags) |
| { |
| struct vnode *vp; |
| proc_t *procp = (proc_t *)arg; |
| int ret; |
| u_offset_t paddr = (u_offset_t)ptob(pp->p_pagenum); |
| struct physmem_hash *php; |
| |
| ASSERT(pp != NULL); |
| |
| /* |
| * Check against availrmem to make sure that we're not low on memory. |
| * We check again here as ASYNC requests do not do this check elsewhere. |
| * We return 1 as we don't want the page to have the PR_CAPTURE bit |
| * set or be on the page capture hash. |
| */ |
| if (swapfs_minfree > availrmem + 1) { |
| page_free(pp, 1); |
| return (1); |
| } |
| |
| /* |
| * If this is an asynchronous request for the current process, |
| * we cannot map the page, as it is possible that we are also in the |
| * process of unmapping the page which could result in a deadlock |
| * with the as lock. |
| */ |
| if ((flags & CAPTURE_ASYNC) && (curproc == procp)) { |
| page_free(pp, 1); |
| return (-1); |
| } |
| |
| /* only return zeroed out pages */ |
| pagezero(pp, 0, PAGESIZE); |
| |
| rw_enter(&pph_rwlock, RW_READER); |
| php = physmem_get_hash(paddr, PAGESIZE, procp); |
| if (php == NULL) { |
| rw_exit(&pph_rwlock); |
| /* |
| * Free the page as there is no longer a valid outstanding |
| * request for this page. |
| */ |
| page_free(pp, 1); |
| return (1); |
| } |
| |
| vp = php->ph_vnode; |
| |
| /* |
| * We need to protect against a possible deadlock here where we own |
| * the vnode page hash mutex and want to acquire it again.  There are |
| * places in the code where we unlock a page while holding that mutex, |
| * which can lead to the page being captured and eventually ending up |
| * here. |
| */ |
| if (mutex_owned(page_vnode_mutex(vp))) { |
| rw_exit(&pph_rwlock); |
| page_free(pp, 1); |
| return (-1); |
| } |
| |
| ret = page_hashin(pp, vp, paddr, NULL); |
| rw_exit(&pph_rwlock); |
| if (ret == 0) { |
| page_free(pp, 1); |
| return (-1); |
| } |
| |
| page_downgrade(pp); |
| |
| mutex_enter(&freemem_lock); |
| availrmem--; |
| mutex_exit(&freemem_lock); |
| |
| return (0); |
| } |
| |
| /* |
| * The guts of the PHYSMEM_DESTROY ioctl. |
| * The cookie passed in will provide all of the information needed to |
| * free up the address space and physical memory associated with the |
| * corresponding PHYSMEM_SETUP ioctl. |
| * Returns 0 on success with the following error codes on failure: |
| * EINVAL - The cookie supplied is not valid. |
| */ |
| int |
| physmem_destroy_addrs(uint64_t p_cookie) |
| { |
| struct as *as = curproc->p_as; |
| size_t len; |
| caddr_t uvaddr; |
| |
| rw_enter(&pph_rwlock, RW_READER); |
| if (physmem_validate_cookie(p_cookie) == 0) { |
| rw_exit(&pph_rwlock); |
| return (EINVAL); |
| } |
| |
| len = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_seg_len; |
| uvaddr = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_base_va; |
| rw_exit(&pph_rwlock); |
| |
| (void) as_unmap(as, uvaddr, len); |
| |
| return (0); |
| } |
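| |
| /* |
| * And the matching user-level teardown step, again purely illustrative: |
| * |
| *	uint64_t cookie;	(saved from psp.cookie at setup time) |
| * |
| *	if (ioctl(fd, PHYSMEM_DESTROY, &cookie) != 0) |
| *		EINVAL means the cookie was not valid for this process |
| */ |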
| |
| /* |
| * If the page has been hashed into the physmem vnode, then just look it up |
| * and return it via pl, otherwise return ENOMEM as the map ioctl has not |
| * succeeded on the given page. |
| */ |
| /*ARGSUSED*/ |
| static int |
| physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp, |
| page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw, |
| struct cred *cr, caller_context_t *ct) |
| { |
| page_t *pp; |
| |
| ASSERT(len == PAGESIZE); |
| ASSERT(AS_READ_HELD(seg->s_as)); |
| |
| /* |
| * If the page is in the hash, then we successfully claimed this |
| * page earlier, so return it to the caller. |
| */ |
| pp = page_lookup(vp, off, SE_SHARED); |
| if (pp != NULL) { |
| pl[0] = pp; |
| pl[1] = NULL; |
| *protp = PROT_ALL; |
| return (0); |
| } |
| return (ENOMEM); |
| } |
| |
| /* |
| * We cannot allow a process mapping /dev/physmem pages to fork, as there |
| * can only be a single mapping to a /dev/physmem page at a given time. |
| * Thus we return EINVAL when we are not working on our own address space. |
| * Otherwise we return zero, as this function is required for normal |
| * operation. |
| */ |
| /*ARGSUSED*/ |
| static int |
| physmem_addmap(struct vnode *vp, offset_t off, struct as *as, |
| caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, |
| struct cred *cred, caller_context_t *ct) |
| { |
| if (curproc->p_as != as) { |
| return (EINVAL); |
| } |
| return (0); |
| } |
| |
| /* This will always be called to remove a whole segment. */ |
| /*ARGSUSED*/ |
| static int |
| physmem_delmap(struct vnode *vp, offset_t off, struct as *as, |
| caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags, |
| struct cred *cred, caller_context_t *ct) |
| { |
| /* |
| * Release our hold on the vnode so that the final VN_RELE will |
| * call physmem_inactive to clean things up. |
| */ |
| VN_RELE(vp); |
| |
| return (0); |
| } |
| |
| /* |
| * Clean up all the pages belonging to this vnode and then free it. |
| */ |
| /*ARGSUSED*/ |
| static void |
| physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct) |
| { |
| page_t *pp; |
| |
| /* |
| * Remove the vnode from the hash now, to prevent asynchronous |
| * attempts to map into this vnode. This avoids a deadlock |
| * where two threads try to get into this logic at the same |
| * time and try to map the pages they are destroying into the |
| * other's address space. |
| * If it's not in the hash, just free it. |
| */ |
| if (physmem_remove_vnode_hash(vp) == 0) { |
| ASSERT(vp->v_pages == NULL); |
| vn_free(vp); |
| physmem_remove_hash_proc(); |
| mutex_enter(&physmem_mutex); |
| physmem_vnodecnt--; |
| mutex_exit(&physmem_mutex); |
| return; |
| } |
| |
| /* |
| * At this point in time, no other logic can be adding or removing |
| * pages from the vnode, otherwise the v_pages list could be inaccurate. |
| */ |
| |
| while ((pp = vp->v_pages) != NULL) { |
| page_t *rpp; |
| if (page_tryupgrade(pp)) { |
| /* |
| * set lckcnt for page_destroy to do availrmem |
| * accounting |
| */ |
| pp->p_lckcnt = 1; |
| page_destroy(pp, 0); |
| } else { |
| /* failure to lock should be transient */ |
| rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED); |
| if (rpp != pp) { |
| page_unlock(rpp); |
| continue; |
| } |
| page_unlock(pp); |
| } |
| } |
| vn_free(vp); |
| physmem_remove_hash_proc(); |
| mutex_enter(&physmem_mutex); |
| physmem_vnodecnt--; |
| mutex_exit(&physmem_mutex); |
| } |
| |
| /*ARGSUSED*/ |
| static int |
| physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, |
| int *rvalp) |
| { |
| int ret; |
| |
| switch (cmd) { |
| case PHYSMEM_SETUP: |
| { |
| struct physmem_setup_param psp; |
| if (ddi_copyin((void *)arg, &psp, |
| sizeof (struct physmem_setup_param), 0)) |
| return (EFAULT); |
| ret = physmem_setup_addrs(&psp); |
| if (ddi_copyout(&psp, (void *)arg, sizeof (psp), 0)) |
| return (EFAULT); |
| } |
| break; |
| case PHYSMEM_MAP: |
| { |
| struct physmem_map_param pmp; |
| if (ddi_copyin((void *)arg, &pmp, |
| sizeof (struct physmem_map_param), 0)) |
| return (EFAULT); |
| ret = physmem_map_addrs(&pmp); |
| if (ddi_copyout(&pmp, (void *)arg, sizeof (pmp), 0)) |
| return (EFAULT); |
| } |
| break; |
| case PHYSMEM_DESTROY: |
| { |
| uint64_t cookie; |
| if (ddi_copyin((void *)arg, &cookie, |
| sizeof (uint64_t), 0)) |
| return (EFAULT); |
| ret = physmem_destroy_addrs(cookie); |
| } |
| break; |
| default: |
| return (ENOTSUP); |
| } |
| return (ret); |
| } |
| |
| /*ARGSUSED*/ |
| static int |
| physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp) |
| { |
| int ret; |
| static int msg_printed = 0; |
| |
| if ((flag & (FWRITE | FREAD)) != (FWRITE | FREAD)) { |
| return (EINVAL); |
| } |
| |
| /* need to make sure we have the right privileges */ |
| if ((ret = secpolicy_resource(credp)) != 0) |
| return (ret); |
| if ((ret = secpolicy_lock_memory(credp)) != 0) |
| return (ret); |
| |
| if (msg_printed == 0) { |
| cmn_err(CE_NOTE, "!driver has been opened. This driver may " |
| "take out long term locks on pages which may impact " |
| "dynamic reconfiguration events"); |
| msg_printed = 1; |
| } |
| |
| return (0); |
| } |
| |
| /*ARGSUSED*/ |
| static int |
| physmem_close(dev_t dev, int flag, int otyp, cred_t *credp) |
| { |
| return (0); |
| } |
| |
| /*ARGSUSED*/ |
| static int |
| physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, |
| void *arg, void **resultp) |
| { |
| switch (infocmd) { |
| case DDI_INFO_DEVT2DEVINFO: |
| *resultp = physmem_dip; |
| return (DDI_SUCCESS); |
| |
| case DDI_INFO_DEVT2INSTANCE: |
| *resultp = (void *)(ulong_t)getminor((dev_t)arg); |
| return (DDI_SUCCESS); |
| |
| default: |
| return (DDI_FAILURE); |
| } |
| } |
| |
| static int |
| physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) |
| { |
| int i; |
| |
| if (cmd == DDI_RESUME) { |
| return (DDI_SUCCESS); |
| } |
| |
| if (cmd != DDI_ATTACH) |
| return (DDI_FAILURE); |
| |
| if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR, |
| ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS) |
| return (DDI_FAILURE); |
| |
| physmem_dip = dip; |
| |
| /* Initialize driver specific data */ |
| if (physmem_setup_vnops()) { |
| ddi_remove_minor_node(dip, ddi_get_name(dip)); |
| return (DDI_FAILURE); |
| } |
| |
| for (i = 0; i < PPH_SIZE; i++) |
| pph[i] = NULL; |
| |
| page_capture_register_callback(PC_PHYSMEM, 10000, |
| map_page_proc); |
| |
| return (DDI_SUCCESS); |
| } |
| |
| static int |
| physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) |
| { |
| int ret = DDI_SUCCESS; |
| |
| if (cmd == DDI_SUSPEND) { |
| return (DDI_SUCCESS); |
| } |
| |
| if (cmd != DDI_DETACH) |
| return (DDI_FAILURE); |
| |
| ASSERT(physmem_dip == dip); |
| |
| mutex_enter(&physmem_mutex); |
| if (physmem_vnodecnt == 0) { |
| if (physmem_vnodeops != NULL) { |
| vn_freevnodeops(physmem_vnodeops); |
| physmem_vnodeops = NULL; |
| page_capture_unregister_callback(PC_PHYSMEM); |
| } |
| } else { |
| ret = DDI_FAILURE; /* outstanding physmem vnodes */ |
| } |
| mutex_exit(&physmem_mutex); |
| if (ret == DDI_SUCCESS) |
| ddi_remove_minor_node(dip, ddi_get_name(dip)); |
| return (ret); |
| } |
| |
| static struct cb_ops physmem_cb_ops = { |
| physmem_open, /* open */ |
| physmem_close, /* close */ |
| nodev, /* strategy */ |
| nodev, /* print */ |
| nodev, /* dump */ |
| nodev, /* read */ |
| nodev, /* write */ |
| physmem_ioctl, /* ioctl */ |
| nodev, /* devmap */ |
| nodev, /* mmap */ |
| nodev, /* segmap */ |
| nochpoll, /* chpoll */ |
| ddi_prop_op, /* prop_op */ |
| NULL, /* cb_str */ |
| D_NEW | D_MP | D_DEVMAP, /* cb_flag */ |
| CB_REV, /* cb_rev */ |
| NULL, /* cb_aread */ |
| NULL /* cb_awrite */ |
| }; |
| |
| static struct dev_ops physmem_ops = { |
| DEVO_REV, /* devo_rev */ |
| 0, /* refcnt */ |
| physmem_getinfo, /* getinfo */ |
| nulldev, /* identify */ |
| nulldev, /* probe */ |
| physmem_attach, /* attach */ |
| physmem_detach, /* detach */ |
| nodev, /* reset */ |
| &physmem_cb_ops, /* cb_ops */ |
| NULL, /* bus_ops */ |
| NULL, /* power */ |
| ddi_quiesce_not_needed, /* quiesce */ |
| }; |
| |
| static struct modldrv modldrv = { |
| &mod_driverops, /* type of module: a driver */ |
| "physmem driver", /* name of the module */ |
| &physmem_ops /* driver ops */ |
| }; |
| |
| static struct modlinkage modlinkage = { |
| MODREV_1, |
| &modldrv, |
| NULL |
| }; |
| |
| int |
| _init(void) |
| { |
| return (mod_install(&modlinkage)); |
| } |
| |
| int |
| _info(struct modinfo *modinfop) |
| { |
| return (mod_info(&modlinkage, modinfop)); |
| } |
| |
| int |
| _fini(void) |
| { |
| return (mod_remove(&modlinkage)); |
| } |