| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| * Copyright 2012 Milan Jurik. All rights reserved. |
| * Copyright (c) 2016 by Delphix. All rights reserved. |
| * Copyright 2017 Joyent, Inc. |
| */ |
| |
| |
| /* |
| * Overview of the RSM Kernel Agent: |
| * --------------------------------- |
| * |
| * rsm.c constitutes the implementation of the RSM kernel agent. The RSM |
| * kernel agent is a pseudo device driver which makes use of the RSMPI |
| * interface on behalf of the RSMAPI user library. |
| * |
| * The kernel agent functionality can be categorized into the following |
| * components: |
| * 1. Driver Infrastructure |
| * 2. Export/Import Segment Management |
| * 3. Internal resource allocation/deallocation |
| * |
| * The driver infrastructure includes the basic module loading entry points |
| * like _init, _info, _fini to load, unload and report information about |
| * the driver module. The driver infrastructure also includes the |
| * autoconfiguration entry points, namely attach, detach and getinfo, for |
| * device autoconfiguration. |
| * |
| * The kernel agent is a pseudo character device driver and exports |
| * a cb_ops structure which defines the driver entry points for character |
| * device access. This includes the open and close entry points. The |
| * other entry points provided include ioctl, devmap, segmap and chpoll. |
| * The read and write entry points are not used since the device is memory |
| * mapped. Also, ddi_prop_op is used for the prop_op entry point. |
| * |
| * The ioctl entry point supports a number of commands, which are used by |
| * the RSMAPI library in order to export and import segments. These |
| * commands include commands for binding and rebinding the physical pages |
| * allocated to the virtual address range, publishing the export segment, |
| * unpublishing and republishing an export segment, creating an |
| * import segment and a virtual connection from this import segment to |
| * an export segment, performing scatter-gather data transfer, and |
| * performing barrier operations. |
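| * |
| * From the library's perspective each of these reduces to an ioctl on |
| * the /dev/rsm file descriptor. As a minimal sketch (the command name |
| * and message fields here are illustrative assumptions, not verbatim |
| * from rsm.h): |
| * |
| * rsm_ioctlmsg_t msg = { 0 }; |
| * msg.vaddr = va; |
| * msg.len = len; |
| * if (ioctl(rsm_fd, RSM_IOCTL_BIND, &msg) < 0) |
| * perror("bind"); |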
| * |
| * |
| * Export and Import segments: |
| * --------------------------- |
| * |
| * In order to create an RSM export segment a process allocates a range in its |
| * virtual address space for the segment using standard Solaris interfaces. |
| * The process then calls RSMAPI, which in turn makes an ioctl call to the |
| * RSM kernel agent for an allocation of physical memory pages and for |
| * creation of the export segment by binding these pages to the virtual |
| * address range. These pages are locked in memory so that remote accesses |
| * are always applied to the correct page. Then the RSM segment is published, |
| * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id |
| * is assigned to it. |
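| * |
| * A hedged user-level sketch of the export path (see librsm(3LIB) for |
| * the authoritative signatures; error handling omitted, and ctrl is a |
| * controller handle previously obtained via rsm_get_controller()): |
| * |
| * rsm_memseg_export_handle_t eseg; |
| * rsm_memseg_id_t segid = 0; |
| * (void) rsm_memseg_export_create(ctrl, &eseg, va, len, 0); |
| * (void) rsm_memseg_export_publish(eseg, &segid, acl, acl_len); |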
| * |
| * In order to import a published RSM segment, RSMAPI creates an import |
| * segment and forms a virtual connection across the interconnect to the |
| * export segment, via an ioctl into the kernel agent with the connect |
| * command. The import segment setup is completed by mapping the |
| * local device memory into the importer's virtual address space. The |
| * mapping of the import segment is handled by the segmap/devmap |
| * infrastructure described as follows. |
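| * |
| * The corresponding import-side sketch (again hedged; exact signatures |
| * per librsm(3LIB)) is a connect followed by a map: |
| * |
| * rsm_memseg_import_handle_t iseg; |
| * void *iva; |
| * (void) rsm_memseg_import_connect(ctrl, exporter_node, segid, |
| * RSM_PERM_RDWR, &iseg); |
| * (void) rsm_memseg_import_map(iseg, &iva, 0, RSM_PERM_RDWR, 0, len); |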
| * |
| * Segmap and Devmap interfaces: |
| * |
| * The RSM kernel agent allows device memory to be directly accessed by user |
| * threads via memory mapping. In order to do so, the RSM kernel agent |
| * supports the devmap and segmap entry points. |
| * |
| * The segmap entry point (rsm_segmap) is responsible for setting up a |
| * memory mapping as requested by mmap. The devmap entry point |
| * (rsm_devmap) is responsible for exporting the device memory to the |
| * user applications. rsm_segmap calls RSMPI rsm_map to allocate device |
| * memory. Then control is transferred to the devmap_setup call, which |
| * calls rsm_devmap. |
| * |
| * rsm_devmap validates the user mapping to the device or kernel memory |
| * and passes the information to the system for setting up the mapping. The |
| * actual setting up of the mapping is done by devmap_devmem_setup (for |
| * device memory) or devmap_umem_setup (for kernel memory). Callbacks are |
| * registered for device context management via the devmap_devmem_setup |
| * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap, |
| * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping |
| * is created, a mapping is freed, a mapping is accessed or an existing |
| * mapping is duplicated respectively. These callbacks allow the RSM kernel |
| * agent to maintain state information associated with the mappings. |
| * The state information is mainly in the form of a cookie list for the import |
| * segment for which mapping has been done. |
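| * |
| * The registration is conceptually the following sketch (field order per |
| * devmap_callback_ctl(9S); illustrative, not this driver's verbatim |
| * initializer): |
| * |
| * static struct devmap_callback_ctl rsmmap_ops = { |
| * DEVMAP_OPS_REV, |
| * rsmmap_map, |
| * rsmmap_access, |
| * rsmmap_dup, |
| * rsmmap_unmap |
| * }; |
| * |
| * which is then passed as the callbackops argument to |
| * devmap_devmem_setup() or devmap_umem_setup(). |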
| * |
| * Forced disconnect of import segments: |
| * |
| * When an exported segment is unpublished, the exporter sends a forced |
| * disconnect message to all its importers. The importer segments are |
| * unloaded and disconnected. This involves unloading the original |
| * mappings and remapping to a preallocated kernel trash page. This is |
| * done by devmap_umem_remap. The trash/dummy page is a kernel page, |
| * preallocated by the kernel agent during attach using ddi_umem_alloc with |
| * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application |
| * due to unloading of the original mappings. |
| * |
| * Additionally every segment has a mapping generation number associated |
| * with it. This is an entry in the barrier generation page, created |
| * during attach time. This mapping generation number for the import |
| * segments is incremented on a force disconnect to notify the application |
| * of the force disconnect. On this notification, the application needs |
| * to reconnect the segment to establish a new legitimate mapping. |
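| * |
| * As a minimal sketch of the idea (names here are illustrative, not |
| * taken from the library): |
| * |
| * gen_before = bar_page[seg_slot]; |
| * ... access the mapped segment ... |
| * if (bar_page[seg_slot] != gen_before) |
| * reconnect_segment(); |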
| * |
| * |
| * Locks used in the kernel agent: |
| * ------------------------------- |
| * |
| * The kernel agent uses a variety of mutexes and condition variables for |
| * mutual exclusion of the shared data structures and for synchronization |
| * between the various threads. Some of the locks are described as follows. |
| * |
| * Each resource structure, which represents either an export or an import |
| * segment, has a lock associated with it. The lock is the resource mutex, |
| * rsmrc_lock. This is used directly by the RSMRC_LOCK and RSMRC_UNLOCK |
| * macros and in the |
| * rsmseglock_acquire and rsmseglock_release macros. An additional |
| * lock called the rsmsi_lock is used for the shared import data structure |
| * that is relevant for resources representing import segments. There is |
| * also a condition variable associated with the resource called s_cv. This |
| * is used to wait for events such as segment state changes. |
| * |
| * The resource structures are allocated from a pool of resource structures, |
| * called rsm_resource. This pool is protected via a reader-writer lock, |
| * called rsmrc_lock. |
| * |
| * There are two separate hash tables, one for the export segments and |
| * one for the import segments. The export segments are inserted into the |
| * export segment hash table only after they have been published and the |
| * import segments are inserted in the import segment hash table only after |
| * they have successfully connected to an exported segment. These tables are |
| * protected via reader-writer locks. |
| * |
| * Debug Support in the kernel agent: |
| * ---------------------------------- |
| * |
| * Debugging support in the kernel agent is provided by the following |
| * macros. |
| * |
| * DBG_PRINTF((category, level, message)) is a macro which logs a debug |
| * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer |
| * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based |
| * on the definition of the category and level. All messages that belong to |
| * the specified category (rsmdbg_category) and are of an equal or greater |
| * severity than the specified level (rsmdbg_level) are logged. The message |
| * is a string which uses the same formatting rules as the strings used in |
| * printf. |
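| * |
| * For example, an informational message from the import path is logged |
| * as: |
| * |
| * DBG_PRINTF((RSM_IMPORT, RSM_DEBUG, "connected to segid %d\n", |
| * segid)); |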
| * |
| * The category defines which component of the kernel agent has logged this |
| * message. There are a number of categories that have been defined such as |
| * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro, |
| * DBG_ADDCATEGORY is used to add in another category to the currently |
| * specified category value so that the component using this new category |
| * can also effectively log debug messages. Thus, the category of a specific |
| * message is some combination of the available categories and we can define |
| * sub-categories if we want a finer level of granularity. |
| * |
| * The level defines the severity of the message. Different level values are |
| * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being |
| * the least severe (debug level is 0). |
| * |
| * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug |
| * variable or a string respectively. |
| * |
| * |
| * NOTES: |
| * |
| * Special Fork and Exec Handling: |
| * ------------------------------- |
| * |
| * The backing physical pages of an exported segment are always locked down. |
| * Thus, there are two cases in which a process having exported segments |
| * will cause a cpu to hang: (1) the process invokes exec; (2) a process |
| * forks and invokes exit before the duped file descriptors for the export |
| * segments are closed in the child process. The hang is caused because the |
| * address space release algorithm in Solaris VM subsystem is based on a |
| * non-blocking loop which does not terminate while segments are locked |
| * down. In addition to this, Solaris VM subsystem lacks a callback |
| * mechanism to the rsm kernel agent to allow unlocking these export |
| * segment pages. |
| * |
| * In order to circumvent this problem, the kernel agent does the following. |
| * The Solaris VM subsystem keeps memory segments in increasing order of |
| * virtual addresses. Thus a special page (special_exit_offset) is allocated |
| * by the kernel agent and is mmapped into the heap area of the process |
| * address space (the mmap is done by the RSMAPI library). During the mmap |
| * processing of this special page by the devmap infrastructure, a |
| * callback (the same |
| * devmap context management callbacks discussed above) is registered for an |
| * unmap. |
| * |
| * As discussed above, this page is processed by the Solaris address space |
| * release code before any of the exported segments' pages (which are |
| * allocated from high memory). It is during this processing that the |
| * unmap callback gets |
| * called and this callback is responsible for force destroying the exported |
| * segments and thus eliminating the problem of locked pages. |
| * |
| * Flow-control: |
| * ------------ |
| * |
| * A credit based flow control algorithm is used for messages whose |
| * processing cannot be done in the interrupt context because it might |
| * involve invoking rsmpi calls, might take a long time to complete, or |
| * might need to allocate resources. The algorithm operates on a per-path |
| * basis. To send a message the pathend needs to have a credit and |
| * it consumes one for every message that is flow controlled. On the |
| * receiving pathend the message is put on a msgbuf_queue and a task is |
| * dispatched on the worker thread - recv_taskq where it is processed. |
| * After processing the message, the receiving pathend dequeues the message, |
| * and once it has processed more than RSMIPC_LOTSFREE_MSGBUFS messages it |
| * sends credits back to the sender pathend. |
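| * |
| * In rough pseudo-code (with illustrative field names), the sending side |
| * is: |
| * |
| * if (path->credits > 0) { |
| * path->credits--; |
| * send the message over the interconnect |
| * } else { |
| * queue or fail the send until a credit-return message arrives |
| * } |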
| * |
| * RSM_DRTEST: |
| * ----------- |
| * |
| * This is used to enable DR testing using a test driver on test |
| * platforms which do not support DR. |
| * |
| */ |
| |
| #include <sys/types.h> |
| #include <sys/param.h> |
| #include <sys/user.h> |
| #include <sys/buf.h> |
| #include <sys/systm.h> |
| #include <sys/cred.h> |
| #include <sys/vm.h> |
| #include <sys/uio.h> |
| #include <vm/seg.h> |
| #include <vm/page.h> |
| #include <sys/stat.h> |
| |
| #include <sys/time.h> |
| #include <sys/errno.h> |
| |
| #include <sys/file.h> |
| #include <sys/proc.h> |
| #include <sys/mman.h> |
| #include <sys/open.h> |
| #include <sys/atomic.h> |
| #include <sys/mem_config.h> |
| |
| |
| #include <sys/ddi.h> |
| #include <sys/devops.h> |
| #include <sys/ddidevmap.h> |
| #include <sys/sunddi.h> |
| #include <sys/esunddi.h> |
| #include <sys/ddi_impldefs.h> |
| |
| #include <sys/kmem.h> |
| #include <sys/conf.h> |
| |
| #include <sys/modctl.h> |
| |
| #include <sys/policy.h> |
| |
| #include <sys/taskq.h> |
| |
| #include <sys/rsm/rsm_common.h> |
| #include <sys/rsm/rsmapi_common.h> |
| #include <sys/rsm/rsm.h> |
| #include <rsm_in.h> |
| #include <sys/rsm/rsmka_path_int.h> |
| #include <sys/rsm/rsmpi.h> |
| |
| #include <sys/debug.h> |
| |
| #include <sys/tuneable.h> |
| |
| #ifdef RSM_DRTEST |
| extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec, |
| void *arg); |
| extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, |
| void *arg); |
| #endif |
| |
| extern void dbg_printf(int category, int level, char *fmt, ...); |
| extern void rsmka_pathmanager_init(); |
| extern void rsmka_pathmanager_cleanup(); |
| extern void rele_sendq_token(sendq_token_t *); |
| extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t); |
| extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t); |
| extern int rsmka_topology_ioctl(caddr_t, int, int); |
| |
| extern pri_t maxclsyspri; |
| extern work_queue_t work_queue; |
| extern kmutex_t ipc_info_lock; |
| extern kmutex_t ipc_info_cvlock; |
| extern kcondvar_t ipc_info_cv; |
| extern kmutex_t path_hold_cvlock; |
| extern kcondvar_t path_hold_cv; |
| |
| extern kmutex_t rsmka_buf_lock; |
| |
| extern path_t *rsm_find_path(char *, int, rsm_addr_t); |
| extern adapter_t *rsmka_lookup_adapter(char *, int); |
| extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *); |
| extern boolean_t rsmka_do_path_active(path_t *, int); |
| extern boolean_t rsmka_check_node_alive(rsm_node_id_t); |
| extern void rsmka_release_adapter(adapter_t *); |
| extern void rsmka_enqueue_msgbuf(path_t *path, void *data); |
| extern void rsmka_dequeue_msgbuf(path_t *path); |
| extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path); |
| /* lint -w2 */ |
| |
| static int rsm_open(dev_t *, int, int, cred_t *); |
| static int rsm_close(dev_t, int, int, cred_t *); |
| static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, |
| cred_t *credp, int *rvalp); |
| static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *, |
| uint_t); |
| static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t, |
| uint_t, uint_t, cred_t *); |
| static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp, |
| struct pollhead **phpp); |
| |
| static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **); |
| static int rsm_attach(dev_info_t *, ddi_attach_cmd_t); |
| static int rsm_detach(dev_info_t *, ddi_detach_cmd_t); |
| |
| static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *); |
| static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t); |
| static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t); |
| static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int, |
| rsm_permission_t); |
| static void rsm_export_force_destroy(ddi_umem_cookie_t *); |
| static void rsmacl_free(rsmapi_access_entry_t *, int); |
| static void rsmpiacl_free(rsm_access_entry_t *, int); |
| |
| static int rsm_inc_pgcnt(pgcnt_t); |
| static void rsm_dec_pgcnt(pgcnt_t); |
| static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop); |
| static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *, |
| size_t *); |
| static void exporter_quiesce(); |
| static void rsmseg_suspend(rsmseg_t *, int *); |
| static void rsmsegshare_suspend(rsmseg_t *); |
| static int rsmseg_resume(rsmseg_t *, void **); |
| static int rsmsegshare_resume(rsmseg_t *); |
| |
| static struct cb_ops rsm_cb_ops = { |
| rsm_open, /* open */ |
| rsm_close, /* close */ |
| nodev, /* strategy */ |
| nodev, /* print */ |
| nodev, /* dump */ |
| nodev, /* read */ |
| nodev, /* write */ |
| rsm_ioctl, /* ioctl */ |
| rsm_devmap, /* devmap */ |
| NULL, /* mmap */ |
| rsm_segmap, /* segmap */ |
| rsm_chpoll, /* poll */ |
| ddi_prop_op, /* cb_prop_op */ |
| 0, /* streamtab */ |
| D_NEW|D_MP|D_DEVMAP, /* Driver compatibility flag */ |
| 0, |
| 0, |
| 0 |
| }; |
| |
| static struct dev_ops rsm_ops = { |
| DEVO_REV, /* devo_rev, */ |
| 0, /* refcnt */ |
| rsm_info, /* get_dev_info */ |
| nulldev, /* identify */ |
| nulldev, /* probe */ |
| rsm_attach, /* attach */ |
| rsm_detach, /* detach */ |
| nodev, /* reset */ |
| &rsm_cb_ops, /* driver operations */ |
| (struct bus_ops *)0, /* bus operations */ |
| 0, |
| ddi_quiesce_not_needed, /* quiesce */ |
| }; |
| |
| /* |
| * Module linkage information for the kernel. |
| */ |
| |
| static struct modldrv modldrv = { |
| &mod_driverops, /* Type of module. This one is a pseudo driver */ |
| "Remote Shared Memory Driver", |
| &rsm_ops, /* driver ops */ |
| }; |
| |
| static struct modlinkage modlinkage = { |
| MODREV_1, |
| (void *)&modldrv, |
| 0, |
| 0, |
| 0 |
| }; |
| |
| static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta); |
| static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta); |
| static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled); |
| |
| static kphysm_setup_vector_t rsm_dr_callback_vec = { |
| KPHYSM_SETUP_VECTOR_VERSION, |
| rsm_dr_callback_post_add, |
| rsm_dr_callback_pre_del, |
| rsm_dr_callback_post_del |
| }; |
| |
| /* This flag can be changed to 0 to help with PIT testing */ |
| int rsmka_modunloadok = 1; |
| int no_reply_cnt = 0; |
| |
| uint64_t rsm_ctrlmsg_errcnt = 0; |
| uint64_t rsm_ipcsend_errcnt = 0; |
| |
| #define MAX_NODES 64 |
| |
| static struct rsm_driver_data rsm_drv_data; |
| static struct rsmresource_table rsm_resource; |
| |
| static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t); |
| static void rsmresource_destroy(void); |
| static int rsmresource_alloc(minor_t *); |
| static rsmresource_t *rsmresource_free(minor_t rnum); |
| static int rsm_closeconnection(rsmseg_t *seg, void **cookie); |
| static int rsm_unpublish(rsmseg_t *seg, int mode); |
| static int rsm_unbind(rsmseg_t *seg); |
| static uint_t rsmhash(rsm_memseg_id_t key); |
| static void rsmhash_alloc(rsmhash_table_t *rhash, int size); |
| static void rsmhash_free(rsmhash_table_t *rhash, int size); |
| static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval); |
| static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval); |
| static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, |
| void *cookie); |
| int rsm_disconnect(rsmseg_t *seg); |
| void rsmseg_unload(rsmseg_t *); |
| void rsm_suspend_complete(rsm_node_id_t src_node, int flag); |
| |
| rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd, |
| rsm_intr_q_op_t opcode, rsm_addr_t src, |
| void *data, size_t size, rsm_intr_hand_arg_t arg); |
| |
| static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t); |
| |
| rsm_node_id_t my_nodeid; |
| |
| /* cookie, va, offsets and length for the barrier */ |
| static rsm_gnum_t *bar_va; |
| static ddi_umem_cookie_t bar_cookie; |
| static off_t barrier_offset; |
| static size_t barrier_size; |
| static int max_segs; |
| |
| /* cookie for the trash memory */ |
| static ddi_umem_cookie_t remap_cookie; |
| |
| static rsm_memseg_id_t rsm_nextavail_segmentid; |
| |
| extern taskq_t *work_taskq; |
| extern char *taskq_name; |
| |
| static dev_info_t *rsm_dip; /* private copy of devinfo pointer */ |
| |
| static rsmhash_table_t rsm_export_segs; /* list of exported segs */ |
| rsmhash_table_t rsm_import_segs; /* list of imported segs */ |
| static rsmhash_table_t rsm_event_queues; /* list of event queues */ |
| |
| static rsm_ipc_t rsm_ipc; /* ipc info */ |
| |
| /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */ |
| static list_head_t rsm_suspend_list; |
| |
| /* list of descriptors for remote importers */ |
| static importers_table_t importer_list; |
| |
| kmutex_t rsm_suspend_cvlock; |
| kcondvar_t rsm_suspend_cv; |
| |
| static kmutex_t rsm_lock; |
| |
| adapter_t loopback_adapter; |
| rsm_controller_attr_t loopback_attr; |
| |
| int rsmipc_send_controlmsg(path_t *path, int msgtype); |
| |
| void rsmka_init_loopback(); |
| |
| int rsmka_null_seg_create( |
| rsm_controller_handle_t, |
| rsm_memseg_export_handle_t *, |
| size_t, |
| uint_t, |
| rsm_memory_local_t *, |
| rsm_resource_callback_t, |
| rsm_resource_callback_arg_t); |
| |
| int rsmka_null_seg_destroy( |
| rsm_memseg_export_handle_t); |
| |
| int rsmka_null_bind( |
| rsm_memseg_export_handle_t, |
| off_t, |
| rsm_memory_local_t *, |
| rsm_resource_callback_t, |
| rsm_resource_callback_arg_t); |
| |
| int rsmka_null_unbind( |
| rsm_memseg_export_handle_t, |
| off_t, |
| size_t); |
| |
| int rsmka_null_rebind( |
| rsm_memseg_export_handle_t, |
| off_t, |
| rsm_memory_local_t *, |
| rsm_resource_callback_t, |
| rsm_resource_callback_arg_t); |
| |
| int rsmka_null_publish( |
| rsm_memseg_export_handle_t, |
| rsm_access_entry_t [], |
| uint_t, |
| rsm_memseg_id_t, |
| rsm_resource_callback_t, |
| rsm_resource_callback_arg_t); |
| |
| |
| int rsmka_null_republish( |
| rsm_memseg_export_handle_t, |
| rsm_access_entry_t [], |
| uint_t, |
| rsm_resource_callback_t, |
| rsm_resource_callback_arg_t); |
| |
| int rsmka_null_unpublish( |
| rsm_memseg_export_handle_t); |
| |
| rsm_ops_t null_rsmpi_ops; |
| |
| /* |
| * data and locks to keep track of total amount of exported memory |
| */ |
| static pgcnt_t rsm_pgcnt; |
| static pgcnt_t rsm_pgcnt_max; /* max allowed */ |
| static kmutex_t rsm_pgcnt_lock; |
| |
| static int rsm_enable_dr; |
| |
| static char loopback_str[] = "loopback"; |
| |
| int rsm_hash_size; |
| |
| /* |
| * The locking model is as follows: |
| * |
| * Local operations: |
| * find resource - grab reader lock on resource list |
| * insert rc - grab writer lock |
| * delete rc - grab writer lock and resource mutex |
| * read/write - no lock |
| * |
| * Remote invocations: |
| * find resource - grab read lock and resource mutex |
| * |
| * State: |
| * resource state - grab resource mutex |
| */ |
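| |
| /* |
| * For example, rsmresource_lookup() below follows the remote-invocation |
| * pattern: take the resource table lock as reader, grab the resource |
| * mutex, then drop the table lock: |
| * |
| * rw_enter(&rsm_resource.rsmrc_lock, RW_READER); |
| * p = blk->rsmrcblk_blks[j]; |
| * mutex_enter(&p->rsmrc_lock); |
| * rw_exit(&rsm_resource.rsmrc_lock); |
| */ |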
| |
| int |
| _init(void) |
| { |
| int e; |
| |
| e = mod_install(&modlinkage); |
| if (e != 0) { |
| return (e); |
| } |
| |
| mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL); |
| |
| mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL); |
| |
| |
| rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL); |
| |
| rsm_hash_size = RSM_HASHSZ; |
| |
| rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL); |
| |
| rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL); |
| |
| mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL); |
| |
| mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL); |
| cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0); |
| |
| mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL); |
| cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0); |
| |
| mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL); |
| cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0); |
| |
| rsm_ipc.count = RSMIPC_SZ; |
| rsm_ipc.wanted = 0; |
| rsm_ipc.sequence = 0; |
| |
| (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL); |
| |
| for (e = 0; e < RSMIPC_SZ; e++) { |
| rsmipc_slot_t *slot = &rsm_ipc.slots[e]; |
| |
| RSMIPC_SET(slot, RSMIPC_FREE); |
| mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL); |
| cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0); |
| } |
| |
| /* |
| * Initialize the suspend message list |
| */ |
| rsm_suspend_list.list_head = NULL; |
| mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL); |
| |
| /* |
| * It is assumed here that configuration data is available |
| * during system boot since _init may be called at that time. |
| */ |
| |
| rsmka_pathmanager_init(); |
| |
| DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, |
| "rsm: _init done\n")); |
| |
| return (DDI_SUCCESS); |
| |
| } |
| |
| int |
| _info(struct modinfo *modinfop) |
| { |
| |
| return (mod_info(&modlinkage, modinfop)); |
| } |
| |
| int |
| _fini(void) |
| { |
| int e; |
| |
| DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, |
| "rsm: _fini enter\n")); |
| |
| /* |
| * The rsmka_modunloadok flag is simply used to help with |
| * the PIT testing. Make this flag 0 to disallow modunload. |
| */ |
| if (rsmka_modunloadok == 0) |
| return (EBUSY); |
| |
| /* rsm_detach will be called as a result of mod_remove */ |
| e = mod_remove(&modlinkage); |
| if (e) { |
| DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR, |
| "Unable to fini RSM %x\n", e)); |
| return (e); |
| } |
| |
| rsmka_pathmanager_cleanup(); |
| |
| rw_destroy(&rsm_resource.rsmrc_lock); |
| |
| rw_destroy(&rsm_export_segs.rsmhash_rw); |
| rw_destroy(&rsm_import_segs.rsmhash_rw); |
| rw_destroy(&rsm_event_queues.rsmhash_rw); |
| |
| mutex_destroy(&importer_list.lock); |
| |
| mutex_destroy(&rsm_ipc.lock); |
| cv_destroy(&rsm_ipc.cv); |
| |
| (void) mutex_destroy(&rsm_suspend_list.list_lock); |
| |
| (void) mutex_destroy(&rsm_pgcnt_lock); |
| |
| DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n")); |
| |
| return (DDI_SUCCESS); |
| |
| } |
| |
| /*ARGSUSED1*/ |
| static int |
| rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) |
| { |
| minor_t rnum; |
| int percent; |
| int ret; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n")); |
| |
| switch (cmd) { |
| case DDI_ATTACH: |
| break; |
| case DDI_RESUME: |
| default: |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm:rsm_attach - cmd not supported\n")); |
| return (DDI_FAILURE); |
| } |
| |
| if (rsm_dip != NULL) { |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm:rsm_attach - supports only " |
| "one instance\n")); |
| return (DDI_FAILURE); |
| } |
| |
| rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi, |
| DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, |
| "enable-dynamic-reconfiguration", 1); |
| |
| mutex_enter(&rsm_drv_data.drv_lock); |
| rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING; |
| mutex_exit(&rsm_drv_data.drv_lock); |
| |
| if (rsm_enable_dr) { |
| #ifdef RSM_DRTEST |
| ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec, |
| (void *)NULL); |
| #else |
| ret = kphysm_setup_func_register(&rsm_dr_callback_vec, |
| (void *)NULL); |
| #endif |
| if (ret != 0) { |
| cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic " |
| "reconfiguration setup failed\n"); |
| return (DDI_FAILURE); |
| } |
| } |
| |
| mutex_enter(&rsm_drv_data.drv_lock); |
| ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING); |
| rsm_drv_data.drv_state = RSM_DRV_OK; |
| cv_broadcast(&rsm_drv_data.drv_cv); |
| mutex_exit(&rsm_drv_data.drv_lock); |
| |
| /* |
| * page_list_read_lock(); |
| * xx_setup(); |
| * page_list_read_unlock(); |
| */ |
| |
| rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi, |
| DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, |
| "segment-hashtable-size", RSM_HASHSZ); |
| if (rsm_hash_size == 0) { |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsm: segment-hashtable-size in rsm.conf " |
| "must be greater than 0, defaulting to 128\n")); |
| rsm_hash_size = RSM_HASHSZ; |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n", |
| rsm_hash_size)); |
| |
| rsm_pgcnt = 0; |
| |
| percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi, |
| DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, |
| "max-exported-memory", 0); |
| if (percent < 0) { |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm:rsm_attach not enough memory available to " |
| "export, or max-exported-memory set incorrectly.\n")); |
| return (DDI_FAILURE); |
| } |
| /* 0 indicates no fixed upper limit. maxmem is the max */ |
| /* available pageable physical mem */ |
| rsm_pgcnt_max = (percent*maxmem)/100; |
| |
| if (rsm_pgcnt_max > 0) { |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsm: Available physical memory = %lu pages, " |
| "Max exportable memory = %lu pages", |
| maxmem, rsm_pgcnt_max)); |
| } |
| |
| /* |
| * Create minor number |
| */ |
| if (rsmresource_alloc(&rnum) != RSM_SUCCESS) { |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm: rsm_attach - Unable to get " |
| "minor number\n")); |
| return (DDI_FAILURE); |
| } |
| |
| ASSERT(rnum == RSM_DRIVER_MINOR); |
| |
| if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR, |
| rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) { |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm: rsm_attach - unable to allocate " |
| "minor #\n")); |
| return (DDI_FAILURE); |
| } |
| |
| rsm_dip = devi; |
| /* |
| * Allocate the hashtables |
| */ |
| rsmhash_alloc(&rsm_export_segs, rsm_hash_size); |
| rsmhash_alloc(&rsm_import_segs, rsm_hash_size); |
| |
| importer_list.bucket = (importing_token_t **) |
| kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP); |
| |
| /* |
| * Allocate a resource struct |
| */ |
| { |
| rsmresource_t *p; |
| |
| p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP); |
| |
| mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL); |
| |
| rsmresource_insert(rnum, p, RSM_RESOURCE_BAR); |
| } |
| |
| /* |
| * Based on the rsm.conf property max-segments, determine the maximum |
| * number of segments that can be exported/imported. This is then used |
| * to determine the size for barrier failure pages. |
| */ |
| |
| /* First get the max number of segments from the rsm.conf file */ |
| max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi, |
| DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, |
| "max-segments", 0); |
| if (max_segs == 0) { |
| /* Use default number of segments */ |
| max_segs = RSM_MAX_NUM_SEG; |
| } |
| |
| /* |
| * Based on the max number of segments allowed, determine the barrier |
| * page size. Add 1 to max_segs since the barrier page itself uses |
| * a slot. |
| */ |
| barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t), |
| PAGESIZE); |
| |
| /* |
| * allocation of the barrier failure page |
| */ |
| bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size, |
| DDI_UMEM_SLEEP, &bar_cookie); |
| |
| /* |
| * Set the barrier_offset |
| */ |
| barrier_offset = 0; |
| |
| /* |
| * Allocate trash memory and get a cookie for it. This will be used |
| * when remapping segments during force disconnects. Allocate the |
| * trash memory with a large size which is page aligned. |
| */ |
| (void) ddi_umem_alloc((size_t)TRASHSIZE, |
| DDI_UMEM_TRASH, &remap_cookie); |
| |
| /* initialize user segment id allocation variable */ |
| rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE; |
| |
| /* |
| * initialize the null_rsmpi_ops vector and the loopback adapter |
| */ |
| rsmka_init_loopback(); |
| |
| |
| ddi_report_dev(devi); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n")); |
| |
| return (DDI_SUCCESS); |
| } |
| |
| /* |
| * The call to mod_remove in the _fini routine will cause the system |
| * to call rsm_detach |
| */ |
| /*ARGSUSED*/ |
| static int |
| rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) |
| { |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n")); |
| |
| switch (cmd) { |
| case DDI_DETACH: |
| break; |
| default: |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm:rsm_detach - cmd %x not supported\n", |
| cmd)); |
| return (DDI_FAILURE); |
| } |
| |
| mutex_enter(&rsm_drv_data.drv_lock); |
| while (rsm_drv_data.drv_state != RSM_DRV_OK) |
| cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); |
| rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING; |
| mutex_exit(&rsm_drv_data.drv_lock); |
| |
| /* |
| * Unregister the DR callback functions |
| */ |
| if (rsm_enable_dr) { |
| #ifdef RSM_DRTEST |
| rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec, |
| (void *)NULL); |
| #else |
| kphysm_setup_func_unregister(&rsm_dr_callback_vec, |
| (void *)NULL); |
| #endif |
| } |
| |
| mutex_enter(&rsm_drv_data.drv_lock); |
| ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING); |
| rsm_drv_data.drv_state = RSM_DRV_NEW; |
| mutex_exit(&rsm_drv_data.drv_lock); |
| |
| ASSERT(rsm_suspend_list.list_head == NULL); |
| |
| /* |
| * Release all resources, seglist, controller, ... |
| */ |
| |
| /* remove intersend queues */ |
| /* remove registered services */ |
| |
| |
| ddi_remove_minor_node(dip, DRIVER_NAME); |
| rsm_dip = NULL; |
| |
| /* |
| * Free minor zero resource |
| */ |
| { |
| rsmresource_t *p; |
| |
| p = rsmresource_free(RSM_DRIVER_MINOR); |
| if (p) { |
| mutex_destroy(&p->rsmrc_lock); |
| kmem_free((void *)p, sizeof (*p)); |
| } |
| } |
| |
| /* |
| * Free resource table |
| */ |
| |
| rsmresource_destroy(); |
| |
| /* |
| * Free the hash tables |
| */ |
| rsmhash_free(&rsm_export_segs, rsm_hash_size); |
| rsmhash_free(&rsm_import_segs, rsm_hash_size); |
| |
| kmem_free((void *)importer_list.bucket, |
| rsm_hash_size * sizeof (importing_token_t *)); |
| importer_list.bucket = NULL; |
| |
| |
| /* free barrier page */ |
| if (bar_cookie != NULL) { |
| ddi_umem_free(bar_cookie); |
| } |
| bar_va = NULL; |
| bar_cookie = NULL; |
| |
| /* |
| * Free the memory allocated for the trash |
| */ |
| if (remap_cookie != NULL) { |
| ddi_umem_free(remap_cookie); |
| } |
| remap_cookie = NULL; |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n")); |
| |
| return (DDI_SUCCESS); |
| } |
| |
| /*ARGSUSED*/ |
| static int |
| rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) |
| { |
| register int error; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n")); |
| |
| switch (infocmd) { |
| case DDI_INFO_DEVT2DEVINFO: |
| if (rsm_dip == NULL) |
| error = DDI_FAILURE; |
| else { |
| *result = (void *)rsm_dip; |
| error = DDI_SUCCESS; |
| } |
| break; |
| case DDI_INFO_DEVT2INSTANCE: |
| *result = (void *)0; |
| error = DDI_SUCCESS; |
| break; |
| default: |
| error = DDI_FAILURE; |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n")); |
| return (error); |
| } |
| |
| adapter_t * |
| rsm_getadapter(rsm_ioctlmsg_t *msg, int mode) |
| { |
| adapter_t *adapter; |
| char adapter_devname[MAXNAMELEN]; |
| int instance; |
| DBG_DEFINE(category, |
| RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n")); |
| |
| instance = msg->cnum; |
| |
| if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) { |
| return (NULL); |
| } |
| |
| if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode)) |
| return (NULL); |
| |
| if (strcmp(adapter_devname, "loopback") == 0) |
| return (&loopback_adapter); |
| |
| adapter = rsmka_lookup_adapter(adapter_devname, instance); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n")); |
| |
| return (adapter); |
| } |
| |
| |
| /* |
| * *********************** Resource Number Management ******************** |
| * All resources are stored in a simple hash table. The table is an array |
| * of pointers to resource blks. Each blk contains: |
| * base - base number of this blk |
| * used - number of used slots in this blk. |
| * blks - array of pointers to resource items. |
| * An entry in a resource blk is empty if it's NULL. |
| * |
| * We start with no resource array. Each time we run out of slots, we |
| * reallocate a larger array, copy the existing pointers into it, and |
| * allocate a new resource blk which is added to the hash table. |
| * |
| * The resource control block contains: |
| * root - array of pointer of resource blks |
| * sz - current size of array. |
| * len - last valid entry in array. |
| * |
| * A search operation based on a resource number is as follows: |
| * index = rnum / RESOURCE_BLKSZ; |
| * ASSERT(index < resource_block.len); |
| * ASSERT(index < resource_block.sz); |
| * offset = rnum % RESOURCE_BLKSZ; |
| * ASSERT(offset >= resource_block.root[index]->base); |
| * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ); |
| * return resource_block.root[index]->blks[offset]; |
| * |
| * A resource blk is freed when its used count reaches zero. |
| */ |
| static int |
| rsmresource_alloc(minor_t *rnum) |
| { |
| |
| /* search for available resource slot */ |
| int i, j, empty = -1; |
| rsmresource_blk_t *blk; |
| |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_alloc enter\n")); |
| |
| rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); |
| |
| /* Try to find an empty slot */ |
| for (i = 0; i < rsm_resource.rsmrc_len; i++) { |
| blk = rsm_resource.rsmrc_root[i]; |
| if (blk != NULL && blk->rsmrcblk_avail > 0) { |
| /* found an empty slot in this blk */ |
| for (j = 0; j < RSMRC_BLKSZ; j++) { |
| if (blk->rsmrcblk_blks[j] == NULL) { |
| *rnum = (minor_t) |
| (j + (i * RSMRC_BLKSZ)); |
| /* |
| * obey gen page limits |
| */ |
| if (*rnum >= max_segs + 1) { |
| if (empty < 0) { |
| rw_exit(&rsm_resource. |
| rsmrc_lock); |
| DBG_PRINTF(( |
| RSM_KERNEL_ALL, |
| RSM_ERR, |
| "rsmresource" |
| "_alloc failed:" |
| "not enough res" |
| "%d\n", *rnum)); |
| return (RSMERR_INSUFFICIENT_RESOURCES); |
| } else { |
| /* use empty slot */ |
| break; |
| } |
| |
| } |
| |
| blk->rsmrcblk_blks[j] = RSMRC_RESERVED; |
| blk->rsmrcblk_avail--; |
| rw_exit(&rsm_resource.rsmrc_lock); |
| DBG_PRINTF((RSM_KERNEL_ALL, |
| RSM_DEBUG_VERBOSE, |
| "rsmresource_alloc done\n")); |
| return (RSM_SUCCESS); |
| } |
| } |
| } else if (blk == NULL && empty < 0) { |
| /* remember first empty slot */ |
| empty = i; |
| } |
| } |
| |
| /* Couldn't find anything, allocate a new blk */ |
| /* |
| * Do we need to reallocate the root array |
| */ |
| if (empty < 0) { |
| if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) { |
| /* |
| * Allocate new array and copy current stuff into it |
| */ |
| rsmresource_blk_t **p; |
| uint_t newsz = (uint_t)rsm_resource.rsmrc_sz + |
| RSMRC_BLKSZ; |
| /* |
| * Don't allocate more than the max valid rnum |
| */ |
| if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >= |
| max_segs + 1) { |
| rw_exit(&rsm_resource.rsmrc_lock); |
| return (RSMERR_INSUFFICIENT_RESOURCES); |
| } |
| |
| p = (rsmresource_blk_t **)kmem_zalloc( |
| newsz * sizeof (*p), |
| KM_SLEEP); |
| |
| if (rsm_resource.rsmrc_root) { |
| uint_t oldsz; |
| |
| oldsz = (uint_t)(rsm_resource.rsmrc_sz * |
| (int)sizeof (*p)); |
| |
| /* |
| * Copy old data into new space and |
| * free old stuff |
| */ |
| bcopy(rsm_resource.rsmrc_root, p, oldsz); |
| kmem_free(rsm_resource.rsmrc_root, oldsz); |
| } |
| |
| rsm_resource.rsmrc_root = p; |
| rsm_resource.rsmrc_sz = (int)newsz; |
| } |
| |
| empty = rsm_resource.rsmrc_len; |
| rsm_resource.rsmrc_len++; |
| } |
| |
| /* |
| * Allocate a new blk |
| */ |
| blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP); |
| ASSERT(rsm_resource.rsmrc_root[empty] == NULL); |
| rsm_resource.rsmrc_root[empty] = blk; |
| blk->rsmrcblk_avail = RSMRC_BLKSZ - 1; |
| |
| /* |
| * Allocate slot |
| */ |
| |
| *rnum = (minor_t)(empty * RSMRC_BLKSZ); |
| |
| /* |
| * watch out not to exceed bounds of barrier page |
| */ |
| if (*rnum >= max_segs + 1) { |
| rw_exit(&rsm_resource.rsmrc_lock); |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR, |
| "rsmresource_alloc failed %d\n", *rnum)); |
| |
| return (RSMERR_INSUFFICIENT_RESOURCES); |
| } |
| blk->rsmrcblk_blks[0] = RSMRC_RESERVED; |
| |
| |
| rw_exit(&rsm_resource.rsmrc_lock); |
| |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_alloc done\n")); |
| |
| return (RSM_SUCCESS); |
| } |
| |
| static rsmresource_t * |
| rsmresource_free(minor_t rnum) |
| { |
| |
| /* search for available resource slot */ |
| int i, j; |
| rsmresource_blk_t *blk; |
| rsmresource_t *p; |
| |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_free enter\n")); |
| |
| i = (int)(rnum / RSMRC_BLKSZ); |
| j = (int)(rnum % RSMRC_BLKSZ); |
| |
| if (i >= rsm_resource.rsmrc_len) { |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_free done\n")); |
| return (NULL); |
| } |
| |
| rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); |
| |
| ASSERT(rsm_resource.rsmrc_root); |
| ASSERT(i < rsm_resource.rsmrc_len); |
| ASSERT(i < rsm_resource.rsmrc_sz); |
| blk = rsm_resource.rsmrc_root[i]; |
| if (blk == NULL) { |
| rw_exit(&rsm_resource.rsmrc_lock); |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_free done\n")); |
| return (NULL); |
| } |
| |
| ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */ |
| |
| p = blk->rsmrcblk_blks[j]; |
| if (p == RSMRC_RESERVED) { |
| p = NULL; |
| } |
| |
| blk->rsmrcblk_blks[j] = NULL; |
| blk->rsmrcblk_avail++; |
| if (blk->rsmrcblk_avail == RSMRC_BLKSZ) { |
| /* free this blk */ |
| kmem_free(blk, sizeof (*blk)); |
| rsm_resource.rsmrc_root[i] = NULL; |
| } |
| |
| rw_exit(&rsm_resource.rsmrc_lock); |
| |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_free done\n")); |
| |
| return (p); |
| } |
| |
| static rsmresource_t * |
| rsmresource_lookup(minor_t rnum, int lock) |
| { |
| int i, j; |
| rsmresource_blk_t *blk; |
| rsmresource_t *p; |
| |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_lookup enter\n")); |
| |
| /* Find resource and lock it in READER mode */ |
| /* search for available resource slot */ |
| |
| i = (int)(rnum / RSMRC_BLKSZ); |
| j = (int)(rnum % RSMRC_BLKSZ); |
| |
| if (i >= rsm_resource.rsmrc_len) { |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_lookup done\n")); |
| return (NULL); |
| } |
| |
| rw_enter(&rsm_resource.rsmrc_lock, RW_READER); |
| |
| blk = rsm_resource.rsmrc_root[i]; |
| if (blk != NULL) { |
| ASSERT(i < rsm_resource.rsmrc_len); |
| ASSERT(i < rsm_resource.rsmrc_sz); |
| |
| p = blk->rsmrcblk_blks[j]; |
| if (lock == RSM_LOCK) { |
| if (p != RSMRC_RESERVED) { |
| mutex_enter(&p->rsmrc_lock); |
| } else { |
| p = NULL; |
| } |
| } |
| } else { |
| p = NULL; |
| } |
| rw_exit(&rsm_resource.rsmrc_lock); |
| |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_lookup done\n")); |
| |
| return (p); |
| } |
| |
| static void |
| rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type) |
| { |
| /* Find resource and lock it in READER mode */ |
| /* Caller can upgrade if need be */ |
| /* search for available resource slot */ |
| int i, j; |
| rsmresource_blk_t *blk; |
| |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_insert enter\n")); |
| |
| i = (int)(rnum / RSMRC_BLKSZ); |
| j = (int)(rnum % RSMRC_BLKSZ); |
| |
| p->rsmrc_type = type; |
| p->rsmrc_num = rnum; |
| |
| rw_enter(&rsm_resource.rsmrc_lock, RW_READER); |
| |
| ASSERT(rsm_resource.rsmrc_root); |
| ASSERT(i < rsm_resource.rsmrc_len); |
| ASSERT(i < rsm_resource.rsmrc_sz); |
| |
| blk = rsm_resource.rsmrc_root[i]; |
| ASSERT(blk); |
| |
| ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED); |
| |
| blk->rsmrcblk_blks[j] = p; |
| |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_insert done\n")); |
| |
| rw_exit(&rsm_resource.rsmrc_lock); |
| } |
| |
| static void |
| rsmresource_destroy() |
| { |
| int i, j; |
| |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_destroy enter\n")); |
| |
| rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); |
| |
| for (i = 0; i < rsm_resource.rsmrc_len; i++) { |
| rsmresource_blk_t *blk; |
| |
| blk = rsm_resource.rsmrc_root[i]; |
| if (blk == NULL) { |
| continue; |
| } |
| for (j = 0; j < RSMRC_BLKSZ; j++) { |
| if (blk->rsmrcblk_blks[j] != NULL) { |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "Not null slot %d, %lx\n", j, |
| (size_t)blk->rsmrcblk_blks[j])); |
| } |
| } |
| kmem_free(blk, sizeof (*blk)); |
| rsm_resource.rsmrc_root[i] = NULL; |
| } |
| if (rsm_resource.rsmrc_root) { |
| i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *); |
| kmem_free(rsm_resource.rsmrc_root, (uint_t)i); |
| rsm_resource.rsmrc_root = NULL; |
| rsm_resource.rsmrc_len = 0; |
| rsm_resource.rsmrc_sz = 0; |
| } |
| |
| DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, |
| "rsmresource_destroy done\n")); |
| |
| rw_exit(&rsm_resource.rsmrc_lock); |
| } |
| |
| |
| /* ******************** Generic Key Hash Table Management ********* */ |
| static rsmresource_t * |
| rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key, |
| rsm_resource_state_t state) |
| { |
| rsmresource_t *p; |
| uint_t hashval; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n")); |
| |
| hashval = rsmhash(key); |
| |
| DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n", |
| key, hashval)); |
| |
| rw_enter(&rhash->rsmhash_rw, RW_READER); |
| |
| p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval); |
| |
| for (; p; p = p->rsmrc_next) { |
| if (p->rsmrc_key == key) { |
| /* acquire resource lock */ |
| RSMRC_LOCK(p); |
| break; |
| } |
| } |
| |
| rw_exit(&rhash->rsmhash_rw); |
| |
| if (p != NULL && p->rsmrc_state != state) { |
| /* state changed, release lock and return null */ |
| RSMRC_UNLOCK(p); |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsmhash_lookup done: state changed\n")); |
| return (NULL); |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n")); |
| |
| return (p); |
| } |
| |
| static void |
| rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm) |
| { |
| rsmresource_t *p, **back; |
| uint_t hashval; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n")); |
| |
| hashval = rsmhash(rcelm->rsmrc_key); |
| |
| DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n", |
| rcelm->rsmrc_key, hashval)); |
| |
| /* |
| * It's ok not to find the segment. |
| */ |
| rw_enter(&rhash->rsmhash_rw, RW_WRITER); |
| |
| back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval); |
| |
| for (; (p = *back) != NULL; back = &p->rsmrc_next) { |
| if (p == rcelm) { |
| *back = rcelm->rsmrc_next; |
| break; |
| } |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n")); |
| |
| rw_exit(&rhash->rsmhash_rw); |
| } |
| |
| static int |
| rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key, |
| int dup_check, rsm_resource_state_t state) |
| { |
| rsmresource_t *p = NULL, **bktp; |
| uint_t hashval; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n")); |
| |
| /* lock table */ |
| rw_enter(&rhash->rsmhash_rw, RW_WRITER); |
| |
| /* |
| * If the current resource state is other than the state passed in |
| * then the resource is (probably) already on the list. eg. for an |
| * import segment if the state is not RSM_STATE_NEW then it's on the |
| * list already. |
| */ |
| RSMRC_LOCK(new); |
| if (new->rsmrc_state != state) { |
| RSMRC_UNLOCK(new); |
| rw_exit(&rhash->rsmhash_rw); |
| return (RSMERR_BAD_SEG_HNDL); |
| } |
| |
| hashval = rsmhash(key); |
| DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval)); |
| |
| if (dup_check) { |
| /* |
| * Used for checking export segments; don't want to have |
| * the same key used for multiple segments. |
| */ |
| |
| p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval); |
| |
| for (; p; p = p->rsmrc_next) { |
| if (p->rsmrc_key == key) { |
| RSMRC_UNLOCK(new); |
| break; |
| } |
| } |
| } |
| |
| if (p == NULL) { |
| /* Key doesn't exist, add it */ |
| |
| bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval); |
| |
| new->rsmrc_key = key; |
| new->rsmrc_next = *bktp; |
| *bktp = new; |
| } |
| |
| rw_exit(&rhash->rsmhash_rw); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n")); |
| |
| return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE); |
| } |
| |
| /* |
| * XOR each byte of the key. |
| */ |
| static uint_t |
| rsmhash(rsm_memseg_id_t key) |
| { |
| uint_t hash = key; |
| |
| hash ^= (key >> 8); |
| hash ^= (key >> 16); |
| hash ^= (key >> 24); |
| |
| return (hash % rsm_hash_size); |
| |
| } |
| |
| /* |
| * generic function to get a specific bucket |
| */ |
| static void * |
| rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval) |
| { |
| |
| if (rhash->bucket == NULL) |
| return (NULL); |
| else |
| return ((void *)rhash->bucket[hashval]); |
| } |
| |
| /* |
| * generic function to get a specific bucket's address |
| */ |
| static void ** |
| rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval) |
| { |
| if (rhash->bucket == NULL) |
| return (NULL); |
| else |
| return ((void **)&(rhash->bucket[hashval])); |
| } |
| |
| /* |
| * generic function to alloc a hash table |
| */ |
| static void |
| rsmhash_alloc(rsmhash_table_t *rhash, int size) |
| { |
| rhash->bucket = (rsmresource_t **) |
| kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP); |
| } |
| |
| /* |
| * generic function to free a hash table |
| */ |
| static void |
| rsmhash_free(rsmhash_table_t *rhash, int size) |
| { |
| |
| kmem_free((void *)rhash->bucket, size * sizeof (caddr_t)); |
| rhash->bucket = NULL; |
| |
| } |
| /* *********************** Exported Segment Key Management ************ */ |
| |
| #define rsmexport_add(new, key) \ |
| rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \ |
| RSM_STATE_BIND) |
| |
| #define rsmexport_rm(arg) \ |
| rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg)) |
| |
| #define rsmexport_lookup(key) \ |
| (rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT) |
| |
| /* ************************** Import Segment List Management ********** */ |
| |
| /* |
| * Add segment to import list. This will be useful for paging and loopback |
| * segment unloading. |
| */ |
| #define rsmimport_add(arg, key) \ |
| rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \ |
| RSM_STATE_NEW) |
| |
| #define rsmimport_rm(arg) \ |
| rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg)) |
| |
| /* |
| * #define rsmimport_lookup(key) \ |
| * (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT) |
| */ |
| |
| /* |
| * Increase the ref count and make the import segment point to the |
| * shared data structure. Return a pointer to the shared data struct, |
| * which is locked upon return. |
| */ |
| static rsm_import_share_t * |
| rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter, |
| rsmseg_t *segp) |
| { |
| uint_t hash; |
| rsmresource_t *p; |
| rsm_import_share_t *shdatap; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n")); |
| |
| hash = rsmhash(key); |
| /* lock table */ |
| rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER); |
| DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n", |
| key, hash)); |
| |
| p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash); |
| |
| for (; p; p = p->rsmrc_next) { |
| /* |
| * Look for an entry that is importing the same exporter |
| * with the share data structure allocated. |
| */ |
| if ((p->rsmrc_key == key) && |
| (p->rsmrc_node == node) && |
| (p->rsmrc_adapter == adapter) && |
| (((rsmseg_t *)p)->s_share != NULL)) { |
| shdatap = ((rsmseg_t *)p)->s_share; |
| break; |
| } |
| } |
| |
| if (p == NULL) { |
| /* we are the first importer, create the shared data struct */ |
| shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP); |
| shdatap->rsmsi_state = RSMSI_STATE_NEW; |
| shdatap->rsmsi_segid = key; |
| shdatap->rsmsi_node = node; |
| mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL); |
| cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0); |
| } |
| |
| rsmseglock_acquire(segp); |
| |
| /* we grab the shared lock before returning from this function */ |
| mutex_enter(&shdatap->rsmsi_lock); |
| |
| shdatap->rsmsi_refcnt++; |
| segp->s_share = shdatap; |
| |
| rsmseglock_release(segp); |
| |
| rw_exit(&rsm_import_segs.rsmhash_rw); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n")); |
| |
| return (shdatap); |
| } |
| |
| /* |
| * the shared data structure should be locked before calling |
| * rsmsharecv_signal(). |
| * Change the state and signal any waiting segments. |
| */ |
| void |
| rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate) |
| { |
| ASSERT(rsmsharelock_held(seg)); |
| |
| if (seg->s_share->rsmsi_state == oldstate) { |
| seg->s_share->rsmsi_state = newstate; |
| cv_broadcast(&seg->s_share->rsmsi_cv); |
| } |
| } |
| |
| /* |
| * Add an importer token to the importer list hash table. |
| */ |
| static void |
| importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr, |
| void *cookie) |
| { |
| |
| importing_token_t *head; |
| importing_token_t *new_token; |
| int index; |
| |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n")); |
| |
| new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP); |
| new_token->importing_node = node; |
| new_token->key = key; |
| new_token->import_segment_cookie = cookie; |
| new_token->importing_adapter_hwaddr = hwaddr; |
| |
| index = rsmhash(key); |
| |
| mutex_enter(&importer_list.lock); |
| |
| head = importer_list.bucket[index]; |
| importer_list.bucket[index] = new_token; |
| new_token->next = head; |
| mutex_exit(&importer_list.lock); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n")); |
| } |
| |
| static void |
| importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie) |
| { |
| |
| importing_token_t *prev, *token = NULL; |
| int index; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n")); |
| |
| index = rsmhash(key); |
| |
| mutex_enter(&importer_list.lock); |
| |
| token = importer_list.bucket[index]; |
| |
| prev = token; |
| while (token != NULL) { |
| if (token->importing_node == node && |
| token->import_segment_cookie == cookie) { |
| if (prev == token) |
| importer_list.bucket[index] = token->next; |
| else |
| prev->next = token->next; |
| kmem_free((void *)token, sizeof (*token)); |
| break; |
| } else { |
| prev = token; |
| token = token->next; |
| } |
| } |
| |
| mutex_exit(&importer_list.lock); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n")); |
| |
| |
| } |
| |
| /* **************************Segment Structure Management ************* */ |
| |
| /* |
| * Free segment structure |
| */ |
| static void |
| rsmseg_free(rsmseg_t *seg) |
| { |
| |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n")); |
| |
| /* need to take seglock here to avoid race with rsmmap_unmap() */ |
| rsmseglock_acquire(seg); |
| if (seg->s_ckl != NULL) { |
| /* Segment is still busy */ |
| seg->s_state = RSM_STATE_END; |
| rsmseglock_release(seg); |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsmseg_free done\n")); |
| return; |
| } |
| |
| rsmseglock_release(seg); |
| |
| ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW); |
| |
| /* |
| * If it's an importer, decrement the refcount |
| * and if it's down to zero, free the shared data structure. |
| * This is where failures during rsm_connect() are unrefcounted. |
| */ |
| if (seg->s_share != NULL) { |
| |
| ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT); |
| |
| rsmsharelock_acquire(seg); |
| |
| ASSERT(seg->s_share->rsmsi_refcnt > 0); |
| |
| seg->s_share->rsmsi_refcnt--; |
| |
| if (seg->s_share->rsmsi_refcnt == 0) { |
| rsmsharelock_release(seg); |
| mutex_destroy(&seg->s_share->rsmsi_lock); |
| cv_destroy(&seg->s_share->rsmsi_cv); |
| kmem_free((void *)(seg->s_share), |
| sizeof (rsm_import_share_t)); |
| } else { |
| rsmsharelock_release(seg); |
| } |
| /* |
| * The following needs to be done after any |
| * rsmsharelock calls which use seg->s_share. |
| */ |
| seg->s_share = NULL; |
| } |
| |
| cv_destroy(&seg->s_cv); |
| mutex_destroy(&seg->s_lock); |
| rsmacl_free(seg->s_acl, seg->s_acl_len); |
| rsmpiacl_free(seg->s_acl_in, seg->s_acl_len); |
| if (seg->s_adapter) |
| rsmka_release_adapter(seg->s_adapter); |
| |
| kmem_free((void *)seg, sizeof (*seg)); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n")); |
| |
| } |
| |
| |
| static rsmseg_t * |
| rsmseg_alloc(minor_t num, struct cred *cred) |
| { |
| rsmseg_t *new; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n")); |
| /* |
| * allocate memory for new segment. This should be a segkmem cache. |
| */ |
| new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP); |
| |
| new->s_state = RSM_STATE_NEW; |
| new->s_minor = num; |
| new->s_acl_len = 0; |
| new->s_cookie = NULL; |
| new->s_adapter = NULL; |
| |
| new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask; |
| /* we don't have a key yet, will set at export/connect */ |
| new->s_uid = crgetuid(cred); |
| new->s_gid = crgetgid(cred); |
| |
| mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL); |
| cv_init(&new->s_cv, NULL, CV_DRIVER, 0); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n")); |
| |
| return (new); |
| } |
| |
| /* ******************************** Driver Open/Close/Poll *************** */ |
| |
| /*ARGSUSED1*/ |
| static int |
| rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred) |
| { |
| minor_t rnum; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n")); |
| /* |
| * Char only |
| */ |
| if (otyp != OTYP_CHR) { |
| DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n")); |
| return (EINVAL); |
| } |
| |
| /* |
| * Only zero can be opened, clones are used for resources. |
| */ |
| if (getminor(*devp) != RSM_DRIVER_MINOR) { |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm_open: bad minor %d\n", getminor(*devp))); |
| return (ENODEV); |
| } |
| |
| if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) { |
| DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n")); |
| return (EPERM); |
| } |
| |
| if (!(flag & FWRITE)) { |
| /* |
| * The library function _rsm_librsm_init calls open for |
| * /dev/rsm with flag set to O_RDONLY. We want a valid |
| * file descriptor to be returned for minor device zero. |
| */ |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsm_open RDONLY done\n")); |
| return (DDI_SUCCESS); |
| } |
| |
| /* |
| * - allocate new minor number and segment. |
| * - add segment to list of all segments. |
| * - set minordev data to segment |
| * - update devp argument to new device |
| * - update s_cred to cred; make sure you do crhold(cred); |
| */ |
| |
| /* allocate a new resource number */ |
| if (rsmresource_alloc(&rnum) == RSM_SUCCESS) { |
| /* |
| * We will bind this minor to a specific resource in first |
| * ioctl |
| */ |
| *devp = makedevice(getmajor(*devp), rnum); |
| } else { |
| return (EAGAIN); |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n")); |
| return (DDI_SUCCESS); |
| } |
| |
| static void |
| rsmseg_close(rsmseg_t *seg, int force_flag) |
| { |
| int e = RSM_SUCCESS; |
| |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n")); |
| |
| rsmseglock_acquire(seg); |
| if (!force_flag && (seg->s_hdr.rsmrc_type == |
| RSM_RESOURCE_EXPORT_SEGMENT)) { |
| /* |
| * If we are processing rsm_close wait for force_destroy |
| * processing to complete since force_destroy processing |
| * needs to finish first before we can free the segment. |
| * force_destroy is only for export segments |
| */ |
| while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) { |
| cv_wait(&seg->s_cv, &seg->s_lock); |
| } |
| } |
| rsmseglock_release(seg); |
| |
| /* It's ok to read the state without a lock */ |
| switch (seg->s_state) { |
| case RSM_STATE_EXPORT: |
| case RSM_STATE_EXPORT_QUIESCING: |
| case RSM_STATE_EXPORT_QUIESCED: |
| e = rsm_unpublish(seg, 1); |
| /* FALLTHRU */ |
| case RSM_STATE_BIND_QUIESCED: |
| /* FALLTHRU */ |
| case RSM_STATE_BIND: |
| e = rsm_unbind(seg); |
| if (e != RSM_SUCCESS && force_flag == 1) |
| return; |
| ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT); |
| /* FALLTHRU */ |
| case RSM_STATE_NEW_QUIESCED: |
| rsmseglock_acquire(seg); |
| seg->s_state = RSM_STATE_NEW; |
| cv_broadcast(&seg->s_cv); |
| rsmseglock_release(seg); |
| break; |
| case RSM_STATE_NEW: |
| break; |
| case RSM_STATE_ZOMBIE: |
| /* |
| * Segments in this state have been removed off the |
| * exported segments list and have been unpublished |
| * and unbind. These segments have been removed during |
| * a callback to the rsm_export_force_destroy, which |
| * is called for the purpose of unlocking these |
| * exported memory segments when a process exits but |
| * leaves the segments locked down since rsm_close is |
| * is not called for the segments. This can happen |
| * when a process calls fork or exec and then exits. |
| * Once the segments are in the ZOMBIE state, all that |
| * remains is to destroy them when rsm_close is called. |
| * This is done here. Thus, for such segments the |
| * the state is changed to new so that later in this |
| * function rsmseg_free is called. |
| */ |
| rsmseglock_acquire(seg); |
| seg->s_state = RSM_STATE_NEW; |
| rsmseglock_release(seg); |
| break; |
| case RSM_STATE_MAP_QUIESCE: |
| case RSM_STATE_ACTIVE: |
| /* Disconnect will handle the unmap */ |
| case RSM_STATE_CONN_QUIESCE: |
| case RSM_STATE_CONNECT: |
| case RSM_STATE_DISCONNECT: |
| ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); |
| (void) rsm_disconnect(seg); |
| break; |
| case RSM_STATE_MAPPING: |
| /*FALLTHRU*/ |
| case RSM_STATE_END: |
| DBG_PRINTF((category, RSM_ERR, |
| "Invalid segment state %d in rsm_close\n", seg->s_state)); |
| break; |
| default: |
| DBG_PRINTF((category, RSM_ERR, |
| "Invalid segment state %d in rsm_close\n", seg->s_state)); |
| break; |
| } |
| |
| /* |
| * check state. |
| * - make sure you do crfree(s_cred); |
| * release segment and minor number |
| */ |
| ASSERT(seg->s_state == RSM_STATE_NEW); |
| |
| /* |
| * The export_force_destroy callback is created to unlock |
| * the exported segments of a process |
| * when the process does a fork or exec and then exits calls this |
| * function with the force flag set to 1 which indicates that the |
| * segment state must be converted to ZOMBIE. This state means that the |
| * segments still exist and have been unlocked and most importantly the |
| * only operation allowed is to destroy them on an rsm_close. |
| */ |
| if (force_flag) { |
| rsmseglock_acquire(seg); |
| seg->s_state = RSM_STATE_ZOMBIE; |
| rsmseglock_release(seg); |
| } else { |
| rsmseg_free(seg); |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n")); |
| } |
| |
| static int |
| rsm_close(dev_t dev, int flag, int otyp, cred_t *cred) |
| { |
| minor_t rnum = getminor(dev); |
| rsmresource_t *res; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n")); |
| |
	flag = flag; cred = cred;	/* unused; keep lint quiet */
| |
| if (otyp != OTYP_CHR) |
| return (EINVAL); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum)); |
| |
| /* |
| * At this point we are the last reference to the resource. |
| * Free resource number from resource table. |
| * It's ok to remove number before we free the segment. |
| * We need to lock the resource to protect against remote calls. |
| */ |
| if (rnum == RSM_DRIVER_MINOR || |
| (res = rsmresource_free(rnum)) == NULL) { |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n")); |
| return (DDI_SUCCESS); |
| } |
| |
| switch (res->rsmrc_type) { |
| case RSM_RESOURCE_EXPORT_SEGMENT: |
| case RSM_RESOURCE_IMPORT_SEGMENT: |
| rsmseg_close((rsmseg_t *)res, 0); |
| break; |
| case RSM_RESOURCE_BAR: |
| DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n")); |
| break; |
| default: |
| break; |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n")); |
| |
| return (DDI_SUCCESS); |
| } |
| |
| /* |
| * rsm_inc_pgcnt |
| * |
| * Description: increment rsm page counter. |
| * |
| * Parameters: pgcnt_t pnum; number of pages to be used |
| * |
| * Returns: RSM_SUCCESS if memory limit not exceeded |
| * ENOSPC if memory limit exceeded. In this case, the |
| * page counter remains unchanged. |
| * |
| */ |
| static int |
| rsm_inc_pgcnt(pgcnt_t pnum) |
| { |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| if (rsm_pgcnt_max == 0) { /* no upper limit has been set */ |
| return (RSM_SUCCESS); |
| } |
| |
| mutex_enter(&rsm_pgcnt_lock); |
| |
| if (rsm_pgcnt + pnum > rsm_pgcnt_max) { |
| /* ensure that limits have not been exceeded */ |
| mutex_exit(&rsm_pgcnt_lock); |
| return (RSMERR_INSUFFICIENT_MEM); |
| } |
| |
| rsm_pgcnt += pnum; |
	DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %lu.\n",
	    rsm_pgcnt));
| mutex_exit(&rsm_pgcnt_lock); |
| |
| return (RSM_SUCCESS); |
| } |
| |
| /* |
| * rsm_dec_pgcnt |
| * |
| * Description: decrement rsm page counter. |
| * |
| * Parameters: pgcnt_t pnum; number of pages freed |
| * |
| */ |
| static void |
| rsm_dec_pgcnt(pgcnt_t pnum) |
| { |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| |
| if (rsm_pgcnt_max == 0) { /* no upper limit has been set */ |
| return; |
| } |
| |
| mutex_enter(&rsm_pgcnt_lock); |
| ASSERT(rsm_pgcnt >= pnum); |
| rsm_pgcnt -= pnum; |
	DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %lu.\n",
	    rsm_pgcnt));
| mutex_exit(&rsm_pgcnt_lock); |
| } |
| |
| static struct umem_callback_ops rsm_as_ops = { |
| UMEM_CALLBACK_VERSION, /* version number */ |
| rsm_export_force_destroy, |
| }; |
| |
| static int |
| rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len, |
| proc_t *procp) |
| { |
| int error = RSM_SUCCESS; |
| ulong_t pnum; |
| struct umem_callback_ops *callbackops = &rsm_as_ops; |
| |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n")); |
| |
| /* |
| * Make sure vaddr and len are aligned on a page boundary |
| */ |
| if ((uintptr_t)vaddr & (PAGESIZE - 1)) { |
| return (RSMERR_BAD_ADDR); |
| } |
| |
| if (len & (PAGESIZE - 1)) { |
| return (RSMERR_BAD_LENGTH); |
| } |
| |
| /* |
| * Find number of pages |
| */ |
| pnum = btopr(len); |
| error = rsm_inc_pgcnt(pnum); |
| if (error != RSM_SUCCESS) { |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm_bind_pages:mem limit exceeded\n")); |
| return (RSMERR_INSUFFICIENT_MEM); |
| } |
| |
| error = umem_lockmemory(vaddr, len, |
| DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM, |
| cookie, |
| callbackops, procp); |
| |
| if (error) { |
| rsm_dec_pgcnt(pnum); |
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_bind_pages:umem_lockmemory failed\n"));
		/*
		 * umem_lockmemory, in the case of failure, returns one of
		 * the following three errors. These are translated into
		 * the RSMERR namespace and returned.
		 */
| if (error == EFAULT) |
| return (RSMERR_BAD_ADDR); |
| else if (error == EACCES) |
| return (RSMERR_PERM_DENIED); |
| else |
| return (RSMERR_INSUFFICIENT_MEM); |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n")); |
| |
| return (error); |
| |
| } |
| |
| static int |
| rsm_unbind_pages(rsmseg_t *seg) |
| { |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n")); |
| |
| ASSERT(rsmseglock_held(seg)); |
| |
| if (seg->s_cookie != NULL) { |
| /* unlock address range */ |
| ddi_umem_unlock(seg->s_cookie); |
| rsm_dec_pgcnt(btopr(seg->s_len)); |
| seg->s_cookie = NULL; |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n")); |
| |
| return (RSM_SUCCESS); |
| } |
| |
| |
| static int |
| rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) |
| { |
| int e; |
| adapter_t *adapter; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n")); |
| |
| adapter = rsm_getadapter(msg, mode); |
| if (adapter == NULL) { |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsm_bind done:no adapter\n")); |
| return (RSMERR_CTLR_NOT_PRESENT); |
| } |
| |
| /* lock address range */ |
| if (msg->vaddr == NULL) { |
| rsmka_release_adapter(adapter); |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm: rsm_bind done: invalid vaddr\n")); |
| return (RSMERR_BAD_ADDR); |
| } |
| if (msg->len <= 0) { |
| rsmka_release_adapter(adapter); |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm_bind: invalid length\n")); |
| return (RSMERR_BAD_LENGTH); |
| } |
| |
| /* Lock segment */ |
| rsmseglock_acquire(seg); |
| |
| while (seg->s_state == RSM_STATE_NEW_QUIESCED) { |
| if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { |
| DBG_PRINTF((category, RSM_DEBUG, |
| "rsm_bind done: cv_wait INTERRUPTED")); |
| rsmka_release_adapter(adapter); |
| rsmseglock_release(seg); |
| return (RSMERR_INTERRUPTED); |
| } |
| } |
| |
| ASSERT(seg->s_state == RSM_STATE_NEW); |
| |
| ASSERT(seg->s_cookie == NULL); |
| |
| e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc); |
| if (e == RSM_SUCCESS) { |
| seg->s_flags |= RSM_USER_MEMORY; |
| if (msg->perm & RSM_ALLOW_REBIND) { |
| seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND; |
| } |
| if (msg->perm & RSM_CREATE_SEG_DONTWAIT) { |
| seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT; |
| } |
| seg->s_region.r_vaddr = msg->vaddr; |
| /* |
| * Set the s_pid value in the segment structure. This is used |
| * to identify exported segments belonging to a particular |
| * process so that when the process exits, these segments can |
| * be unlocked forcefully even if rsm_close is not called on |
| * process exit since there maybe other processes referencing |
| * them (for example on a fork or exec). |
| * The s_pid value is also used to authenticate the process |
| * doing a publish or unpublish on the export segment. Only |
| * the creator of the export segment has a right to do a |
| * publish or unpublish and unbind on the segment. |
| */ |
| seg->s_pid = ddi_get_pid(); |
| seg->s_len = msg->len; |
| seg->s_state = RSM_STATE_BIND; |
| seg->s_adapter = adapter; |
| seg->s_proc = curproc; |
| } else { |
| rsmka_release_adapter(adapter); |
| DBG_PRINTF((category, RSM_WARNING, |
| "unable to lock down pages\n")); |
| } |
| |
| msg->rnum = seg->s_minor; |
| /* Unlock segment */ |
| rsmseglock_release(seg); |
| |
| if (e == RSM_SUCCESS) { |
| /* copyout the resource number */ |
| #ifdef _MULTI_DATAMODEL |
| if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { |
| rsm_ioctlmsg32_t msg32; |
| |
| msg32.rnum = msg->rnum; |
| if (ddi_copyout((caddr_t)&msg32.rnum, |
| (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum, |
| sizeof (minor_t), mode)) { |
| rsmka_release_adapter(adapter); |
| e = RSMERR_BAD_ADDR; |
| } |
| } |
| #endif |
| if (ddi_copyout((caddr_t)&msg->rnum, |
| (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum, |
| sizeof (minor_t), mode)) { |
| rsmka_release_adapter(adapter); |
| e = RSMERR_BAD_ADDR; |
| } |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n")); |
| |
| return (e); |
| } |
| |
| static void |
| rsm_remap_local_importers(rsm_node_id_t src_nodeid, |
| rsm_memseg_id_t ex_segid, ddi_umem_cookie_t cookie) |
| { |
| rsmresource_t *p = NULL; |
| rsmhash_table_t *rhash = &rsm_import_segs; |
| uint_t index; |
| |
| DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE, |
| "rsm_remap_local_importers enter\n")); |
| |
| index = rsmhash(ex_segid); |
| |
| rw_enter(&rhash->rsmhash_rw, RW_READER); |
| |
| p = rsmhash_getbkt(rhash, index); |
| |
| for (; p; p = p->rsmrc_next) { |
| rsmseg_t *seg = (rsmseg_t *)p; |
| rsmseglock_acquire(seg); |
| /* |
| * Change the s_cookie value of only the local importers |
| * which have been mapped (in state RSM_STATE_ACTIVE). |
| * Note that there is no need to change the s_cookie value |
| * if the imported segment is in RSM_STATE_MAPPING since |
| * eventually the s_cookie will be updated via the mapping |
| * functionality. |
| */ |
| if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) && |
| (seg->s_state == RSM_STATE_ACTIVE)) { |
| seg->s_cookie = cookie; |
| } |
| rsmseglock_release(seg); |
| } |
| rw_exit(&rhash->rsmhash_rw); |
| |
| DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE, |
| "rsm_remap_local_importers done\n")); |
| } |
| |
| static int |
| rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg) |
| { |
| int e; |
| adapter_t *adapter; |
| ddi_umem_cookie_t cookie; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n")); |
| |
| /* Check for permissions to rebind */ |
| if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) { |
| return (RSMERR_REBIND_NOT_ALLOWED); |
| } |
| |
| if (seg->s_pid != ddi_get_pid() && |
| ddi_get_pid() != 0) { |
| DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n")); |
| return (RSMERR_NOT_CREATOR); |
| } |
| |
| /* |
| * We will not be allowing partial rebind and hence length passed |
| * in must be same as segment length |
| */ |
| if (msg->vaddr == NULL) { |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm_rebind done: null msg->vaddr\n")); |
| return (RSMERR_BAD_ADDR); |
| } |
| if (msg->len != seg->s_len) { |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm_rebind: invalid length\n")); |
| return (RSMERR_BAD_LENGTH); |
| } |
| |
| /* Lock segment */ |
| rsmseglock_acquire(seg); |
| |
| while ((seg->s_state == RSM_STATE_BIND_QUIESCED) || |
| (seg->s_state == RSM_STATE_EXPORT_QUIESCING) || |
| (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) { |
| if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { |
| rsmseglock_release(seg); |
| DBG_PRINTF((category, RSM_DEBUG, |
| "rsm_rebind done: cv_wait INTERRUPTED")); |
| return (RSMERR_INTERRUPTED); |
| } |
| } |
| |
| /* verify segment state */ |
| if ((seg->s_state != RSM_STATE_BIND) && |
| (seg->s_state != RSM_STATE_EXPORT)) { |
| /* Unlock segment */ |
| rsmseglock_release(seg); |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsm_rebind done: invalid state\n")); |
| return (RSMERR_BAD_SEG_HNDL); |
| } |
| |
| ASSERT(seg->s_cookie != NULL); |
| |
| if (msg->vaddr == seg->s_region.r_vaddr) { |
| rsmseglock_release(seg); |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n")); |
| return (RSM_SUCCESS); |
| } |
| |
| e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc); |
| if (e == RSM_SUCCESS) { |
| struct buf *xbuf; |
| dev_t sdev = 0; |
| rsm_memory_local_t mem; |
| |
| xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE, |
| sdev, 0, NULL, DDI_UMEM_SLEEP); |
| ASSERT(xbuf != NULL); |
| |
| mem.ms_type = RSM_MEM_BUF; |
| mem.ms_bp = xbuf; |
| |
| adapter = seg->s_adapter; |
| e = adapter->rsmpi_ops->rsm_rebind( |
| seg->s_handle.out, 0, &mem, |
| RSM_RESOURCE_DONTWAIT, NULL); |
| |
| if (e == RSM_SUCCESS) { |
| /* |
| * unbind the older pages, and unload local importers; |
| * but don't disconnect importers |
| */ |
| (void) rsm_unbind_pages(seg); |
| seg->s_cookie = cookie; |
| seg->s_region.r_vaddr = msg->vaddr; |
| rsm_remap_local_importers(my_nodeid, seg->s_segid, |
| cookie); |
| } else { |
| /* |
| * Unbind the pages associated with "cookie" by the |
| * rsm_bind_pages calls prior to this. This is |
| * similar to what is done in the rsm_unbind_pages |
| * routine for the seg->s_cookie. |
| */ |
| ddi_umem_unlock(cookie); |
| rsm_dec_pgcnt(btopr(msg->len)); |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm_rebind failed with %d\n", e)); |
| } |
| /* |
| * At present there is no dependency on the existence of xbuf. |
| * So we can free it here. If in the future this changes, it can |
| * be freed sometime during the segment destroy. |
| */ |
| freerbuf(xbuf); |
| } |
| |
| /* Unlock segment */ |
| rsmseglock_release(seg); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n")); |
| |
| return (e); |
| } |
| |
| static int |
| rsm_unbind(rsmseg_t *seg) |
| { |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n")); |
| |
| rsmseglock_acquire(seg); |
| |
| /* verify segment state */ |
| if ((seg->s_state != RSM_STATE_BIND) && |
| (seg->s_state != RSM_STATE_BIND_QUIESCED)) { |
| rsmseglock_release(seg); |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsm_unbind: invalid state\n")); |
| return (RSMERR_BAD_SEG_HNDL); |
| } |
| |
| /* unlock current range */ |
| (void) rsm_unbind_pages(seg); |
| |
| if (seg->s_state == RSM_STATE_BIND) { |
| seg->s_state = RSM_STATE_NEW; |
| } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) { |
| seg->s_state = RSM_STATE_NEW_QUIESCED; |
| } |
| |
| rsmseglock_release(seg); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n")); |
| |
| return (RSM_SUCCESS); |
| } |
| |
| /* **************************** Exporter Access List Management ******* */ |
| static void |
| rsmacl_free(rsmapi_access_entry_t *acl, int acl_len) |
| { |
| int acl_sz; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n")); |
| |
| /* acl could be NULL */ |
| |
| if (acl != NULL && acl_len > 0) { |
| acl_sz = acl_len * sizeof (rsmapi_access_entry_t); |
| kmem_free((void *)acl, acl_sz); |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n")); |
| } |
| |
| static void |
| rsmpiacl_free(rsm_access_entry_t *acl, int acl_len) |
| { |
| int acl_sz; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n")); |
| |
| if (acl != NULL && acl_len > 0) { |
| acl_sz = acl_len * sizeof (rsm_access_entry_t); |
| kmem_free((void *)acl, acl_sz); |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n")); |
| |
| } |
| |
| static int |
| rsmacl_build(rsm_ioctlmsg_t *msg, int mode, |
| rsmapi_access_entry_t **list, int *len, int loopback) |
| { |
| rsmapi_access_entry_t *acl; |
| int acl_len; |
| int i; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n")); |
| |
| *len = 0; |
| *list = NULL; |
| |
| acl_len = msg->acl_len; |
| if ((loopback && acl_len > 1) || (acl_len < 0) || |
| (acl_len > MAX_NODES)) { |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsmacl_build done: acl invalid\n")); |
| return (RSMERR_BAD_ACL); |
| } |
| |
| if (acl_len > 0 && acl_len <= MAX_NODES) { |
| size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t); |
| |
| acl = kmem_alloc(acl_size, KM_SLEEP); |
| |
| if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl, |
| acl_size, mode)) { |
| kmem_free((void *) acl, acl_size); |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsmacl_build done: BAD_ADDR\n")); |
| return (RSMERR_BAD_ADDR); |
| } |
| |
| /* |
| * Verify access list |
| */ |
| for (i = 0; i < acl_len; i++) { |
| if (acl[i].ae_node > MAX_NODES || |
| (loopback && (acl[i].ae_node != my_nodeid)) || |
| acl[i].ae_permission > RSM_ACCESS_TRUSTED) { |
| /* invalid entry */ |
| kmem_free((void *) acl, acl_size); |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsmacl_build done: EINVAL\n")); |
| return (RSMERR_BAD_ACL); |
| } |
| } |
| |
| *len = acl_len; |
| *list = acl; |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n")); |
| |
| return (DDI_SUCCESS); |
| } |
| |
| static int |
| rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest, |
| int acl_len, adapter_t *adapter) |
| { |
| rsm_access_entry_t *acl; |
| rsm_addr_t hwaddr; |
| int i; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n")); |
| |
| if (src != NULL) { |
| size_t acl_size = acl_len * sizeof (rsm_access_entry_t); |
| acl = kmem_alloc(acl_size, KM_SLEEP); |
| |
| /* |
| * translate access list |
| */ |
| for (i = 0; i < acl_len; i++) { |
| if (src[i].ae_node == my_nodeid) { |
| acl[i].ae_addr = adapter->hwaddr; |
| } else { |
| hwaddr = get_remote_hwaddr(adapter, |
| src[i].ae_node); |
| if ((int64_t)hwaddr < 0) { |
| /* invalid hwaddr */ |
| kmem_free((void *) acl, acl_size); |
| DBG_PRINTF((category, |
| RSM_DEBUG_VERBOSE, |
| "rsmpiacl_create done:" |
| "EINVAL hwaddr\n")); |
| return (RSMERR_INTERNAL_ERROR); |
| } |
| acl[i].ae_addr = hwaddr; |
| } |
| /* rsmpi understands only RSM_PERM_XXXX */ |
| acl[i].ae_permission = |
| src[i].ae_permission & RSM_PERM_RDWR; |
| } |
| *dest = acl; |
| } else { |
| *dest = NULL; |
| } |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n")); |
| |
| return (RSM_SUCCESS); |
| } |
| |
| static int |
| rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode, |
| rsmipc_reply_t *reply) |
| { |
| |
| int i; |
| rsmseg_t *seg; |
| rsm_memseg_id_t key = req->rsmipc_key; |
| rsm_permission_t perm = req->rsmipc_perm; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsmsegacl_validate enter\n")); |
| |
| /* |
| * Find segment and grab its lock. The reason why we grab the segment |
| * lock in side the search is to avoid the race when the segment is |
| * being deleted and we already have a pointer to it. |
| */ |
| seg = rsmexport_lookup(key); |
| if (!seg) { |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsmsegacl_validate done: %u ENXIO\n", key)); |
| return (RSMERR_SEG_NOT_PUBLISHED); |
| } |
| |
| ASSERT(rsmseglock_held(seg)); |
| ASSERT(seg->s_state == RSM_STATE_EXPORT); |
| |
| /* |
| * We implement a 2-level protection scheme. |
| * First, we check if local/remote host has access rights. |
| * Second, we check if the user has access rights. |
| * |
| * This routine only validates the rnode access_list |
| */ |
| if (seg->s_acl_len > 0) { |
| /* |
| * Check host access list |
| */ |
| ASSERT(seg->s_acl != NULL); |
| for (i = 0; i < seg->s_acl_len; i++) { |
| if (seg->s_acl[i].ae_node == rnode) { |
| perm &= seg->s_acl[i].ae_permission; |
| goto found; |
| } |
| } |
| /* rnode is not found in the list */ |
| rsmseglock_release(seg); |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, |
| "rsmsegacl_validate done: EPERM\n")); |
| return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE); |
| } else { |
| /* use default owner creation umask */ |
| perm &= seg->s_mode; |
| } |
| |
| found: |
| /* update perm for this node */ |
| reply->rsmipc_mode = perm; |
| reply->rsmipc_uid = seg->s_uid; |
| reply->rsmipc_gid = seg->s_gid; |
| reply->rsmipc_segid = seg->s_segid; |
| reply->rsmipc_seglen = seg->s_len; |
| |
| /* |
| * Perm of requesting node is valid; source will validate user |
| */ |
| rsmseglock_release(seg); |
| |
| /* |
| * Add the importer to the list right away, if connect fails |
| * the importer will ask the exporter to remove it. |
| */ |
| importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr, |
| req->rsmipc_segment_cookie); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n")); |
| |
| return (RSM_SUCCESS); |
| } |
| |
| |
| /* ************************** Exporter Calls ************************* */ |
| |
| static int |
| rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) |
| { |
| int e; |
| int acl_len; |
| rsmapi_access_entry_t *acl; |
| rsm_access_entry_t *rsmpi_acl; |
| rsm_memory_local_t mem; |
| struct buf *xbuf; |
| dev_t sdev = 0; |
| adapter_t *adapter; |
| rsm_memseg_id_t segment_id = 0; |
| int loopback_flag = 0; |
| int create_flags = 0; |
| rsm_resource_callback_t callback_flag; |
| DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); |
| |
| DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n")); |
| |
| if (seg->s_adapter == &loopback_adapter) |
| loopback_flag = 1; |
| |
| if (seg->s_pid != ddi_get_pid() && |
| ddi_get_pid() != 0) { |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm_publish: Not creator\n")); |
| return (RSMERR_NOT_CREATOR); |
| } |
| |
| /* |
| * Get per node access list |
| */ |
| e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag); |
| if (e != DDI_SUCCESS) { |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm_publish done: rsmacl_build failed\n")); |
| return (e); |
| } |
| |
| /* |
| * The application provided msg->key is used for resolving a |
| * segment id according to the following: |
| * key = 0 Kernel Agent selects the segment id |
| * key <= RSM_DLPI_ID_END Reserved for system usage except |
| * RSMLIB range |
| * key < RSM_USER_APP_ID_BASE segment id = key |
| * key >= RSM_USER_APP_ID_BASE Reserved for KA selections |
| * |
| * rsm_nextavail_segmentid is initialized to 0x80000000 and |
| * overflows to zero after 0x80000000 allocations. |
| * An algorithm is needed which allows reinitialization and provides |
 * for reallocation after overflow. For now,
 * RSMERR_INSUFFICIENT_RESOURCES is returned once the overflow
 * condition has occurred.
| */ |
| if (msg->key == 0) { |
| mutex_enter(&rsm_lock); |
| segment_id = rsm_nextavail_segmentid; |
| if (segment_id != 0) { |
| rsm_nextavail_segmentid++; |
| mutex_exit(&rsm_lock); |
| } else { |
| mutex_exit(&rsm_lock); |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm_publish done: no more keys avlbl\n")); |
| return (RSMERR_INSUFFICIENT_RESOURCES); |
| } |
	} else if (BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END))
| /* range reserved for internal use by base/ndi libraries */ |
| segment_id = msg->key; |
| else if (msg->key <= RSM_DLPI_ID_END) |
| return (RSMERR_RESERVED_SEGID); |
	else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE - 1)
| segment_id = msg->key; |
| else { |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm_publish done: invalid key %u\n", msg->key)); |
| return (RSMERR_RESERVED_SEGID); |
| } |
| |
| /* Add key to exportlist; The segment lock is held on success */ |
| e = rsmexport_add(seg, segment_id); |
| if (e) { |
| rsmacl_free(acl, acl_len); |
| DBG_PRINTF((category, RSM_ERR, |
| "rsm_publish done: export_add failed: %d\n", e)); |
| return (e); |
| } |
| |
| seg->s_segid = segment_id; |
| |
| if ((seg->s_state != RSM_STATE_BIND) && |
| (seg->s_state != RSM_STATE_BIND_QUIESCED)) { |
| /* state changed since then, free acl and return */ |
| rsmseglock_release(seg); |
| rsmexport_rm(seg); |
|