| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| |
| /* |
| * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. |
| */ |
| |
| /* |
| * driver for accessing kernel devinfo tree. |
| */ |
| #include <sys/types.h> |
| #include <sys/pathname.h> |
| #include <sys/debug.h> |
| #include <sys/autoconf.h> |
| #include <sys/vmsystm.h> |
| #include <sys/conf.h> |
| #include <sys/file.h> |
| #include <sys/kmem.h> |
| #include <sys/modctl.h> |
| #include <sys/stat.h> |
| #include <sys/ddi.h> |
| #include <sys/sunddi.h> |
| #include <sys/sunldi_impl.h> |
| #include <sys/sunndi.h> |
| #include <sys/esunddi.h> |
| #include <sys/sunmdi.h> |
| #include <sys/ddi_impldefs.h> |
| #include <sys/ndi_impldefs.h> |
| #include <sys/mdi_impldefs.h> |
| #include <sys/devinfo_impl.h> |
| #include <sys/thread.h> |
| #include <sys/modhash.h> |
| #include <sys/bitmap.h> |
| #include <util/qsort.h> |
| #include <sys/disp.h> |
| #include <sys/kobj.h> |
| #include <sys/crc32.h> |
| #include <sys/ddi_hp.h> |
| #include <sys/ddi_hp_impl.h> |
| #include <sys/sysmacros.h> |
| #include <sys/list.h> |
| |
| |
/*
 * Debug tracing.  On DEBUG kernels di_debug selects the verbosity:
 * dcmn_err() fires when di_debug >= 1, dcmn_err2() when >= 2 and
 * dcmn_err3() when >= 3.  The argument is a complete, parenthesized
 * cmn_err() argument list, e.g. dcmn_err((CE_CONT, "fmt", arg)).
 * On non-DEBUG kernels all three expand to nothing.
 *
 * The DEBUG expansions are wrapped in do { } while (0) so that each
 * invocation is exactly one statement; a bare "if (...) cmn_err args"
 * would silently capture an "else" that follows the invocation.
 */
#ifdef DEBUG
static int di_debug;
#define	dcmn_err(args)	do { if (di_debug >= 1) cmn_err args; } while (0)
#define	dcmn_err2(args)	do { if (di_debug >= 2) cmn_err args; } while (0)
#define	dcmn_err3(args)	do { if (di_debug >= 3) cmn_err args; } while (0)
#else
#define	dcmn_err(args)	/* nothing */
#define	dcmn_err2(args)	/* nothing */
#define	dcmn_err3(args)	/* nothing */
#endif
| |
| /* |
| * We partition the space of devinfo minor nodes equally between the full and |
| * unprivileged versions of the driver. The even-numbered minor nodes are the |
| * full version, while the odd-numbered ones are the read-only version. |
| * |
| * di_max_opens is the number of slots in the di_states[] table allocated by |
| * di_attach(). di_open() searches for a free slot starting at the parent |
| * minor (DI_FULL_PARENT or DI_READONLY_PARENT) and stepping by |
| * DI_NODE_SPECIES, so the parity of a slot index records which node was |
| * opened. The minor number handed back to the opener is the slot index plus |
| * DI_NODE_SPECIES. |
| */ |
| static int di_max_opens = 32; |
| |
| static int di_prop_dyn = 1; /* enable dynamic property support */ |
| |
| #define DI_FULL_PARENT 0 /* minor of the "devinfo" node */ |
| #define DI_READONLY_PARENT 1 /* minor of the "devinfo,ro" node */ |
| #define DI_NODE_SPECIES 2 /* number of parent nodes; slot search stride */ |
| #define DI_UNPRIVILEGED_NODE(x) (((x) % 2) != 0) /* true for odd x */ |
| |
| #define IOC_IDLE 0 /* snapshot ioctl states: idle, also the zeroed initial state */ |
| #define IOC_SNAP 1 /* snapshot in progress */ |
| #define IOC_DONE 2 /* snapshot done, but not copied out */ |
| #define IOC_COPY 3 /* copyout in progress (DINFOUSRLD) */ |
| |
/*
 * Keep max alignment so we can move snapshot to different platforms.
 * DI_ALIGN() rounds its argument up to the next multiple of 8.
 *
 * NOTE: Most callers should rely on the di_checkmem return value
 * being aligned, and reestablish *off_p with aligned value, instead
 * of trying to align size of their allocations: this approach will
 * minimize memory use.
 *
 * The argument is parenthesized in the expansion so that an expression
 * argument containing an operator of lower precedence than '+' (for
 * example '|' or '?:') is rounded as a whole.
 */
#define	DI_ALIGN(addr)	(((addr) + 7l) & ~7l)
| |
| /* |
| * To avoid wasting memory, make a linked list of memory chunks. |
| * Size of each chunk is buf_size. |
| * |
| * di_allocmem() appends a chunk to the tail of the list; di_freemem() |
| * releases every chunk. A snapshot offset is translated to a kernel |
| * address by di_mem_addr(), which walks the list subtracting each |
| * chunk's buf_size from the offset. |
| */ |
| struct di_mem { |
| struct di_mem *next; /* link to next chunk */ |
| char *buf; /* contiguous kernel memory */ |
| size_t buf_size; /* size of buf in bytes */ |
| devmap_cookie_t cook; /* cookie from ddi_umem_alloc */ |
| }; |
| |
| /* |
| * This is a stack for walking the tree without using recursion. |
| * When the devinfo tree height is above some small size, one |
| * gets watchdog resets on sun4m. |
| * |
| * offset[], dip[] and circ[] are parallel arrays indexed by stack level. |
| * PUSH_STACK fills level "depth" and then increments depth; POP_STACK |
| * releases the top level and decrements depth. |
| */ |
| struct di_stack { |
| void *offset[MAX_TREE_DEPTH]; /* per-level pointer, read back as di_off_t * */ |
| struct dev_info *dip[MAX_TREE_DEPTH]; /* node entered at each level */ |
| int circ[MAX_TREE_DEPTH]; /* ndi_devi_enter() value handed to ndi_devi_exit() */ |
| int depth; /* depth of current node to be copied */ |
| }; |
| |
/*
 * Accessors for struct di_stack.  "stack" is a pointer to the stack.
 *
 * TOP_OFFSET/TOP_NODE	saved offset pointer / node of the top entry
 * PARENT_OFFSET	saved offset pointer of the entry below the top
 * EMPTY_STACK		true when no entries are on the stack
 * POP_STACK		ndi_devi_exit() the top node, then drop the entry
 * PUSH_STACK		ndi_devi_enter() node, then record node and off_p
 *
 * POP_STACK and PUSH_STACK expand to a brace-enclosed compound statement.
 * Every macro argument is parenthesized in the expansion so that an
 * expression argument (e.g. a conditional expression for "node") is
 * asserted on and cast as a whole.
 */
#define	TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define	TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define	PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define	EMPTY_STACK(stack)	((stack)->depth == 0)
#define	POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
	    (stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}
#define	PUSH_STACK(stack, node, off_p)	{ \
	ASSERT((node) != NULL); \
	ndi_devi_enter((dev_info_t *)(node), \
	    &(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(off_p); \
	((stack)->depth)++; \
}

/* The struct di_all header, which lives at offset 0 of the snapshot. */
#define	DI_ALL_PTR(s)	DI_ALL(di_mem_addr((s), 0))
| |
| /* |
| * With devfs, the device tree has no global locks. The device tree is |
| * dynamic and dips may come and go if they are not locked locally. Under |
| * these conditions, pointers are no longer reliable as unique IDs. |
| * Specifically, these pointers cannot be used as keys for hash tables |
| * as the same devinfo structure may be freed in one part of the tree only |
| * to be allocated as the structure for a different device in another |
| * part of the tree. This can happen if DR and the snapshot are |
| * happening concurrently. |
| * The following data structures act as keys for devinfo nodes and |
| * pathinfo nodes. |
| * |
| * Hashing (di_hash_byptr) uses only the dip or pip pointer; the remaining |
| * fields take part in key comparison (di_dkey_cmp, di_pkey_cmp). |
| */ |
| |
| enum di_ktype { |
| DI_DKEY = 1, /* key describes a devinfo node (struct di_dkey) */ |
| DI_PKEY = 2 /* key describes a pathinfo node (struct di_pkey) */ |
| }; |
| |
| struct di_dkey { |
| dev_info_t *dk_dip; /* compared first by di_dkey_cmp() */ |
| major_t dk_major; /* major/inst compared only if neither major is DDI_MAJOR_T_NONE */ |
| int dk_inst; |
| pnode_t dk_nodeid; |
| }; |
| |
| struct di_pkey { |
| mdi_pathinfo_t *pk_pip; /* compared first by di_pkey_cmp() */ |
| char *pk_path_addr; /* may be NULL; freed as strlen() + 1 bytes by di_key_dtor() */ |
| dev_info_t *pk_client; |
| dev_info_t *pk_phci; |
| }; |
| |
| struct di_key { |
| enum di_ktype k_type; /* selects the valid member of k_u */ |
| union { |
| struct di_dkey dkey; |
| struct di_pkey pkey; |
| } k_u; |
| }; |
| |
| |
| struct i_lnode; /* forward declaration; i_link refers to it before its definition */ |
| |
| typedef struct i_link { |
| /* |
| * If a di_link struct representing this i_link struct makes it |
| * into the snapshot, then self will point to the offset of |
| * the di_link struct in the snapshot |
| */ |
| di_off_t self; |
| |
| int spec_type; /* block or char access type */ |
| struct i_lnode *src_lnode; /* src i_lnode */ |
| struct i_lnode *tgt_lnode; /* tgt i_lnode */ |
| struct i_link *src_link_next; /* next src i_link w/ same i_lnode */ |
| struct i_link *tgt_link_next; /* next tgt i_link w/ same i_lnode */ |
| } i_link_t; |
| |
| typedef struct i_lnode { |
| /* |
| * If a di_lnode struct representing this i_lnode struct makes it |
| * into the snapshot, then self will point to the offset of |
| * the di_lnode struct in the snapshot |
| */ |
| di_off_t self; |
| |
| /* |
| * used for hashing and comparing i_lnodes |
| */ |
| int modid; |
| |
| /* |
| * public information describing a link endpoint |
| */ |
| struct di_node *di_node; /* di_node in snapshot */ |
| dev_t devt; /* devt */ |
| |
| /* |
| * i_link ptr to links coming into this i_lnode node |
| * (this i_lnode is the target of these i_links); |
| * presumably chained through i_link.tgt_link_next -- confirm |
| */ |
| i_link_t *link_in; |
| |
| /* |
| * i_link ptr to links going out of this i_lnode node |
| * (this i_lnode is the source of these i_links); |
| * presumably chained through i_link.src_link_next -- confirm |
| */ |
| i_link_t *link_out; |
| } i_lnode_t; |
| |
| typedef struct i_hp { |
| di_off_t hp_off; /* Offset of di_hp_t in snapshot */ |
| dev_info_t *hp_child; /* Child devinfo node of the di_hp_t */ |
| list_node_t hp_link; /* List linkage */ |
| } i_hp_t; |
| |
| /* |
| * Soft state associated with each instance of driver open. |
| * |
| * di_states is a table of di_max_opens pointers allocated in di_attach(). |
| * di_open() allocates a zeroed di_state into a free slot and di_close() |
| * frees it and clears the slot; slot updates are made under di_lock. |
| * memlist/mem_size describe the snapshot memory managed by di_allocmem() |
| * and di_freemem(). |
| */ |
| static struct di_state { |
| di_off_t mem_size; /* total # bytes in memlist */ |
| struct di_mem *memlist; /* head of memlist */ |
| uint_t command; /* command from ioctl */ |
| int di_iocstate; /* snapshot ioctl state */ |
| mod_hash_t *reg_dip_hash; |
| mod_hash_t *reg_pip_hash; |
| int lnode_count; |
| int link_count; |
| |
| mod_hash_t *lnode_hash; |
| mod_hash_t *link_hash; |
| |
| list_t hp_list; |
| } **di_states; |
| |
| static kmutex_t di_lock; /* serialize instance assignment */ |
| |
| typedef enum { |
| DI_QUIET = 0, /* DI_QUIET must always be 0 */ |
| DI_ERR, |
| DI_INFO, |
| DI_TRACE, |
| DI_TRACE1, |
| DI_TRACE2 |
| } di_cache_debug_t; /* message level passed to di_cache_print() via CACHE_DEBUG() */ |
| |
| static uint_t di_chunk = 32; /* I/O chunk size in pages */ |
| |
| #define DI_CACHE_LOCK(c) (mutex_enter(&(c).cache_lock)) /* c is a cache object, not a pointer */ |
| #define DI_CACHE_UNLOCK(c) (mutex_exit(&(c).cache_lock)) |
| #define DI_CACHE_LOCKED(c) (mutex_owned(&(c).cache_lock)) |
| |
| /* |
| * Check that whole device tree is being configured as a pre-condition for |
| * cleaning up /etc/devices files. |
| * |
| * True only when the command has both DINFOSUBTREE and DINFOFORCE set and |
| * the root_path recorded in the snapshot header is exactly "/". |
| */ |
| #define DEVICES_FILES_CLEANABLE(st) \ |
| (((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \ |
| strcmp(DI_ALL_PTR(st)->root_path, "/") == 0) |
| |
| #define CACHE_DEBUG(args) \ |
| { if (di_cache_debug != DI_QUIET) di_cache_print args; } /* args: parenthesized di_cache_print() argument list */ |
| |
| typedef struct phci_walk_arg { |
| di_off_t off; /* snapshot offset carried through the walk */ |
| struct di_state *st; /* per-open state that owns the snapshot */ |
| } phci_walk_arg_t; |
| |
| static int di_open(dev_t *, int, int, cred_t *); /* driver entry points (di_cb_ops, di_ops) */ |
| static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); |
| static int di_close(dev_t, int, int, cred_t *); |
| static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **); |
| static int di_attach(dev_info_t *, ddi_attach_cmd_t); |
| static int di_detach(dev_info_t *, ddi_detach_cmd_t); |
| |
| static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int); /* returns ending offset, 0 on error */ |
| static di_off_t di_snapshot_and_clean(struct di_state *); |
| static di_off_t di_copydevnm(di_off_t *, struct di_state *); |
| static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *); |
| static di_off_t di_copynode(struct dev_info *, struct di_stack *, |
| struct di_state *); |
| static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t, |
| struct di_state *); |
| static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *); |
| static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *); |
| static di_off_t di_gethpdata(ddi_hp_cn_handle_t *, di_off_t *, |
| struct di_state *); |
| static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *, |
| struct di_state *, struct dev_info *); |
| static void di_allocmem(struct di_state *, size_t); /* snapshot memory list management */ |
| static void di_freemem(struct di_state *); |
| static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz); |
| static di_off_t di_checkmem(struct di_state *, di_off_t, size_t); /* returns an aligned offset */ |
| static void *di_mem_addr(struct di_state *, di_off_t); /* snapshot offset -> kernel address */ |
| static int di_setstate(struct di_state *, int); /* callers treat -1 as "transition refused" */ |
| static void di_register_dip(struct di_state *, dev_info_t *, di_off_t); |
| static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t); |
| static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t, |
| struct di_state *, int); |
| static di_off_t di_getlink_data(di_off_t, struct di_state *); |
| static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p); |
| |
| static int cache_args_valid(struct di_state *st, int *error); /* snapshot cache support */ |
| static int snapshot_is_cacheable(struct di_state *st); |
| static int di_cache_lookup(struct di_state *st); |
| static int di_cache_update(struct di_state *st); |
| static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...); |
| static int build_vhci_list(dev_info_t *vh_devinfo, void *arg); |
| static int build_phci_list(dev_info_t *ph_devinfo, void *arg); |
| static void di_hotplug_children(struct di_state *st); |
| |
| extern int modrootloaded; /* defined outside this file */ |
| extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *); |
| extern void mdi_vhci_walk_phcis(dev_info_t *, |
| int (*)(dev_info_t *, void *), void *); |
| |
| |
| static struct cb_ops di_cb_ops = { /* only open, close and ioctl are implemented by this driver */ |
| di_open, /* open */ |
| di_close, /* close */ |
| nodev, /* strategy */ |
| nodev, /* print */ |
| nodev, /* dump */ |
| nodev, /* read */ |
| nodev, /* write */ |
| di_ioctl, /* ioctl */ |
| nodev, /* devmap */ |
| nodev, /* mmap */ |
| nodev, /* segmap */ |
| nochpoll, /* poll */ |
| ddi_prop_op, /* prop_op */ |
| NULL, /* streamtab */ |
| D_NEW | D_MP /* Driver compatibility flag */ |
| }; |
| |
| static struct dev_ops di_ops = { /* referenced by modldrv below */ |
| DEVO_REV, /* devo_rev, */ |
| 0, /* refcnt */ |
| di_info, /* info */ |
| nulldev, /* identify */ |
| nulldev, /* probe */ |
| di_attach, /* attach */ |
| di_detach, /* detach */ |
| nodev, /* reset */ |
| &di_cb_ops, /* driver operations */ |
| NULL /* bus operations */ |
| }; |
| |
| /* |
| * Module linkage information for the kernel. |
| * |
| * modlinkage is the object handed to mod_install(), mod_info() and |
| * mod_remove() by _init(), _info() and _fini(). |
| */ |
| static struct modldrv modldrv = { |
| &mod_driverops, |
| "DEVINFO Driver", |
| &di_ops |
| }; |
| |
| static struct modlinkage modlinkage = { |
| MODREV_1, |
| &modldrv, |
| NULL |
| }; |
| |
| int |
| _init(void) |
| { |
| int error; |
| |
| mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL); |
| |
| error = mod_install(&modlinkage); |
| if (error != 0) { |
| mutex_destroy(&di_lock); |
| return (error); |
| } |
| |
| return (0); |
| } |
| |
| int |
| _info(struct modinfo *modinfop) |
| { |
| return (mod_info(&modlinkage, modinfop)); |
| } |
| |
| int |
| _fini(void) |
| { |
| int error; |
| |
| error = mod_remove(&modlinkage); |
| if (error != 0) { |
| return (error); |
| } |
| |
| mutex_destroy(&di_lock); |
| return (0); |
| } |
| |
| static dev_info_t *di_dip; |
| |
| /*ARGSUSED*/ |
| static int |
| di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) |
| { |
| int error = DDI_FAILURE; |
| |
| switch (infocmd) { |
| case DDI_INFO_DEVT2DEVINFO: |
| *result = (void *)di_dip; |
| error = DDI_SUCCESS; |
| break; |
| case DDI_INFO_DEVT2INSTANCE: |
| /* |
| * All dev_t's map to the same, single instance. |
| */ |
| *result = (void *)0; |
| error = DDI_SUCCESS; |
| break; |
| default: |
| break; |
| } |
| |
| return (error); |
| } |
| |
| static int |
| di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) |
| { |
| int error = DDI_FAILURE; |
| |
| switch (cmd) { |
| case DDI_ATTACH: |
| di_states = kmem_zalloc( |
| di_max_opens * sizeof (struct di_state *), KM_SLEEP); |
| |
| if (ddi_create_minor_node(dip, "devinfo", S_IFCHR, |
| DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE || |
| ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR, |
| DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) { |
| kmem_free(di_states, |
| di_max_opens * sizeof (struct di_state *)); |
| ddi_remove_minor_node(dip, NULL); |
| error = DDI_FAILURE; |
| } else { |
| di_dip = dip; |
| ddi_report_dev(dip); |
| |
| error = DDI_SUCCESS; |
| } |
| break; |
| default: |
| error = DDI_FAILURE; |
| break; |
| } |
| |
| return (error); |
| } |
| |
| static int |
| di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) |
| { |
| int error = DDI_FAILURE; |
| |
| switch (cmd) { |
| case DDI_DETACH: |
| ddi_remove_minor_node(dip, NULL); |
| di_dip = NULL; |
| kmem_free(di_states, di_max_opens * sizeof (struct di_state *)); |
| |
| error = DDI_SUCCESS; |
| break; |
| default: |
| error = DDI_FAILURE; |
| break; |
| } |
| |
| return (error); |
| } |
| |
| /* |
| * Allow multiple opens by tweaking the dev_t such that it looks like each |
| * open is getting a different minor device. Each minor gets a separate |
| * entry in the di_states[] table. Based on the original minor number, we |
| * discriminate opens of the full and read-only nodes. If all of the instances |
| * of the selected minor node are currently open, we return EAGAIN. |
| */ |
| /*ARGSUSED*/ |
| static int |
| di_open(dev_t *devp, int flag, int otyp, cred_t *credp) |
| { |
| int m; |
| minor_t minor_parent = getminor(*devp); |
| |
| if (minor_parent != DI_FULL_PARENT && |
| minor_parent != DI_READONLY_PARENT) |
| return (ENXIO); |
| |
| mutex_enter(&di_lock); |
| |
| for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) { |
| if (di_states[m] != NULL) |
| continue; |
| |
| di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP); |
| break; /* It's ours. */ |
| } |
| |
| if (m >= di_max_opens) { |
| /* |
| * maximum open instance for device reached |
| */ |
| mutex_exit(&di_lock); |
| dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached")); |
| return (EAGAIN); |
| } |
| mutex_exit(&di_lock); |
| |
| ASSERT(m < di_max_opens); |
| *devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES)); |
| |
| dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n", |
| (void *)curthread, m + DI_NODE_SPECIES)); |
| |
| return (0); |
| } |
| |
| /*ARGSUSED*/ |
| static int |
| di_close(dev_t dev, int flag, int otype, cred_t *cred_p) |
| { |
| struct di_state *st; |
| int m = (int)getminor(dev) - DI_NODE_SPECIES; |
| |
| if (m < 0) { |
| cmn_err(CE_WARN, "closing non-existent devinfo minor %d", |
| m + DI_NODE_SPECIES); |
| return (ENXIO); |
| } |
| |
| st = di_states[m]; |
| ASSERT(m < di_max_opens && st != NULL); |
| |
| di_freemem(st); |
| kmem_free(st, sizeof (struct di_state)); |
| |
| /* |
| * empty slot in state table |
| */ |
| mutex_enter(&di_lock); |
| di_states[m] = NULL; |
| dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n", |
| (void *)curthread, m + DI_NODE_SPECIES)); |
| mutex_exit(&di_lock); |
| |
| return (0); |
| } |
| |
| |
| /*ARGSUSED*/ |
| static int |
| di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) |
| { |
| int rv, error; |
| di_off_t off; |
| struct di_all *all; |
| struct di_state *st; |
| int m = (int)getminor(dev) - DI_NODE_SPECIES; |
| major_t i; |
| char *drv_name; |
| size_t map_size, size; |
| struct di_mem *dcp; |
| int ndi_flags; |
| |
| if (m < 0 || m >= di_max_opens) { |
| return (ENXIO); |
| } |
| |
| st = di_states[m]; |
| ASSERT(st != NULL); |
| |
| dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd)); |
| |
| switch (cmd) { |
| case DINFOIDENT: |
| /* |
| * This is called from di_init to verify that the driver |
| * opened is indeed devinfo. The purpose is to guard against |
| * sending ioctl to an unknown driver in case of an |
| * unresolved major number conflict during bfu. |
| */ |
| *rvalp = DI_MAGIC; |
| return (0); |
| |
| case DINFOLODRV: |
| /* |
| * Hold an installed driver and return the result |
| */ |
| if (DI_UNPRIVILEGED_NODE(m)) { |
| /* |
| * Only the fully enabled instances may issue |
| * DINFOLDDRV. |
| */ |
| return (EACCES); |
| } |
| |
| drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); |
| if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) { |
| kmem_free(drv_name, MAXNAMELEN); |
| return (EFAULT); |
| } |
| |
| /* |
| * Some 3rd party driver's _init() walks the device tree, |
| * so we load the driver module before configuring driver. |
| */ |
| i = ddi_name_to_major(drv_name); |
| if (ddi_hold_driver(i) == NULL) { |
| kmem_free(drv_name, MAXNAMELEN); |
| return (ENXIO); |
| } |
| |
| ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT; |
| |
| /* |
| * i_ddi_load_drvconf() below will trigger a reprobe |
| * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't |
| * needed here. |
| */ |
| modunload_disable(); |
| (void) i_ddi_load_drvconf(i); |
| (void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i); |
| kmem_free(drv_name, MAXNAMELEN); |
| ddi_rele_driver(i); |
| rv = i_ddi_devs_attached(i); |
| modunload_enable(); |
| |
| i_ddi_di_cache_invalidate(); |
| |
| return ((rv == DDI_SUCCESS)? 0 : ENXIO); |
| |
| case DINFOUSRLD: |
| /* |
| * The case for copying snapshot to userland |
| */ |
| if (di_setstate(st, IOC_COPY) == -1) |
| return (EBUSY); |
| |
| map_size = DI_ALL_PTR(st)->map_size; |
| if (map_size == 0) { |
| (void) di_setstate(st, IOC_DONE); |
| return (EFAULT); |
| } |
| |
| /* |
| * copyout the snapshot |
| */ |
| map_size = (map_size + PAGEOFFSET) & PAGEMASK; |
| |
| /* |
| * Return the map size, so caller may do a sanity |
| * check against the return value of snapshot ioctl() |
| */ |
| *rvalp = (int)map_size; |
| |
| /* |
| * Copy one chunk at a time |
| */ |
| off = 0; |
| dcp = st->memlist; |
| while (map_size) { |
| size = dcp->buf_size; |
| if (map_size <= size) { |
| size = map_size; |
| } |
| |
| if (ddi_copyout(di_mem_addr(st, off), |
| (void *)(arg + off), size, mode) != 0) { |
| (void) di_setstate(st, IOC_DONE); |
| return (EFAULT); |
| } |
| |
| map_size -= size; |
| off += size; |
| dcp = dcp->next; |
| } |
| |
| di_freemem(st); |
| (void) di_setstate(st, IOC_IDLE); |
| return (0); |
| |
| default: |
| if ((cmd & ~DIIOC_MASK) != DIIOC) { |
| /* |
| * Invalid ioctl command |
| */ |
| return (ENOTTY); |
| } |
| /* |
| * take a snapshot |
| */ |
| st->command = cmd & DIIOC_MASK; |
| /*FALLTHROUGH*/ |
| } |
| |
| /* |
| * Obtain enough memory to hold header + rootpath. We prevent kernel |
| * memory exhaustion by freeing any previously allocated snapshot and |
| * refusing the operation; otherwise we would be allowing ioctl(), |
| * ioctl(), ioctl(), ..., panic. |
| */ |
| if (di_setstate(st, IOC_SNAP) == -1) |
| return (EBUSY); |
| |
| /* |
| * Initial memlist always holds di_all and the root_path - and |
| * is at least a page and size. |
| */ |
| size = sizeof (struct di_all) + |
| sizeof (((struct dinfo_io *)(NULL))->root_path); |
| if (size < PAGESIZE) |
| size = PAGESIZE; |
| off = di_checkmem(st, 0, size); |
| all = DI_ALL_PTR(st); |
| off += sizeof (struct di_all); /* real length of di_all */ |
| |
| all->devcnt = devcnt; |
| all->command = st->command; |
| all->version = DI_SNAPSHOT_VERSION; |
| all->top_vhci_devinfo = 0; /* filled by build_vhci_list. */ |
| |
| /* |
| * Note the endianness in case we need to transport snapshot |
| * over the network. |
| */ |
| #if defined(_LITTLE_ENDIAN) |
| all->endianness = DI_LITTLE_ENDIAN; |
| #else |
| all->endianness = DI_BIG_ENDIAN; |
| #endif |
| |
| /* Copyin ioctl args, store in the snapshot. */ |
| if (copyinstr((void *)arg, all->req_path, |
| sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) { |
| di_freemem(st); |
| (void) di_setstate(st, IOC_IDLE); |
| return (EFAULT); |
| } |
| (void) strcpy(all->root_path, all->req_path); |
| off += size; /* real length of root_path */ |
| |
| if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) { |
| di_freemem(st); |
| (void) di_setstate(st, IOC_IDLE); |
| return (EINVAL); |
| } |
| |
| error = 0; |
| if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) { |
| di_freemem(st); |
| (void) di_setstate(st, IOC_IDLE); |
| return (error); |
| } |
| |
| /* |
| * Only the fully enabled version may force load drivers or read |
| * the parent private data from a driver. |
| */ |
| if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 && |
| DI_UNPRIVILEGED_NODE(m)) { |
| di_freemem(st); |
| (void) di_setstate(st, IOC_IDLE); |
| return (EACCES); |
| } |
| |
| /* Do we need private data? */ |
| if (st->command & DINFOPRIVDATA) { |
| arg += sizeof (((struct dinfo_io *)(NULL))->root_path); |
| |
| #ifdef _MULTI_DATAMODEL |
| switch (ddi_model_convert_from(mode & FMODELS)) { |
| case DDI_MODEL_ILP32: { |
| /* |
| * Cannot copy private data from 64-bit kernel |
| * to 32-bit app |
| */ |
| di_freemem(st); |
| (void) di_setstate(st, IOC_IDLE); |
| return (EINVAL); |
| } |
| case DDI_MODEL_NONE: |
| if ((off = di_copyformat(off, st, arg, mode)) == 0) { |
| di_freemem(st); |
| (void) di_setstate(st, IOC_IDLE); |
| return (EFAULT); |
| } |
| break; |
| } |
| #else /* !_MULTI_DATAMODEL */ |
| if ((off = di_copyformat(off, st, arg, mode)) == 0) { |
| di_freemem(st); |
| (void) di_setstate(st, IOC_IDLE); |
| return (EFAULT); |
| } |
| #endif /* _MULTI_DATAMODEL */ |
| } |
| |
| all->top_devinfo = DI_ALIGN(off); |
| |
| /* |
| * For cache lookups we reallocate memory from scratch, |
| * so the value of "all" is no longer valid. |
| */ |
| all = NULL; |
| |
| if (st->command & DINFOCACHE) { |
| *rvalp = di_cache_lookup(st); |
| } else if (snapshot_is_cacheable(st)) { |
| DI_CACHE_LOCK(di_cache); |
| *rvalp = di_cache_update(st); |
| DI_CACHE_UNLOCK(di_cache); |
| } else |
| *rvalp = di_snapshot_and_clean(st); |
| |
| if (*rvalp) { |
| DI_ALL_PTR(st)->map_size = *rvalp; |
| (void) di_setstate(st, IOC_DONE); |
| } else { |
| di_freemem(st); |
| (void) di_setstate(st, IOC_IDLE); |
| } |
| |
| return (0); |
| } |
| |
| /* |
| * Get a chunk of memory >= size, for the snapshot |
| */ |
| static void |
| di_allocmem(struct di_state *st, size_t size) |
| { |
| struct di_mem *mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP); |
| |
| /* |
| * Round up size to nearest power of 2. If it is less |
| * than st->mem_size, set it to st->mem_size (i.e., |
| * the mem_size is doubled every time) to reduce the |
| * number of memory allocations. |
| */ |
| size_t tmp = 1; |
| while (tmp < size) { |
| tmp <<= 1; |
| } |
| size = (tmp > st->mem_size) ? tmp : st->mem_size; |
| |
| mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook); |
| mem->buf_size = size; |
| |
| dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size)); |
| |
| if (st->mem_size == 0) { /* first chunk */ |
| st->memlist = mem; |
| } else { |
| /* |
| * locate end of linked list and add a chunk at the end |
| */ |
| struct di_mem *dcp = st->memlist; |
| while (dcp->next != NULL) { |
| dcp = dcp->next; |
| } |
| |
| dcp->next = mem; |
| } |
| |
| st->mem_size += size; |
| } |
| |
| /* |
| * Copy upto bufsiz bytes of the memlist to buf |
| */ |
| static void |
| di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz) |
| { |
| struct di_mem *dcp; |
| size_t copysz; |
| |
| if (st->mem_size == 0) { |
| ASSERT(st->memlist == NULL); |
| return; |
| } |
| |
| copysz = 0; |
| for (dcp = st->memlist; dcp; dcp = dcp->next) { |
| |
| ASSERT(bufsiz > 0); |
| |
| if (bufsiz <= dcp->buf_size) |
| copysz = bufsiz; |
| else |
| copysz = dcp->buf_size; |
| |
| bcopy(dcp->buf, buf, copysz); |
| |
| buf += copysz; |
| bufsiz -= copysz; |
| |
| if (bufsiz == 0) |
| break; |
| } |
| } |
| |
| /* |
| * Free all memory for the snapshot |
| */ |
| static void |
| di_freemem(struct di_state *st) |
| { |
| struct di_mem *dcp, *tmp; |
| |
| dcmn_err2((CE_CONT, "di_freemem\n")); |
| |
| if (st->mem_size) { |
| dcp = st->memlist; |
| while (dcp) { /* traverse the linked list */ |
| tmp = dcp; |
| dcp = dcp->next; |
| ddi_umem_free(tmp->cook); |
| kmem_free(tmp, sizeof (struct di_mem)); |
| } |
| st->mem_size = 0; |
| st->memlist = NULL; |
| } |
| |
| ASSERT(st->mem_size == 0); |
| ASSERT(st->memlist == NULL); |
| } |
| |
| /* |
| * Copies cached data to the di_state structure. |
| * Returns: |
| * - size of data copied, on SUCCESS |
| * - 0 on failure |
| */ |
| static int |
| di_cache2mem(struct di_cache *cache, struct di_state *st) |
| { |
| caddr_t pa; |
| |
| ASSERT(st->mem_size == 0); |
| ASSERT(st->memlist == NULL); |
| ASSERT(!servicing_interrupt()); |
| ASSERT(DI_CACHE_LOCKED(*cache)); |
| |
| if (cache->cache_size == 0) { |
| ASSERT(cache->cache_data == NULL); |
| CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy")); |
| return (0); |
| } |
| |
| ASSERT(cache->cache_data); |
| |
| di_allocmem(st, cache->cache_size); |
| |
| pa = di_mem_addr(st, 0); |
| |
| ASSERT(pa); |
| |
| /* |
| * Verify that di_allocmem() allocates contiguous memory, |
| * so that it is safe to do straight bcopy() |
| */ |
| ASSERT(st->memlist != NULL); |
| ASSERT(st->memlist->next == NULL); |
| bcopy(cache->cache_data, pa, cache->cache_size); |
| |
| return (cache->cache_size); |
| } |
| |
| /* |
| * Copies a snapshot from di_state to the cache |
| * Returns: |
| * - 0 on failure |
| * - size of copied data on success |
| */ |
| static size_t |
| di_mem2cache(struct di_state *st, struct di_cache *cache) |
| { |
| size_t map_size; |
| |
| ASSERT(cache->cache_size == 0); |
| ASSERT(cache->cache_data == NULL); |
| ASSERT(!servicing_interrupt()); |
| ASSERT(DI_CACHE_LOCKED(*cache)); |
| |
| if (st->mem_size == 0) { |
| ASSERT(st->memlist == NULL); |
| CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy")); |
| return (0); |
| } |
| |
| ASSERT(st->memlist); |
| |
| /* |
| * The size of the memory list may be much larger than the |
| * size of valid data (map_size). Cache only the valid data |
| */ |
| map_size = DI_ALL_PTR(st)->map_size; |
| if (map_size == 0 || map_size < sizeof (struct di_all) || |
| map_size > st->mem_size) { |
| CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size)); |
| return (0); |
| } |
| |
| cache->cache_data = kmem_alloc(map_size, KM_SLEEP); |
| cache->cache_size = map_size; |
| di_copymem(st, cache->cache_data, cache->cache_size); |
| |
| return (map_size); |
| } |
| |
| /* |
| * Make sure there is at least "size" bytes memory left before |
| * going on. Otherwise, start on a new chunk. |
| */ |
| static di_off_t |
| di_checkmem(struct di_state *st, di_off_t off, size_t size) |
| { |
| dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n", |
| off, (int)size)); |
| |
| /* |
| * di_checkmem() shouldn't be called with a size of zero. |
| * But in case it is, we want to make sure we return a valid |
| * offset within the memlist and not an offset that points us |
| * at the end of the memlist. |
| */ |
| if (size == 0) { |
| dcmn_err((CE_WARN, "di_checkmem: invalid zero size used")); |
| size = 1; |
| } |
| |
| off = DI_ALIGN(off); |
| if ((st->mem_size - off) < size) { |
| off = st->mem_size; |
| di_allocmem(st, size); |
| } |
| |
| /* verify that return value is aligned */ |
| ASSERT(off == DI_ALIGN(off)); |
| return (off); |
| } |
| |
| /* |
| * Copy the private data format from ioctl arg. |
| * On success, the ending offset is returned. On error 0 is returned. |
| */ |
| static di_off_t |
| di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode) |
| { |
| di_off_t size; |
| struct di_priv_data *priv; |
| struct di_all *all = DI_ALL_PTR(st); |
| |
| dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n", |
| off, (void *)arg, mode)); |
| |
| /* |
| * Copyin data and check version. |
| * We only handle private data version 0. |
| */ |
| priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP); |
| if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data), |
| mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) { |
| kmem_free(priv, sizeof (struct di_priv_data)); |
| return (0); |
| } |
| |
| /* |
| * Save di_priv_data copied from userland in snapshot. |
| */ |
| all->pd_version = priv->version; |
| all->n_ppdata = priv->n_parent; |
| all->n_dpdata = priv->n_driver; |
| |
| /* |
| * copyin private data format, modify offset accordingly |
| */ |
| if (all->n_ppdata) { /* parent private data format */ |
| /* |
| * check memory |
| */ |
| size = all->n_ppdata * sizeof (struct di_priv_format); |
| all->ppdata_format = off = di_checkmem(st, off, size); |
| if (ddi_copyin(priv->parent, di_mem_addr(st, off), size, |
| mode) != 0) { |
| kmem_free(priv, sizeof (struct di_priv_data)); |
| return (0); |
| } |
| |
| off += size; |
| } |
| |
| if (all->n_dpdata) { /* driver private data format */ |
| /* |
| * check memory |
| */ |
| size = all->n_dpdata * sizeof (struct di_priv_format); |
| all->dpdata_format = off = di_checkmem(st, off, size); |
| if (ddi_copyin(priv->driver, di_mem_addr(st, off), size, |
| mode) != 0) { |
| kmem_free(priv, sizeof (struct di_priv_data)); |
| return (0); |
| } |
| |
| off += size; |
| } |
| |
| kmem_free(priv, sizeof (struct di_priv_data)); |
| return (off); |
| } |
| |
| /* |
| * Return the real address based on the offset (off) within snapshot |
| */ |
| static void * |
| di_mem_addr(struct di_state *st, di_off_t off) |
| { |
| struct di_mem *dcp = st->memlist; |
| |
| dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n", |
| (void *)dcp, off)); |
| |
| ASSERT(off < st->mem_size); |
| |
| while (off >= dcp->buf_size) { |
| off -= dcp->buf_size; |
| dcp = dcp->next; |
| } |
| |
| dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n", |
| off, (void *)(dcp->buf + off))); |
| |
| return (dcp->buf + off); |
| } |
| |
| /* |
| * Ideally we would use the whole key to derive the hash |
| * value. However, the probability that two keys will |
| * have the same dip (or pip) is very low, so |
| * hashing by dip (or pip) pointer should suffice. |
| */ |
| static uint_t |
| di_hash_byptr(void *arg, mod_hash_key_t key) |
| { |
| struct di_key *dik = key; |
| size_t rshift; |
| void *ptr; |
| |
| ASSERT(arg == NULL); |
| |
| switch (dik->k_type) { |
| case DI_DKEY: |
| ptr = dik->k_u.dkey.dk_dip; |
| rshift = highbit(sizeof (struct dev_info)); |
| break; |
| case DI_PKEY: |
| ptr = dik->k_u.pkey.pk_pip; |
| rshift = highbit(sizeof (struct mdi_pathinfo)); |
| break; |
| default: |
| panic("devinfo: unknown key type"); |
| /*NOTREACHED*/ |
| } |
| return (mod_hash_byptr((void *)rshift, ptr)); |
| } |
| |
| static void |
| di_key_dtor(mod_hash_key_t key) |
| { |
| char *path_addr; |
| struct di_key *dik = key; |
| |
| switch (dik->k_type) { |
| case DI_DKEY: |
| break; |
| case DI_PKEY: |
| path_addr = dik->k_u.pkey.pk_path_addr; |
| if (path_addr) |
| kmem_free(path_addr, strlen(path_addr) + 1); |
| break; |
| default: |
| panic("devinfo: unknown key type"); |
| /*NOTREACHED*/ |
| } |
| |
| kmem_free(dik, sizeof (struct di_key)); |
| } |
| |
| static int |
| di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2) |
| { |
| if (dk1->dk_dip != dk2->dk_dip) |
| return (dk1->dk_dip > dk2->dk_dip ? 1 : -1); |
| |
| if (dk1->dk_major != DDI_MAJOR_T_NONE && |
| dk2->dk_major != DDI_MAJOR_T_NONE) { |
| if (dk1->dk_major != dk2->dk_major) |
| return (dk1->dk_major > dk2->dk_major ? 1 : -1); |
| |
| if (dk1->dk_inst != dk2->dk_inst) |
| return (dk1->dk_inst > dk2->dk_inst ? 1 : -1); |
| } |
| |
| if (dk1->dk_nodeid != dk2->dk_nodeid) |
| return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1); |
| |
| return (0); |
| } |
| |
| static int |
| di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2) |
| { |
| char *p1, *p2; |
| int rv; |
| |
| if (pk1->pk_pip != pk2->pk_pip) |
| return (pk1->pk_pip > pk2->pk_pip ? 1 : -1); |
| |
| p1 = pk1->pk_path_addr; |
| p2 = pk2->pk_path_addr; |
| |
| p1 = p1 ? p1 : ""; |
| p2 = p2 ? p2 : ""; |
| |
| rv = strcmp(p1, p2); |
| if (rv) |
| return (rv > 0 ? 1 : -1); |
| |
| if (pk1->pk_client != pk2->pk_client) |
| return (pk1->pk_client > pk2->pk_client ? 1 : -1); |
| |
| if (pk1->pk_phci != pk2->pk_phci) |
| return (pk1->pk_phci > pk2->pk_phci ? 1 : -1); |
| |
| return (0); |
| } |
| |
| static int |
| di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2) |
| { |
| struct di_key *dik1, *dik2; |
| |
| dik1 = key1; |
| dik2 = key2; |
| |
| if (dik1->k_type != dik2->k_type) { |
| panic("devinfo: mismatched keys"); |
| /*NOTREACHED*/ |
| } |
| |
| switch (dik1->k_type) { |
| case DI_DKEY: |
| return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey))); |
| case DI_PKEY: |
| return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey))); |
| default: |
| panic("devinfo: unknown key type"); |
| /*NOTREACHED*/ |
| } |
| } |
| |
| static void |
| di_copy_aliases(struct di_state *st, alias_pair_t *apair, di_off_t *offp) |
| { |
| di_off_t off; |
| struct di_all *all = DI_ALL_PTR(st); |
| struct di_alias *di_alias; |
| di_off_t curroff; |
| dev_info_t *currdip; |
| size_t size; |
| |
| currdip = NULL; |
| if (resolve_pathname(apair->pair_alias, &currdip, NULL, NULL) != 0) { |
| return; |
| } |
| |
| if (di_dip_find(st, currdip, &curroff) != 0) { |
| ndi_rele_devi(currdip); |
| return; |
| } |
| ndi_rele_devi(currdip); |
| |
| off = *offp; |
| size = sizeof (struct di_alias); |
| size += strlen(apair->pair_alias) + 1; |
| off = di_checkmem(st, off, size); |
| di_alias = DI_ALIAS(di_mem_addr(st, off)); |
| |
| di_alias->self = off; |
| di_alias->next = all->aliases; |
| all->aliases = off; |
| (void) strcpy(di_alias->alias, apair->pair_alias); |
| di_alias->curroff = curroff; |
| |
| off += size; |
| |
| *offp = off; |
| } |
| |
| /* |
| * This is the main function that takes a snapshot |
| */ |
| static di_off_t |
| di_snapshot(struct di_state *st) |
| { |
| di_off_t off; |
| struct di_all *all; |
| dev_info_t *rootnode; |
| char buf[80]; |
| int plen; |
| char *path; |
| vnode_t *vp; |
| int i; |
| |
| all = DI_ALL_PTR(st); |
| dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n")); |
| |
| /* |
| * Translate requested root path if an alias and snap-root != "/" |
| */ |
| if (ddi_aliases_present == B_TRUE && strcmp(all->root_path, "/") != 0) { |
| /* If there is no redirected alias, use root_path as is */ |
| rootnode = ddi_alias_redirect(all->root_path); |
| if (rootnode) { |
| (void) ddi_pathname(rootnode, all->root_path); |
| goto got_root; |
| } |
| } |
| |
| /* |
| * Verify path before entrusting it to e_ddi_hold_devi_by_path because |
| * some platforms have OBP bugs where executing the NDI_PROMNAME code |
| * path against an invalid path results in panic. The lookupnameat |
| * is done relative to rootdir without a leading '/' on "devices/" |
| * to force the lookup to occur in the global zone. |
| */ |
| plen = strlen("devices/") + strlen(all->root_path) + 1; |
| path = kmem_alloc(plen, KM_SLEEP); |
| (void) snprintf(path, plen, "devices/%s", all->root_path); |
| if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) { |
| dcmn_err((CE_CONT, "Devinfo node %s not found\n", |
| all->root_path)); |
| kmem_free(path, plen); |
| return (0); |
| } |
| kmem_free(path, plen); |
| VN_RELE(vp); |
| |
| /* |
| * Hold the devinfo node referred by the path. |
| */ |
| rootnode = e_ddi_hold_devi_by_path(all->root_path, 0); |
| if (rootnode == NULL) { |
| dcmn_err((CE_CONT, "Devinfo node %s not found\n", |
| all->root_path)); |
| return (0); |
| } |
| |
| got_root: |
| (void) snprintf(buf, sizeof (buf), |
| "devinfo registered dips (statep=%p)", (void *)st); |
| |
| st->reg_dip_hash = mod_hash_create_extended(buf, 64, |
| di_key_dtor, mod_hash_null_valdtor, di_hash_byptr, |
| NULL, di_key_cmp, KM_SLEEP); |
| |
| |
| (void) snprintf(buf, sizeof (buf), |
| "devinfo registered pips (statep=%p)", (void *)st); |
| |
| st->reg_pip_hash = mod_hash_create_extended(buf, 64, |
| di_key_dtor, mod_hash_null_valdtor, di_hash_byptr, |
| NULL, di_key_cmp, KM_SLEEP); |
| |
| if (DINFOHP & st->command) { |
| list_create(&st->hp_list, sizeof (i_hp_t), |
| offsetof(i_hp_t, hp_link)); |
| } |
| |
| /* |
| * copy the device tree |
| */ |
| off = di_copytree(DEVI(rootnode), &all->top_devinfo, st); |
| |
| if (DINFOPATH & st->command) { |
| mdi_walk_vhcis(build_vhci_list, st); |
| } |
| |
| if (DINFOHP & st->command) { |
| di_hotplug_children(st); |
| } |
| |
| ddi_release_devi(rootnode); |
| |
| /* |
| * copy the devnames array |
| */ |
| all->devnames = off; |
| off = di_copydevnm(&all->devnames, st); |
| |
| |
| /* initialize the hash tables */ |
| st->lnode_count = 0; |
| st->link_count = 0; |
| |
| if (DINFOLYR & st->command) { |
| off = di_getlink_data(off, st); |
| } |
| |
| all->aliases = 0; |
| if (ddi_aliases_present == B_FALSE) |
| goto done; |
| |
| for (i = 0; i < ddi_aliases.dali_num_pairs; i++) { |
| di_copy_aliases(st, &(ddi_aliases.dali_alias_pairs[i]), &off); |
| } |
| |
| done: |
| /* |
| * Free up hash tables |
| */ |
| mod_hash_destroy_hash(st->reg_dip_hash); |
| mod_hash_destroy_hash(st->reg_pip_hash); |
| |
| /* |
| * Record the timestamp now that we are done with snapshot. |
| * |
| * We compute the checksum later and then only if we cache |
| * the snapshot, since checksumming adds some overhead. |
| * The checksum is checked later if we read the cache file. |
| * from disk. |
| * |
| * Set checksum field to 0 as CRC is calculated with that |
| * field set to 0. |
| */ |
| all->snapshot_time = ddi_get_time(); |
| all->cache_checksum = 0; |
| |
| ASSERT(all->snapshot_time != 0); |
| |
| return (off); |
| } |
| |
| /* |
| * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set |
| */ |
| static di_off_t |
| di_snapshot_and_clean(struct di_state *st) |
| { |
| di_off_t off; |
| |
| modunload_disable(); |
| off = di_snapshot(st); |
| if (off != 0 && (st->command & DINFOCLEANUP)) { |
| ASSERT(DEVICES_FILES_CLEANABLE(st)); |
| /* |
| * Cleanup /etc/devices files: |
| * In order to accurately account for the system configuration |
| * in /etc/devices files, the appropriate drivers must be |
| * fully configured before the cleanup starts. |
| * So enable modunload only after the cleanup. |
| */ |
| i_ddi_clean_devices_files(); |
| /* |
| * Remove backing store nodes for unused devices, |
| * which retain past permissions customizations |
| * and may be undesired for newly configured devices. |
| */ |
| dev_devices_cleanup(); |
| } |
| modunload_enable(); |
| |
| return (off); |
| } |
| |
| /* |
| * construct vhci linkage in the snapshot. |
| */ |
| static int |
| build_vhci_list(dev_info_t *vh_devinfo, void *arg) |
| { |
| struct di_all *all; |
| struct di_node *me; |
| struct di_state *st; |
| di_off_t off; |
| phci_walk_arg_t pwa; |
| |
| dcmn_err3((CE_CONT, "build_vhci list\n")); |
| |
| dcmn_err3((CE_CONT, "vhci node %s%d\n", |
| ddi_driver_name(vh_devinfo), ddi_get_instance(vh_devinfo))); |
| |
| st = (struct di_state *)arg; |
| if (di_dip_find(st, vh_devinfo, &off) != 0) { |
| dcmn_err((CE_WARN, "di_dip_find error for the given node\n")); |
| return (DDI_WALK_TERMINATE); |
| } |
| |
| dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n", |
| st->mem_size, off)); |
| |
| all = DI_ALL_PTR(st); |
| if (all->top_vhci_devinfo == 0) { |
| all->top_vhci_devinfo = off; |
| } else { |
| me = DI_NODE(di_mem_addr(st, all->top_vhci_devinfo)); |
| |
| while (me->next_vhci != 0) { |
| me = DI_NODE(di_mem_addr(st, me->next_vhci)); |
| } |
| |
| me->next_vhci = off; |
| } |
| |
| pwa.off = off; |
| pwa.st = st; |
| mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa); |
| |
| return (DDI_WALK_CONTINUE); |
| } |
| |
| /* |
| * construct phci linkage for the given vhci in the snapshot. |
| */ |
| static int |
| build_phci_list(dev_info_t *ph_devinfo, void *arg) |
| { |
| struct di_node *vh_di_node; |
| struct di_node *me; |
| phci_walk_arg_t *pwa; |
| di_off_t off; |
| |
| pwa = (phci_walk_arg_t *)arg; |
| |
| dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n", |
| pwa->off)); |
| |
| vh_di_node = DI_NODE(di_mem_addr(pwa->st, pwa->off)); |
| if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) { |
| dcmn_err((CE_WARN, "di_dip_find error for the given node\n")); |
| return (DDI_WALK_TERMINATE); |
| } |
| |
| dcmn_err3((CE_CONT, "phci node %s%d, at offset 0x%x\n", |
| ddi_driver_name(ph_devinfo), ddi_get_instance(ph_devinfo), off)); |
| |
| if (vh_di_node->top_phci == 0) { |
| vh_di_node->top_phci = off; |
| return (DDI_WALK_CONTINUE); |
| } |
| |
| me = DI_NODE(di_mem_addr(pwa->st, vh_di_node->top_phci)); |
| |
| while (me->next_phci != 0) { |
| me = DI_NODE(di_mem_addr(pwa->st, me->next_phci)); |
| } |
| me->next_phci = off; |
| |
| return (DDI_WALK_CONTINUE); |
| } |
| |
| /* |
| * Assumes all devinfo nodes in device tree have been snapshotted |
| */ |
| static void |
| snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *off_p) |
| { |
| struct dev_info *node; |
| struct di_node *me; |
| di_off_t off; |
| |
| ASSERT(mutex_owned(&dnp->dn_lock)); |
| |
| node = DEVI(dnp->dn_head); |
| for (; node; node = node->devi_next) { |
| if (di_dip_find(st, (dev_info_t *)node, &off) != 0) |
| continue; |
| |
| ASSERT(off > 0); |
| me = DI_NODE(di_mem_addr(st, off)); |
| ASSERT(me->next == 0 || me->next == -1); |
| /* |
| * Only nodes which were BOUND when they were |
| * snapshotted will be added to per-driver list. |
| */ |
| if (me->next != -1) |
| continue; |
| |
| *off_p = off; |
| off_p = &me->next; |
| } |
| |
| *off_p = 0; |
| } |
| |
| /* |
| * Copy the devnames array, so we have a list of drivers in the snapshot. |
| * Also makes it possible to locate the per-driver devinfo nodes. |
| */ |
| static di_off_t |
| di_copydevnm(di_off_t *off_p, struct di_state *st) |
| { |
| int i; |
| di_off_t off; |
| size_t size; |
| struct di_devnm *dnp; |
| |
| dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p)); |
| |
| /* |
| * make sure there is some allocated memory |
| */ |
| size = devcnt * sizeof (struct di_devnm); |
| *off_p = off = di_checkmem(st, *off_p, size); |
| dnp = DI_DEVNM(di_mem_addr(st, off)); |
| off += size; |
| |
| dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n", |
| devcnt, off)); |
| |
| for (i = 0; i < devcnt; i++) { |
| if (devnamesp[i].dn_name == NULL) { |
| continue; |
| } |
| |
| /* |
| * dn_name is not freed during driver unload or removal. |
| * |
| * There is a race condition when make_devname() changes |
| * dn_name during our strcpy. This should be rare since |
| * only add_drv does this. At any rate, we never had a |
| * problem with ddi_name_to_major(), which should have |
| * the same problem. |
| */ |
| dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n", |
| devnamesp[i].dn_name, devnamesp[i].dn_instance, off)); |
| |
| size = strlen(devnamesp[i].dn_name) + 1; |
| dnp[i].name = off = di_checkmem(st, off, size); |
| (void) strcpy((char *)di_mem_addr(st, off), |
| devnamesp[i].dn_name); |
| off += size; |
| |
| mutex_enter(&devnamesp[i].dn_lock); |
| |
| /* |
| * Snapshot per-driver node list |
| */ |
| snap_driver_list(st, &devnamesp[i], &dnp[i].head); |
| |
| /* |
| * This is not used by libdevinfo, leave it for now |
| */ |
| dnp[i].flags = devnamesp[i].dn_flags; |
| dnp[i].instance = devnamesp[i].dn_instance; |
| |
| /* |
| * get global properties |
| */ |
| if ((DINFOPROP & st->command) && |
| devnamesp[i].dn_global_prop_ptr) { |
| dnp[i].global_prop = off; |
| off = di_getprop(DI_PROP_GLB_LIST, |
| &devnamesp[i].dn_global_prop_ptr->prop_list, |
| &dnp[i].global_prop, st, NULL); |
| } |
| |
| /* |
| * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str |
| */ |
| if (CB_DRV_INSTALLED(devopsp[i])) { |
| if (devopsp[i]->devo_cb_ops) { |
| dnp[i].ops |= DI_CB_OPS; |
| if (devopsp[i]->devo_cb_ops->cb_str) |
| dnp[i].ops |= DI_STREAM_OPS; |
| } |
| if (NEXUS_DRV(devopsp[i])) { |
| dnp[i].ops |= DI_BUS_OPS; |
| } |
| } |
| |
| mutex_exit(&devnamesp[i].dn_lock); |
| } |
| |
| dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off)); |
| |
| return (off); |
| } |
| |
| /* |
| * Copy the kernel devinfo tree. The tree and the devnames array forms |
| * the entire snapshot (see also di_copydevnm). |
| */ |
| static di_off_t |
| di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st) |
| { |
| di_off_t off; |
| struct dev_info *node; |
| struct di_stack *dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP); |
| |
| dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n", |
| (void *)root, *off_p)); |
| |
| /* force attach drivers */ |
| if (i_ddi_devi_attached((dev_info_t *)root) && |
| (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) { |
| (void) ndi_devi_config((dev_info_t *)root, |
| NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT | |
| NDI_DRV_CONF_REPROBE); |
| } |
| |
| /* |
| * Push top_devinfo onto a stack |
| * |
| * The stack is necessary to avoid recursion, which can overrun |
| * the kernel stack. |
| */ |
| PUSH_STACK(dsp, root, off_p); |
| |
| /* |
| * As long as there is a node on the stack, copy the node. |
| * di_copynode() is responsible for pushing and popping |
| * child and sibling nodes on the stack. |
| */ |
| while (!EMPTY_STACK(dsp)) { |
| node = TOP_NODE(dsp); |
| off = di_copynode(node, dsp, st); |
| } |
| |
| /* |
| * Free the stack structure |
| */ |
| kmem_free(dsp, sizeof (struct di_stack)); |
| |
| return (off); |
| } |
| |
| /* |
| * This is the core function, which copies all data associated with a single |
| * node into the snapshot. The amount of information is determined by the |
| * ioctl command. |
| */ |
| static di_off_t |
| di_copynode(struct dev_info *node, struct di_stack *dsp, struct di_state *st) |
| { |
| di_off_t off; |
| struct di_node *me; |
| size_t size; |
| struct dev_info *n; |
| |
| dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth)); |
| ASSERT((node != NULL) && (node == TOP_NODE(dsp))); |
| |
| /* |
| * check memory usage, and fix offsets accordingly. |
| */ |
| size = sizeof (struct di_node); |
| *(TOP_OFFSET(dsp)) = off = di_checkmem(st, *(TOP_OFFSET(dsp)), size); |
| me = DI_NODE(di_mem_addr(st, off)); |
| me->self = off; |
| off += size; |
| |
| dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n", |
| node->devi_node_name, node->devi_instance, off)); |
| |
| /* |
| * Node parameters: |
| * self -- offset of current node within snapshot |
| * nodeid -- pointer to PROM node (tri-valued) |
| * state -- hot plugging device state |
| * node_state -- devinfo node state |
| */ |
| me->instance = node->devi_instance; |
| me->nodeid = node->devi_nodeid; |
| me->node_class = node->devi_node_class; |
| me->attributes = node->devi_node_attributes; |
| me->state = node->devi_state; |
| me->flags = node->devi_flags; |
| me->node_state = node->devi_node_state; |
| me->next_vhci = 0; /* Filled up by build_vhci_list. */ |
| me->top_phci = 0; /* Filled up by build_phci_list. */ |
| me->next_phci = 0; /* Filled up by build_phci_list. */ |
| me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */ |
| me->user_private_data = NULL; |
| |
| /* |
| * Get parent's offset in snapshot from the stack |
| * and store it in the current node |
| */ |
| if (dsp->depth > 1) { |
| me->parent = *(PARENT_OFFSET(dsp)); |
| } |
| |
| /* |
| * Save the offset of this di_node in a hash table. |
| * This is used later to resolve references to this |
| * dip from other parts of the tree (per-driver list, |
| * multipathing linkages, layered usage linkages). |
| * The key used for the hash table is derived from |
| * information in the dip. |
| */ |
| di_register_dip(st, (dev_info_t *)node, me->self); |
| |
| #ifdef DEVID_COMPATIBILITY |
| /* check for devid as property marker */ |
| if (node->devi_devid_str) { |
| ddi_devid_t devid; |
| |
| /* |
| * The devid is now represented as a property. For |
| * compatibility with di_devid() interface in libdevinfo we |
| * must return it as a binary structure in the snapshot. When |
| * (if) di_devid() is removed from libdevinfo then the code |
| * related to DEVID_COMPATIBILITY can be removed. |
| */ |
| if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) == |
| DDI_SUCCESS) { |
| size = ddi_devid_sizeof(devid); |
| off = di_checkmem(st, off, size); |
| me->devid = off; |
| bcopy(devid, di_mem_addr(st, off), size); |
| off += size; |
| ddi_devid_free(devid); |
| } |
| } |
| #endif /* DEVID_COMPATIBILITY */ |
| |
| if (node->devi_node_name) { |
| size = strlen(node->devi_node_name) + 1; |
| me->node_name = off = di_checkmem(st, off, size); |
| (void) strcpy(di_mem_addr(st, off), node->devi_node_name); |
| off += size; |
| } |
| |
| if (node->devi_compat_names && (node->devi_compat_length > 1)) { |
| size = node->devi_compat_length; |
| me->compat_names = off = di_checkmem(st, off, size); |
| me->compat_length = (int)size; |
| bcopy(node->devi_compat_names, di_mem_addr(st, off), size); |
| off += size; |
| } |
| |
| if (node->devi_addr) { |
| size = strlen(node->devi_addr) + 1; |
| me->address = off = di_checkmem(st, off, size); |
| (void) strcpy(di_mem_addr(st, off), node->devi_addr); |
| off += size; |
| } |
| |
| if (node->devi_binding_name) { |
| size = strlen(node->devi_binding_name) + 1; |
| me->bind_name = off = di_checkmem(st, off, size); |
| (void) strcpy(di_mem_addr(st, off), node->devi_binding_name); |
| off += size; |
| } |
| |
| me->drv_major = node->devi_major; |
| |
| /* |
| * If the dip is BOUND, set the next pointer of the |
| * per-instance list to -1, indicating that it is yet to be resolved. |
| * This will be resolved later in snap_driver_list(). |
| */ |
| if (me->drv_major != -1) { |
| me->next = -1; |
| } else { |
| me->next = 0; |
| } |
| |
| /* |
| * An optimization to skip mutex_enter when not needed. |
| */ |
| if (!((DINFOMINOR | DINFOPROP | DINFOPATH | DINFOHP) & st->command)) { |
| goto priv_data; |
| } |
| |
| /* |
| * LOCKING: We already have an active ndi_devi_enter to gather the |
| * minor data, and we will take devi_lock to gather properties as |
| * needed off di_getprop. |
| */ |
| if (!(DINFOMINOR & st->command)) { |
| goto path; |
| } |
| |
| ASSERT(DEVI_BUSY_OWNED(node)); |
| if (node->devi_minor) { /* minor data */ |
| me->minor_data = off; |
| off = di_getmdata(node->devi_minor, &me->minor_data, |
| me->self, st); |
| } |
| |
| path: |
| if (!(DINFOPATH & st->command)) { |
| goto property; |
| } |
| |
| if (MDI_VHCI(node)) { |
| me->multipath_component = MULTIPATH_COMPONENT_VHCI; |
| } |
| |
| if (MDI_CLIENT(node)) { |
| me->multipath_component = MULTIPATH_COMPONENT_CLIENT; |
| me->multipath_client = off; |
| off = di_getpath_data((dev_info_t *)node, &me->multipath_client, |
| me->self, st, 1); |
| dcmn_err((CE_WARN, "me->multipath_client = %x for node %p " |
| "component type = %d. off=%d", |
| me->multipath_client, |
| (void *)node, node->devi_mdi_component, off)); |
| } |
| |
| if (MDI_PHCI(node)) { |
| me->multipath_component = MULTIPATH_COMPONENT_PHCI; |
| me->multipath_phci = off; |
| off = di_getpath_data((dev_info_t *)node, &me->multipath_phci, |
| me->self, st, 0); |
| dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p " |
| "component type = %d. off=%d", |
| me->multipath_phci, |
| (void *)node, node->devi_mdi_component, off)); |
| } |
| |
| property: |
| if (!(DINFOPROP & st->command)) { |
| goto hotplug_data; |
| } |
| |
| if (node->devi_drv_prop_ptr) { /* driver property list */ |
| me->drv_prop = off; |
| off = di_getprop(DI_PROP_DRV_LIST, &node->devi_drv_prop_ptr, |
| &me->drv_prop, st, node); |
| } |
| |
| if (node->devi_sys_prop_ptr) { /* system property list */ |
| me->sys_prop = off; |
| off = di_getprop(DI_PROP_SYS_LIST, &node->devi_sys_prop_ptr, |
| &me->sys_prop, st, node); |
| } |
| |
| if (node->devi_hw_prop_ptr) { /* hardware property list */ |
| me->hw_prop = off; |
| off = di_getprop(DI_PROP_HW_LIST, &node->devi_hw_prop_ptr, |
| &me->hw_prop, st, node); |
| } |
| |
| if (node->devi_global_prop_list == NULL) { |
| me->glob_prop = (di_off_t)-1; /* not global property */ |
| } else { |
| /* |
| * Make copy of global property list if this devinfo refers |
| * global properties different from what's on the devnames |
| * array. It can happen if there has been a forced |
| * driver.conf update. See mod_drv(1M). |
| */ |
| ASSERT(me->drv_major != -1); |
| if (node->devi_global_prop_list != |
| devnamesp[me->drv_major].dn_global_prop_ptr) { |
| me->glob_prop = off; |
| off = di_getprop(DI_PROP_GLB_LIST, |
| &node->devi_global_prop_list->prop_list, |
| &me->glob_prop, st, node); |
| } |
| } |
| |
| hotplug_data: |
| if (!(DINFOHP & st->command)) { |
| goto priv_data; |
| } |
| |
| if (node->devi_hp_hdlp) { /* hotplug data */ |
| me->hp_data = off; |
| off = di_gethpdata(node->devi_hp_hdlp, &me->hp_data, st); |
| } |
| |
| priv_data: |
| if (!(DINFOPRIVDATA & st->command)) { |
| goto pm_info; |
| } |
| |
| if (ddi_get_parent_data((dev_info_t *)node) != NULL) { |
| me->parent_data = off; |
| off = di_getppdata(node, &me->parent_data, st); |
| } |
| |
| if (ddi_get_driver_private((dev_info_t *)node) != NULL) { |
| me->driver_data = off; |
| off = di_getdpdata(node, &me->driver_data, st); |
| } |
| |
| pm_info: /* NOT implemented */ |
| |
| subtree: |
| /* keep the stack aligned */ |
| off = DI_ALIGN(off); |
| |
| if (!(DINFOSUBTREE & st->command)) { |
| POP_STACK(dsp); |
| return (off); |
| } |
| |
| child: |
| /* |
| * If there is a visible child--push child onto stack. |
| * Hold the parent (me) busy while doing so. |
| */ |
| if ((n = node->devi_child) != NULL) { |
| /* skip hidden nodes */ |
| while (n && ndi_dev_is_hidden_node((dev_info_t *)n)) |
| n = n->devi_sibling; |
| if (n) { |
| me->child = off; |
| PUSH_STACK(dsp, n, &me->child); |
| return (me->child); |
| } |
| } |
| |
| sibling: |
| /* |
| * Done with any child nodes, unroll the stack till a visible |
| * sibling of a parent node is found or root node is reached. |
| */ |
| POP_STACK(dsp); |
| while (!EMPTY_STACK(dsp)) { |
| if ((n = node->devi_sibling) != NULL) { |
| /* skip hidden nodes */ |
| while (n && ndi_dev_is_hidden_node((dev_info_t *)n)) |
| n = n->devi_sibling; |
| if (n) { |
| me->sibling = DI_ALIGN(off); |
| PUSH_STACK(dsp, n, &me->sibling); |
| return (me->sibling); |
| } |
| } |
| node = TOP_NODE(dsp); |
| me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp)))); |
| POP_STACK(dsp); |
| } |
| |
| /* |
| * DONE with all nodes |
| */ |
| return (off); |
| } |
| |
| static i_lnode_t * |
| i_lnode_alloc(int modid) |
| { |
| i_lnode_t *i_lnode; |
| |
| i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP); |
| |
| ASSERT(modid != -1); |
| i_lnode->modid = modid; |
| |
| return (i_lnode); |
| } |
| |
| static void |
| i_lnode_free(i_lnode_t *i_lnode) |
| { |
| kmem_free(i_lnode, sizeof (i_lnode_t)); |
| } |
| |
| static void |
| i_lnode_check_free(i_lnode_t *i_lnode) |
| { |
| /* This lnode and its dip must have been snapshotted */ |
| ASSERT(i_lnode->self > 0); |
| ASSERT(i_lnode->di_node->self > 0); |
| |
| /* at least 1 link (in or out) must exist for this lnode */ |
| ASSERT(i_lnode->link_in || i_lnode->link_out); |
| |
| i_lnode_free(i_lnode); |
| } |
| |
| static i_link_t * |
| i_link_alloc(int spec_type) |
| { |
| i_link_t *i_link; |
| |
| i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP); |
| i_link->spec_type = spec_type; |
| |
| return (i_link); |
| } |
| |
| static void |
| i_link_check_free(i_link_t *i_link) |
| { |
| /* This link must have been snapshotted */ |
| ASSERT(i_link->self > 0); |
| |
| /* Both endpoint lnodes must exist for this link */ |
| ASSERT(i_link->src_lnode); |
| ASSERT(i_link->tgt_lnode); |
| |
| kmem_free(i_link, sizeof (i_link_t)); |
| } |
| |
| /*ARGSUSED*/ |
| static uint_t |
| i_lnode_hashfunc(void *arg, mod_hash_key_t key) |
| { |
| i_lnode_t *i_lnode = (i_lnode_t *)key; |
| struct di_node *ptr; |
| dev_t dev; |
| |
| dev = i_lnode->devt; |
| if (dev != DDI_DEV_T_NONE) |
| return (i_lnode->modid + getminor(dev) + getmajor(dev)); |
| |
| ptr = i_lnode->di_node; |
| ASSERT(ptr->self > 0); |
| if (ptr) { |
| uintptr_t k = (uintptr_t)ptr; |
| k >>= (int)highbit(sizeof (struct di_node)); |
| return ((uint_t)k); |
| } |
| |
| return (i_lnode->modid); |
| } |
| |
| static int |
| i_lnode_cmp(void *arg1, void *arg2) |
| { |
| i_lnode_t *i_lnode1 = (i_lnode_t *)arg1; |
| i_lnode_t *i_lnode2 = (i_lnode_t *)arg2; |
| |
| if (i_lnode1->modid != i_lnode2->modid) { |
| return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1); |
| } |
| |
| if (i_lnode1->di_node != i_lnode2->di_node) |
| return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1); |
| |
| if (i_lnode1->devt != i_lnode2->devt) |
| return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1); |
| |
| return (0); |
| } |
| |
| /* |
| * An lnode represents a {dip, dev_t} tuple. A link represents a |
| * {src_lnode, tgt_lnode, spec_type} tuple. |
| * The following callback assumes that LDI framework ref-counts the |
| * src_dip and tgt_dip while invoking this callback. |
| */ |
| static int |
| di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg) |
| { |
| struct di_state *st = (struct di_state *)arg; |
| i_lnode_t *src_lnode, *tgt_lnode, *i_lnode; |
| i_link_t **i_link_next, *i_link; |
| di_off_t soff, toff; |
| mod_hash_val_t nodep = NULL; |
| int res; |
| |
| /* |
| * if the source or target of this device usage information doesn't |
| * correspond to a device node then we don't report it via |
| * libdevinfo so return. |
| */ |
| if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL)) |
| return (LDI_USAGE_CONTINUE); |
| |
| ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip)); |
| ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip)); |
| |
| /* |
| * Skip the ldi_usage if either src or tgt dip is not in the |
| * snapshot. This saves us from pruning bad lnodes/links later. |
| */ |
| if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0) |
| return (LDI_USAGE_CONTINUE); |
| if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0) |
| return (LDI_USAGE_CONTINUE); |
| |
| ASSERT(soff > 0); |
| ASSERT(toff > 0); |
| |
| /* |
| * allocate an i_lnode and add it to the lnode hash |
| * if it is not already present. For this particular |
| * link the lnode is a source, but it may |
| * participate as tgt or src in any number of layered |
| * operations - so it may already be in the hash. |
| */ |
| i_lnode = i_lnode_alloc(ldi_usage->src_modid); |
| i_lnode->di_node = DI_NODE(di_mem_addr(st, soff)); |
| i_lnode->devt = ldi_usage->src_devt; |
| |
| res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); |
| if (res == MH_ERR_NOTFOUND) { |
| /* |
| * new i_lnode |
| * add it to the hash and increment the lnode count |
| */ |
| res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); |
| ASSERT(res == 0); |
| st->lnode_count++; |
| src_lnode = i_lnode; |
| } else { |
| /* this i_lnode already exists in the lnode_hash */ |
| i_lnode_free(i_lnode); |
| src_lnode = (i_lnode_t *)nodep; |
| } |
| |
| /* |
| * allocate a tgt i_lnode and add it to the lnode hash |
| */ |
| i_lnode = i_lnode_alloc(ldi_usage->tgt_modid); |
| i_lnode->di_node = DI_NODE(di_mem_addr(st, toff)); |
| i_lnode->devt = ldi_usage->tgt_devt; |
| |
| res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); |
| if (res == MH_ERR_NOTFOUND) { |
| /* |
| * new i_lnode |
| * add it to the hash and increment the lnode count |
| */ |
| res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); |
| ASSERT(res == 0); |
| st->lnode_count++; |
| tgt_lnode = i_lnode; |
| } else { |
| /* this i_lnode already exists in the lnode_hash */ |
| i_lnode_free(i_lnode); |
| tgt_lnode = (i_lnode_t *)nodep; |
| } |
| |
| /* |
| * allocate a i_link |
| */ |
| i_link = i_link_alloc(ldi_usage->tgt_spec_type); |
| i_link->src_lnode = src_lnode; |
| i_link->tgt_lnode = tgt_lnode; |
| |
| /* |
| * add this link onto the src i_lnodes outbound i_link list |
| */ |
| i_link_next = &(src_lnode->link_out); |
| while (*i_link_next != NULL) { |
| if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) && |
| (i_link->spec_type == (*i_link_next)->spec_type)) { |
| /* this link already exists */ |
| kmem_free(i_link, sizeof (i_link_t)); |
| return (LDI_USAGE_CONTINUE); |
| } |
| i_link_next = &((*i_link_next)->src_link_next); |
| } |
| *i_link_next = i_link; |
| |
| /* |
| * add this link onto the tgt i_lnodes inbound i_link list |
| */ |
| i_link_next = &(tgt_lnode->link_in); |
| while (*i_link_next != NULL) { |
| ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0); |
| i_link_next = &((*i_link_next)->tgt_link_next); |
| } |
| *i_link_next = i_link; |
| |
| /* |
| * add this i_link to the link hash |
| */ |
| res = mod_hash_insert(st->link_hash, i_link, i_link); |
| ASSERT(res == 0); |
| st->link_count++; |
| |
| return (LDI_USAGE_CONTINUE); |
| } |
| |
| struct i_layer_data { |
| struct di_state *st; |
| int lnode_count; |
| int link_count; |
| di_off_t lnode_off; |
| di_off_t link_off; |
| }; |
| |
| /*ARGSUSED*/ |
| static uint_t |
| i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) |
| { |
| i_link_t *i_link = (i_link_t *)key; |
| struct i_layer_data *data = arg; |
| struct di_link *me; |
| struct di_lnode *melnode; |
| struct di_node *medinode; |
| |
| ASSERT(i_link->self == 0); |
| |
| i_link->self = data->link_off + |
| (data->link_count * sizeof (struct di_link)); |
| data->link_count++; |
| |
| ASSERT(data->link_off > 0 && data->link_count > 0); |
| ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */ |
| ASSERT(data->link_count <= data->st->link_count); |
| |
| /* fill in fields for the di_link snapshot */ |
| me = DI_LINK(di_mem_addr(data->st, i_link->self)); |
| me->self = i_link->self; |
| me->spec_type = i_link->spec_type; |
| |
| /* |
| * The src_lnode and tgt_lnode i_lnode_t for this i_link_t |
| * are created during the LDI table walk. Since we are |
| * walking the link hash, the lnode hash has already been |
| * walked and the lnodes have been snapshotted. Save lnode |
| * offsets. |
| */ |
| me->src_lnode = i_link->src_lnode->self; |
| me->tgt_lnode = i_link->tgt_lnode->self; |
| |
| /* |
| * Save this link's offset in the src_lnode snapshot's link_out |
| * field |
| */ |
| melnode = DI_LNODE(di_mem_addr(data->st, me->src_lnode)); |
| me->src_link_next = melnode->link_out; |
| melnode->link_out = me->self; |
| |
| /* |
| * Put this link on the tgt_lnode's link_in field |
| */ |
| melnode = DI_LNODE(di_mem_addr(data->st, me->tgt_lnode)); |
| me->tgt_link_next = melnode->link_in; |
| melnode->link_in = me->self; |
| |
| /* |
| * An i_lnode_t is only created if the corresponding dip exists |
| * in the snapshot. A pointer to the di_node is saved in the |
| * i_lnode_t when it is allocated. For this link, get the di_node |
| * for the source lnode. Then put the link on the di_node's list |
| * of src links |
| */ |
| medinode = i_link->src_lnode->di_node; |
| me->src_node_next = medinode->src_links; |
| medinode->src_links = me->self; |
| |
| /* |
| * Put this link on the tgt_links list of the target |
| * dip. |
| */ |
| medinode = i_link->tgt_lnode->di_node; |
| me->tgt_node_next = medinode->tgt_links; |
| medinode->tgt_links = me->self; |
| |
| return (MH_WALK_CONTINUE); |
| } |
| |
| /*ARGSUSED*/ |
| static uint_t |
| i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) |
| { |
| i_lnode_t *i_lnode = (i_lnode_t *)key; |
| struct i_layer_data *data = arg; |
| struct di_lnode *me; |
| struct di_node *medinode; |
| |
| ASSERT(i_lnode->self == 0); |
| |
| i_lnode->self = data->lnode_off + |
| (data->lnode_count * sizeof (struct di_lnode)); |
| data->lnode_count++; |
| |
| ASSERT(data->lnode_off > 0 && data->lnode_count > 0); |
| ASSERT(data->link_count == 0); /* links not done yet */ |
| ASSERT(data->lnode_count <= data->st->lnode_count); |
| |
| /* fill in fields for the di_lnode snapshot */ |
| me = DI_LNODE(di_mem_addr(data->st, i_lnode->self)); |
| me->self = i_lnode->self; |
| |
| if (i_lnode->devt == DDI_DEV_T_NONE) { |
| me->dev_major = DDI_MAJOR_T_NONE; |
| me->dev_minor = DDI_MAJOR_T_NONE; |
| } else { |
| me->dev_major = getmajor(i_lnode->devt); |
| me->dev_minor = getminor(i_lnode->devt); |
| } |
| |
| /* |
| * The dip corresponding to this lnode must exist in |
| * the snapshot or we wouldn't have created the i_lnode_t |
| * during LDI walk. Save the offset of the dip. |
| */ |
| ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0); |
| me->node = i_lnode->di_node->self; |
| |
| /* |
| * There must be at least one link in or out of this lnode |
| * or we wouldn't have created it. These fields will be set |
| * during the link hash walk. |
| */ |
| ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL)); |
| |
| /* |
| * set the offset of the devinfo node associated with this |
| * lnode. Also update the node_next next pointer. this pointer |
| * is set if there are multiple lnodes associated with the same |
| * devinfo node. (could occure when multiple minor nodes |
| * are open for one device, etc.) |
| */ |
| medinode = i_lnode->di_node; |
| me->node_next = medinode->lnodes; |
| medinode->lnodes = me->self; |
| |
| return (MH_WALK_CONTINUE); |
| } |
| |
| static di_off_t |
| di_getlink_data(di_off_t off, struct di_state *st) |
| { |
| struct i_layer_data data = {0}; |
| size_t size; |
| |
| dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off)); |
| |
| st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32, |
| mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free, |
| i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP); |
| |
| st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32, |
| (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t)); |
| |
| /* get driver layering information */ |
| (void) ldi_usage_walker(st, di_ldi_callback); |
| |
| /* check if there is any link data to include in the snapshot */ |
| if (st->lnode_count == 0) { |
| ASSERT(st->link_count == 0); |
| goto out; |
| } |
| |
| ASSERT(st->link_count != 0); |
| |
| /* get a pointer to snapshot memory for all the di_lnodes */ |
| size = sizeof (struct di_lnode) * st->lnode_count; |
| data.lnode_off = off = di_checkmem(st, off, size); |
| off += size; |
| |
| /* get a pointer to snapshot memory for all the di_links */ |
| size = sizeof (struct di_link) * st->link_count; |
| data.link_off = off = di_checkmem(st, off, size); |
| off += size; |
| |
| data.lnode_count = data.link_count = 0; |
| data.st = st; |
| |
| /* |
| * We have lnodes and links that will go into the |
| * snapshot, so let's walk the respective hashes |
| * and snapshot them. The various linkages are |
| * also set up during the walk. |
| */ |
| mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data); |
| ASSERT(data.lnode_count == st->lnode_count); |
| |
| mod_hash_walk(st->link_hash, i_link_walker, (void *)&data); |
| ASSERT(data.link_count == st->link_count); |
| |
| out: |
| /* free up the i_lnodes and i_links used to create the snapshot */ |
| mod_hash_destroy_hash(st->lnode_hash); |
| mod_hash_destroy_hash(st->link_hash); |
| st->lnode_count = 0; |
| st->link_count = 0; |
| |
| return (off); |
| } |
| |
| |
| /* |
| * Copy all minor data nodes attached to a devinfo node into the snapshot. |
| * It is called from di_copynode with active ndi_devi_enter to protect |
| * the list of minor nodes. |
| */ |
| static di_off_t |
| di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node, |
| struct di_state *st) |
| { |
| di_off_t off; |
| struct di_minor *me; |
| size_t size; |
| |
| dcmn_err2((CE_CONT, "di_getmdata:\n")); |
| |
| /* |
| * check memory first |
| */ |
| off = di_checkmem(st, *off_p, sizeof (struct di_minor)); |
| *off_p = off; |
| |
| do { |
| me = DI_MINOR(di_mem_addr(st, off)); |
| me->self = off; |
| me->type = mnode->type; |
| me->node = node; |
| me->user_private_data = NULL; |
| |
| off += sizeof (struct di_minor); |
| |
| /* |
| * Split dev_t to major/minor, so it works for |
| * both ILP32 and LP64 model |
| */ |
| me->dev_major = getmajor(mnode->ddm_dev); |
| me->dev_minor = getminor(mnode->ddm_dev); |
| me->spec_type = mnode->ddm_spec_type; |
| |
| if (mnode->ddm_name) { |
| size = strlen(mnode->ddm_name) + 1; |
| me->name = off = di_checkmem(st, off, size); |
| (void) strcpy(di_mem_addr(st, off), mnode->ddm_name); |
| off += size; |
| } |
| |
| if (mnode->ddm_node_type) { |
| size = strlen(mnode->ddm_node_type) + 1; |
| me->node_type = off = di_checkmem(st, off, size); |
| (void) strcpy(di_mem_addr(st, off), |
| mnode->ddm_node_type); |
| off += size; |
| } |
| |
| off = di_checkmem(st, off, sizeof (struct di_minor)); |
| me->next = off; |
| mnode = mnode->next; |
| } while (mnode); |
| |
| me->next = 0; |
| |
| return (off); |
| } |
| |
/*
 * di_register_dip(), di_dip_find(): The dip must be protected
 * from deallocation when using these routines - this can either
 * be a reference count, a busy hold or a per-driver lock.
 */
| |
| static void |
| di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off) |
| { |
| struct dev_info *node = DEVI(dip); |
| struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); |
| struct di_dkey *dk; |
| |
| ASSERT(dip); |
| ASSERT(off > 0); |
| |
| key->k_type = DI_DKEY; |
| dk = &(key->k_u.dkey); |
| |
| dk->dk_dip = dip; |
| dk->dk_major = node->devi_major; |
| dk->dk_inst = node->devi_instance; |
| dk->dk_nodeid = node->devi_nodeid; |
| |
| if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key, |
| (mod_hash_val_t)(uintptr_t)off) != 0) { |
| panic( |
| "duplicate devinfo (%p) registered during device " |
| "tree walk", (void *)dip); |
| } |
| } |
| |
| |
| static int |
| di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p) |
| { |
| /* |
| * uintptr_t must be used because it matches the size of void *; |
| * mod_hash expects clients to place results into pointer-size |
| * containers; since di_off_t is always a 32-bit offset, alignment |
| * would otherwise be broken on 64-bit kernels. |
| */ |
| uintptr_t offset; |
| struct di_key key = {0}; |
| struct di_dkey *dk; |
| |
| ASSERT(st->reg_dip_hash); |
| ASSERT(dip); |
| ASSERT(off_p); |
| |
| |
| key.k_type = DI_DKEY; |
| dk = &(key.k_u.dkey); |
| |
| dk->dk_dip = dip; |
| dk->dk_major = DEVI(dip)->devi_major; |
| dk->dk_inst = DEVI(dip)->devi_instance; |
| dk->dk_nodeid = DEVI(dip)->devi_nodeid; |
| |
| if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key, |
| (mod_hash_val_t *)&offset) == 0) { |
| *off_p = (di_off_t)offset; |
| return (0); |
| } else { |
| return (-1); |
| } |
| } |
| |
/*
 * di_register_pip(), di_pip_find(): The pip must be protected from
 * deallocation when using these routines. The caller must do this by
 * protecting the client (or phci) <-> pip linkage while traversing the
 * list and then holding the pip when it is found in the list.
 */
| |
| static void |
| di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off) |
| { |
| struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); |
| char *path_addr; |
| struct di_pkey *pk; |
| |
| ASSERT(pip); |
| ASSERT(off > 0); |
| |
| key->k_type = DI_PKEY; |
| pk = &(key->k_u.pkey); |
| |
| pk->pk_pip = pip; |
| path_addr = mdi_pi_get_addr(pip); |
| if (path_addr) |
| pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP); |
| pk->pk_client = mdi_pi_get_client(pip); |
| pk->pk_phci = mdi_pi_get_phci(pip); |
| |
| if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key, |
| (mod_hash_val_t)(uintptr_t)off) != 0) { |
| panic( |
| "duplicate pathinfo (%p) registered during device " |
| "tree walk", (void *)pip); |
| } |
| } |
| |
| /* |
| * As with di_register_pip, the caller must hold or lock the pip |
| */ |
| static int |
| di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p) |
| { |
| /* |
| * uintptr_t must be used because it matches the size of void *; |
| * mod_hash expects clients to place results into pointer-size |
| * containers; since di_off_t is always a 32-bit offset, alignment |
| * would otherwise be broken on 64-bit kernels. |
| */ |
| uintptr_t offset; |
| struct di_key key = {0}; |
| struct di_pkey *pk; |
| |
| ASSERT(st->reg_pip_hash); |
| ASSERT(off_p); |
| |
| if (pip == NULL) { |
| *off_p = 0; |
| return (0); |
| } |
| |
| key.k_type = DI_PKEY; |
| pk = &(key.k_u.pkey); |
| |
| pk->pk_pip = pip; |
| pk->pk_path_addr = mdi_pi_get_addr(pip); |
| pk->pk_client = mdi_pi_get_client(pip); |
| pk->pk_phci = mdi_pi_get_phci(pip); |
| |
| if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key, |
| (mod_hash_val_t *)&offset) == 0) { |
| *off_p = (di_off_t)offset; |
| return (0); |
| } else { |
| return (-1); |
| } |
| } |
| |
| static di_path_state_t |
| path_state_convert(mdi_pathinfo_state_t st) |
| { |
| switch (st) { |
| case MDI_PATHINFO_STATE_ONLINE: |
| return (DI_PATH_STATE_ONLINE); |
| case MDI_PATHINFO_STATE_STANDBY: |
| return (DI_PATH_STATE_STANDBY); |
| case MDI_PATHINFO_STATE_OFFLINE: |
| return (DI_PATH_STATE_OFFLINE); |
| case MDI_PATHINFO_STATE_FAULT: |
| return (DI_PATH_STATE_FAULT); |
| default: |
| return (DI_PATH_STATE_UNKNOWN); |
| } |
| } |
| |
| static uint_t |
| path_flags_convert(uint_t pi_path_flags) |
| { |
| uint_t di_path_flags = 0; |
| |
| /* MDI_PATHINFO_FLAGS_HIDDEN nodes not in snapshot */ |
| |
| if (pi_path_flags & MDI_PATHINFO_FLAGS_DEVICE_REMOVED) |
| di_path_flags |= DI_PATH_FLAGS_DEVICE_REMOVED; |
| |
| return (di_path_flags); |
| } |
| |
| |
| static di_off_t |
| di_path_getprop(mdi_pathinfo_t *pip, di_off_t *off_p, |
| struct di_state *st) |
| { |
| nvpair_t *prop = NULL; |
| struct di_path_prop *me; |
| int off; |
| size_t size; |
| char *str; |
| uchar_t *buf; |
| uint_t nelems; |
| |
| off = *off_p; |
| if (mdi_pi_get_next_prop(pip, NULL) == NULL) { |
| *off_p = 0; |
| return (off); |
| } |
| |
| off = di_checkmem(st, off, sizeof (struct di_path_prop)); |
| *off_p = off; |
| |
| while (prop = mdi_pi_get_next_prop(pip, prop)) { |
| me = DI_PATHPROP(di_mem_addr(st, off)); |
| me->self = off; |
| off += sizeof (struct di_path_prop); |
| |
| /* |
| * property name |
| */ |
| size = strlen(nvpair_name(prop)) + 1; |
| me->prop_name = off = di_checkmem(st, off, size); |
| (void) strcpy(di_mem_addr(st, off), nvpair_name(prop)); |
| off += size; |
| |
| switch (nvpair_type(prop)) { |
| case DATA_TYPE_BYTE: |
| case DATA_TYPE_INT16: |
| case DATA_TYPE_UINT16: |
| case DATA_TYPE_INT32: |
| case DATA_TYPE_UINT32: |
| me->prop_type = DDI_PROP_TYPE_INT; |
| size = sizeof (int32_t); |
| off = di_checkmem(st, off, size); |
| (void) nvpair_value_int32(prop, |
| (int32_t *)di_mem_addr(st, off)); |
| break; |
| |
| case DATA_TYPE_INT64: |
| case DATA_TYPE_UINT64: |
| me->prop_type = DDI_PROP_TYPE_INT64; |
| size = sizeof (int64_t); |
| off = di_checkmem(st, off, size); |
| (void) nvpair_value_int64(prop, |
| (int64_t *)di_mem_addr(st, off)); |
| break; |
|