| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| |
| /* |
| * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
| * Copyright (c) 2017, Joyent, Inc. |
| * Copyright 2015 Garrett D'Amore <garrett@damore.org> |
| */ |
| |
| /* |
| * MAC Services Module |
| * |
| * The GLDv3 framework locking - The MAC layer |
| * -------------------------------------------- |
| * |
| * The MAC layer is central to the GLD framework and can provide the locking |
| * framework needed for itself and for the use of MAC clients. MAC end points |
 * are fairly disjoint and don't share a lot of state, so a coarse grained
 * multi-threading scheme is used: all create/modify/delete or set type
 * control operations on a given mac end point are single threaded, while
 * data threads are allowed to run concurrently.
| * |
| * Control operations (set) that modify a mac end point are always serialized on |
 * a per mac end point basis; we have at most one such thread per mac end point
| * at a time. |
| * |
 * All other operations are essentially multi-threaded. Examples are control
 * operations (get), like reading statistics, which may not care about reading
 * values atomically, and data threads sending or receiving data. These types
 * of operations generally don't modify the control state. Any state these
 * operations care about is protected using traditional locks.
| * |
| * The perimeter only serializes serial operations. It does not imply there |
| * aren't any other concurrent operations. However a serialized operation may |
| * sometimes need to make sure it is the only thread. In this case it needs |
| * to use reference counting mechanisms to cv_wait until any current data |
| * threads are done. |
| * |
| * The mac layer itself does not hold any locks across a call to another layer. |
| * The perimeter is however held across a down call to the driver to make the |
| * whole control operation atomic with respect to other control operations. |
| * Also the data path and get type control operations may proceed concurrently. |
| * These operations synchronize with the single serial operation on a given mac |
 * end point using regular locks. The perimeter ensures that conflicting
 * operations, say a mac_multicast_add and a mac_multicast_remove on the
 * same mac end point, don't interfere with each other. It also ensures that
 * the changes in the mac layer and the call to the underlying driver to, say,
 * add a multicast address are done atomically, without interference from a
 * thread trying to delete the same address.
| * |
| * For example, consider |
 * mac_multicast_add()
| * { |
| * mac_perimeter_enter(); serialize all control operations |
| * |
| * grab list lock protect against access by data threads |
| * add to list |
| * drop list lock |
| * |
| * call driver's mi_multicst |
| * |
| * mac_perimeter_exit(); |
| * } |
| * |
| * To lessen the number of serialization locks and simplify the lock hierarchy, |
 * we serialize all the control operations on a per mac end point basis using a
| * single serialization lock called the perimeter. We allow recursive entry into |
| * the perimeter to facilitate use of this mechanism by both the mac client and |
| * the MAC layer itself. |
| * |
| * MAC client means an entity that does an operation on a mac handle |
| * obtained from a mac_open/mac_client_open. Similarly MAC driver means |
| * an entity that does an operation on a mac handle obtained from a |
 * mac_register. An entity could be both client and driver, but on different
 * handles (e.g. aggr), and should only make the corresponding mac interface
 * calls, i.e. mac driver interface or mac client interface, as appropriate
 * for that mac handle.
| * |
| * General rules. |
| * ------------- |
| * |
 * R1. The lock order of upcall threads is naturally opposite to downcall
| * threads. Hence upcalls must not hold any locks across layers for fear of |
| * recursive lock enter and lock order violation. This applies to all layers. |
| * |
| * R2. The perimeter is just another lock. Since it is held in the down |
| * direction, acquiring the perimeter in an upcall is prohibited as it would |
| * cause a deadlock. This applies to all layers. |
| * |
| * Note that upcalls that need to grab the mac perimeter (for example |
| * mac_notify upcalls) can still achieve that by posting the request to a |
| * thread, which can then grab all the required perimeters and locks in the |
 * right global order. Note that in the above example the mac layer itself
 * won't grab the mac perimeter in the mac_notify upcall; instead the upcall
 * to the client must do that. Please see the aggr code for an example.
| * |
| * MAC client rules |
| * ---------------- |
| * |
| * R3. A MAC client may use the MAC provided perimeter facility to serialize |
 * control operations on a per mac end point. It does this by acquiring
| * and holding the perimeter across a sequence of calls to the mac layer. |
| * This ensures atomicity across the entire block of mac calls. In this |
| * model the MAC client must not hold any client locks across the calls to |
| * the mac layer. This model is the preferred solution. |
| * |
| * R4. However if a MAC client has a lot of global state across all mac end |
| * points the per mac end point serialization may not be sufficient. In this |
| * case the client may choose to use global locks or use its own serialization. |
| * To avoid deadlocks, these client layer locks held across the mac calls |
| * in the control path must never be acquired by the data path for the reason |
| * mentioned below. |
| * |
| * (Assume that a control operation that holds a client lock blocks in the |
| * mac layer waiting for upcall reference counts to drop to zero. If an upcall |
| * data thread that holds this reference count, tries to acquire the same |
| * client lock subsequently it will deadlock). |
| * |
| * A MAC client may follow either the R3 model or the R4 model, but can't |
| * mix both. In the former, the hierarchy is Perim -> client locks, but in |
| * the latter it is client locks -> Perim. |
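 *
 * A client following the R3 model might, for example, do the following
 * (a sketch only; arguments and error handling omitted):
 *
 *	mac_perim_enter_by_mh(mh, &mph);
 *
 *	mac_multicast_add(mch, addr);
 *	mac_unicast_add(mch, ...);	the whole sequence is atomic with
 *					respect to other control operations
 *
 *	mac_perim_exit(mph);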
| * |
| * R5. MAC clients must make MAC calls (excluding data calls) in a cv_wait'able |
| * context since they may block while trying to acquire the perimeter. |
| * In addition some calls may block waiting for upcall refcnts to come down to |
| * zero. |
| * |
| * R6. MAC clients must make sure that they are single threaded and all threads |
| * from the top (in particular data threads) have finished before calling |
| * mac_client_close. The MAC framework does not track the number of client |
| * threads using the mac client handle. Also mac clients must make sure |
| * they have undone all the control operations before calling mac_client_close. |
| * For example mac_unicast_remove/mac_multicast_remove to undo the corresponding |
| * mac_unicast_add/mac_multicast_add. |
| * |
| * MAC framework rules |
| * ------------------- |
| * |
| * R7. The mac layer itself must not hold any mac layer locks (except the mac |
| * perimeter) across a call to any other layer from the mac layer. The call to |
| * any other layer could be via mi_* entry points, classifier entry points into |
| * the driver or via upcall pointers into layers above. The mac perimeter may |
 * be acquired or held only in the down direction, e.g. when calling into
 * a mi_* driver entry point to provide atomicity of the operation.
| * |
| * R8. Since it is not guaranteed (see R14) that drivers won't hold locks across |
| * mac driver interfaces, the MAC layer must provide a cut out for control |
| * interfaces like upcall notifications and start them in a separate thread. |
| * |
| * R9. Note that locking order also implies a plumbing order. For example |
| * VNICs are allowed to be created over aggrs, but not vice-versa. An attempt |
| * to plumb in any other order must be failed at mac_open time, otherwise it |
| * could lead to deadlocks due to inverse locking order. |
| * |
| * R10. MAC driver interfaces must not block since the driver could call them |
| * in interrupt context. |
| * |
| * R11. Walkers must preferably not hold any locks while calling walker |
| * callbacks. Instead these can operate on reference counts. In simple |
| * callbacks it may be ok to hold a lock and call the callbacks, but this is |
| * harder to maintain in the general case of arbitrary callbacks. |
| * |
| * R12. The MAC layer must protect upcall notification callbacks using reference |
| * counts rather than holding locks across the callbacks. |
| * |
| * R13. Given the variety of drivers, it is preferable if the MAC layer can make |
| * sure that any pointers (such as mac ring pointers) it passes to the driver |
| * remain valid until mac unregister time. Currently the mac layer achieves |
| * this by using generation numbers for rings and freeing the mac rings only |
| * at unregister time. The MAC layer must provide a layer of indirection and |
| * must not expose underlying driver rings or driver data structures/pointers |
| * directly to MAC clients. |
| * |
| * MAC driver rules |
| * ---------------- |
| * |
| * R14. It would be preferable if MAC drivers don't hold any locks across any |
| * mac call. However at a minimum they must not hold any locks across data |
| * upcalls. They must also make sure that all references to mac data structures |
| * are cleaned up and that it is single threaded at mac_unregister time. |
| * |
 * R15. MAC driver interfaces don't block, so the action may be done
 * asynchronously in a separate thread, as for example when handling
 * notifications.
| * The driver must not assume that the action is complete when the call |
| * returns. |
| * |
| * R16. Drivers must maintain a generation number per Rx ring, and pass it |
 * back to mac_rx_ring(). They are expected to increment the generation
 * number whenever the ring's stop routine is invoked.
 * See the comments in mac_rx_ring().
| * |
 * R17. Similarly, mi_stop is another synchronization point and the driver must
 * ensure that all upcalls are done and there won't be any future upcalls
 * before returning from mi_stop.
| * |
| * R18. The driver may assume that all set/modify control operations via |
 * the mi_* entry points are single threaded on a per mac end point basis.
| * |
| * Lock and Perimeter hierarchy scenarios |
| * --------------------------------------- |
| * |
| * i_mac_impl_lock -> mi_rw_lock -> srs_lock -> s_ring_lock[i_mac_tx_srs_notify] |
| * |
| * ft_lock -> fe_lock [mac_flow_lookup] |
| * |
| * mi_rw_lock -> fe_lock [mac_bcast_send] |
| * |
| * srs_lock -> mac_bw_lock [mac_rx_srs_drain_bw] |
| * |
| * cpu_lock -> mac_srs_g_lock -> srs_lock -> s_ring_lock [mac_walk_srs_and_bind] |
| * |
| * i_dls_devnet_lock -> mac layer locks [dls_devnet_rename] |
| * |
| * Perimeters are ordered P1 -> P2 -> P3 from top to bottom in order of mac |
 * client to driver. In the case of clients that explicitly use the mac provided
| * perimeter mechanism for its serialization, the hierarchy is |
| * Perimeter -> mac layer locks, since the client never holds any locks across |
 * the mac calls. In the case of clients that use their own locks the hierarchy
| * is Client locks -> Mac Perim -> Mac layer locks. The client never explicitly |
| * calls mac_perim_enter/exit in this case. |
| * |
| * Subflow creation rules |
| * --------------------------- |
 * o If a user-specified cpulist is present on both the underlying link and
 *   the flows, the flows' cpulist must be a subset of the underlying link's.
 * o If a user-specified fanout mode is present on both link and flow, the
 *   subflow fanout count has to be less than or equal to that of the
 *   underlying link, and the cpu-bindings for the subflows will be a subset
 *   of the underlying link's.
 * o If no cpulist is specified on either the underlying link or the flow, the
 *   underlying link relies on a MAC tunable to provide out of the box fanout.
 *   The subflow will have no cpulist (the subflow will be unbound).
 * o If no cpulist is specified on the underlying link, a subflow can
 *   carry either a user-specified cpulist or fanout count. The cpu-bindings
 *   for the subflow are not restricted to being a subset of the underlying
 *   link's.
 * o If the underlying link carries either a user-specified cpulist or fanout
 *   mode and nothing is specified for the subflow, the subflow will be
 *   created unbound.
 * o While creating unbound subflows, bandwidth mode changes attempt to
 *   figure out the right fanout count. In such cases the fanout count will
 *   override the unbound cpu-binding behavior.
 * o In addition, while cycling between flow and link properties, we impose
 *   the restriction that if a link has a subflow with user-specified
 *   attributes, we will not allow changing the link property. The
 *   administrator needs to reset all the user-specified properties for the
 *   subflows before attempting a link property change.
| * Some of the above rules can be overridden by specifying additional command |
| * line options while creating or modifying link or subflow properties. |
| * |
| * Datapath |
| * -------- |
| * |
| * For information on the datapath, the world of soft rings, hardware rings, how |
| * it is structured, and the path of an mblk_t between a driver and a mac |
| * client, see mac_sched.c. |
| */ |
| |
| #include <sys/types.h> |
| #include <sys/conf.h> |
| #include <sys/id_space.h> |
| #include <sys/esunddi.h> |
| #include <sys/stat.h> |
| #include <sys/mkdev.h> |
| #include <sys/stream.h> |
| #include <sys/strsun.h> |
| #include <sys/strsubr.h> |
| #include <sys/dlpi.h> |
| #include <sys/list.h> |
| #include <sys/modhash.h> |
| #include <sys/mac_provider.h> |
| #include <sys/mac_client_impl.h> |
| #include <sys/mac_soft_ring.h> |
| #include <sys/mac_stat.h> |
| #include <sys/mac_impl.h> |
| #include <sys/mac.h> |
| #include <sys/dls.h> |
| #include <sys/dld.h> |
| #include <sys/modctl.h> |
| #include <sys/fs/dv_node.h> |
| #include <sys/thread.h> |
| #include <sys/proc.h> |
| #include <sys/callb.h> |
| #include <sys/cpuvar.h> |
| #include <sys/atomic.h> |
| #include <sys/bitmap.h> |
| #include <sys/sdt.h> |
| #include <sys/mac_flow.h> |
| #include <sys/ddi_intr_impl.h> |
| #include <sys/disp.h> |
| #include <sys/sdt.h> |
| #include <sys/vnic.h> |
| #include <sys/vnic_impl.h> |
| #include <sys/vlan.h> |
| #include <inet/ip.h> |
| #include <inet/ip6.h> |
| #include <sys/exacct.h> |
| #include <sys/exacct_impl.h> |
| #include <inet/nd.h> |
| #include <sys/ethernet.h> |
| #include <sys/pool.h> |
| #include <sys/pool_pset.h> |
| #include <sys/cpupart.h> |
| #include <inet/wifi_ioctl.h> |
| #include <net/wpa.h> |
| |
| #define IMPL_HASHSZ 67 /* prime */ |
| |
| kmem_cache_t *i_mac_impl_cachep; |
| mod_hash_t *i_mac_impl_hash; |
| krwlock_t i_mac_impl_lock; |
| uint_t i_mac_impl_count; |
| static kmem_cache_t *mac_ring_cache; |
| static id_space_t *minor_ids; |
| static uint32_t minor_count; |
| static pool_event_cb_t mac_pool_event_reg; |
| |
| /* |
| * Logging stuff. Perhaps mac_logging_interval could be broken into |
| * mac_flow_log_interval and mac_link_log_interval if we want to be |
| * able to schedule them differently. |
| */ |
| uint_t mac_logging_interval; |
| boolean_t mac_flow_log_enable; |
| boolean_t mac_link_log_enable; |
| timeout_id_t mac_logging_timer; |
| |
| #define MACTYPE_KMODDIR "mac" |
| #define MACTYPE_HASHSZ 67 |
| static mod_hash_t *i_mactype_hash; |
| /* |
| * i_mactype_lock synchronizes threads that obtain references to mactype_t |
| * structures through i_mactype_getplugin(). |
| */ |
| static kmutex_t i_mactype_lock; |
| |
| /* |
| * mac_tx_percpu_cnt |
| * |
| * Number of per cpu locks per mac_client_impl_t. Used by the transmit side |
| * in mac_tx to reduce lock contention. This is sized at boot time in mac_init. |
| * mac_tx_percpu_cnt_max is settable in /etc/system and must be a power of 2. |
| * Per cpu locks may be disabled by setting mac_tx_percpu_cnt_max to 1. |
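 *
 * For example, to cap it at 32 one could add the following line to
 * /etc/system (shown for illustration only):
 *
 *	set mac:mac_tx_percpu_cnt_max = 32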
| */ |
| int mac_tx_percpu_cnt; |
| int mac_tx_percpu_cnt_max = 128; |
| |
| /* |
| * Call back functions for the bridge module. These are guaranteed to be valid |
| * when holding a reference on a link or when holding mip->mi_bridge_lock and |
| * mi_bridge_link is non-NULL. |
| */ |
| mac_bridge_tx_t mac_bridge_tx_cb; |
| mac_bridge_rx_t mac_bridge_rx_cb; |
| mac_bridge_ref_t mac_bridge_ref_cb; |
| mac_bridge_ls_t mac_bridge_ls_cb; |
| |
| static int i_mac_constructor(void *, void *, int); |
| static void i_mac_destructor(void *, void *); |
| static int i_mac_ring_ctor(void *, void *, int); |
| static void i_mac_ring_dtor(void *, void *); |
| static mblk_t *mac_rx_classify(mac_impl_t *, mac_resource_handle_t, mblk_t *); |
| void mac_tx_client_flush(mac_client_impl_t *); |
| void mac_tx_client_block(mac_client_impl_t *); |
| static void mac_rx_ring_quiesce(mac_ring_t *, uint_t); |
| static int mac_start_group_and_rings(mac_group_t *); |
| static void mac_stop_group_and_rings(mac_group_t *); |
| static void mac_pool_event_cb(pool_event_t, int, void *); |
| |
| typedef struct netinfo_s { |
| list_node_t ni_link; |
| void *ni_record; |
| int ni_size; |
| int ni_type; |
| } netinfo_t; |
| |
| /* |
| * Module initialization functions. |
| */ |
| |
| void |
| mac_init(void) |
| { |
| mac_tx_percpu_cnt = ((boot_max_ncpus == -1) ? max_ncpus : |
| boot_max_ncpus); |
| |
| /* Upper bound is mac_tx_percpu_cnt_max */ |
| if (mac_tx_percpu_cnt > mac_tx_percpu_cnt_max) |
| mac_tx_percpu_cnt = mac_tx_percpu_cnt_max; |
| |
| if (mac_tx_percpu_cnt < 1) { |
/* Someone set mac_tx_percpu_cnt_max to 0 or less */
| mac_tx_percpu_cnt = 1; |
| } |
| |
| ASSERT(mac_tx_percpu_cnt >= 1); |
| mac_tx_percpu_cnt = (1 << highbit(mac_tx_percpu_cnt - 1)); |
| /* |
| * Make it of the form 2**N - 1 in the range |
| * [0 .. mac_tx_percpu_cnt_max - 1] |
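 * For example, a boot-time CPU count of 9 is first rounded up to 16 by
 * the statement above and then becomes 15 here.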
| */ |
| mac_tx_percpu_cnt--; |
| |
| i_mac_impl_cachep = kmem_cache_create("mac_impl_cache", |
| sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor, |
| NULL, NULL, NULL, 0); |
| ASSERT(i_mac_impl_cachep != NULL); |
| |
| mac_ring_cache = kmem_cache_create("mac_ring_cache", |
| sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL, |
| NULL, NULL, 0); |
| ASSERT(mac_ring_cache != NULL); |
| |
| i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash", |
| IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor, |
| mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); |
| rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL); |
| |
| mac_flow_init(); |
| mac_soft_ring_init(); |
| mac_bcast_init(); |
| mac_client_init(); |
| |
| i_mac_impl_count = 0; |
| |
| i_mactype_hash = mod_hash_create_extended("mactype_hash", |
| MACTYPE_HASHSZ, |
| mod_hash_null_keydtor, mod_hash_null_valdtor, |
| mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); |
| |
| /* |
| * Allocate an id space to manage minor numbers. The range of the |
| * space will be from MAC_MAX_MINOR+1 to MAC_PRIVATE_MINOR-1. This |
| * leaves half of the 32-bit minors available for driver private use. |
| */ |
| minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1, |
| MAC_PRIVATE_MINOR-1); |
| ASSERT(minor_ids != NULL); |
| minor_count = 0; |
| |
| /* Let's default to 20 seconds */ |
| mac_logging_interval = 20; |
| mac_flow_log_enable = B_FALSE; |
| mac_link_log_enable = B_FALSE; |
| mac_logging_timer = 0; |
| |
/* Register to be notified of noteworthy pool events */
| mac_pool_event_reg.pec_func = mac_pool_event_cb; |
| mac_pool_event_reg.pec_arg = NULL; |
| pool_event_cb_register(&mac_pool_event_reg); |
| } |
| |
| int |
| mac_fini(void) |
| { |
| |
| if (i_mac_impl_count > 0 || minor_count > 0) |
| return (EBUSY); |
| |
| pool_event_cb_unregister(&mac_pool_event_reg); |
| |
| id_space_destroy(minor_ids); |
| mac_flow_fini(); |
| |
| mod_hash_destroy_hash(i_mac_impl_hash); |
| rw_destroy(&i_mac_impl_lock); |
| |
| mac_client_fini(); |
| kmem_cache_destroy(mac_ring_cache); |
| |
| mod_hash_destroy_hash(i_mactype_hash); |
| mac_soft_ring_finish(); |
| |
| |
| return (0); |
| } |
| |
| /* |
| * Initialize a GLDv3 driver's device ops. A driver that manages its own ops |
| * (e.g. softmac) may pass in a NULL ops argument. |
| */ |
| void |
| mac_init_ops(struct dev_ops *ops, const char *name) |
| { |
| major_t major = ddi_name_to_major((char *)name); |
| |
| /* |
| * By returning on error below, we are not letting the driver continue |
 * in an undefined context. The mac_register() function will fail if
| * DN_GLDV3_DRIVER isn't set. |
| */ |
| if (major == DDI_MAJOR_T_NONE) |
| return; |
| LOCK_DEV_OPS(&devnamesp[major].dn_lock); |
| devnamesp[major].dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER); |
| UNLOCK_DEV_OPS(&devnamesp[major].dn_lock); |
| if (ops != NULL) |
| dld_init_ops(ops, name); |
| } |
| |
| void |
| mac_fini_ops(struct dev_ops *ops) |
| { |
| dld_fini_ops(ops); |
| } |
| |
| /*ARGSUSED*/ |
| static int |
| i_mac_constructor(void *buf, void *arg, int kmflag) |
| { |
| mac_impl_t *mip = buf; |
| |
| bzero(buf, sizeof (mac_impl_t)); |
| |
| mip->mi_linkstate = LINK_STATE_UNKNOWN; |
| |
| rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL); |
| mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL); |
| mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL); |
| mutex_init(&mip->mi_ring_lock, NULL, MUTEX_DEFAULT, NULL); |
| |
| mip->mi_notify_cb_info.mcbi_lockp = &mip->mi_notify_lock; |
| cv_init(&mip->mi_notify_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL); |
| mip->mi_promisc_cb_info.mcbi_lockp = &mip->mi_promisc_lock; |
| cv_init(&mip->mi_promisc_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL); |
| |
| mutex_init(&mip->mi_bridge_lock, NULL, MUTEX_DEFAULT, NULL); |
| |
| return (0); |
| } |
| |
| /*ARGSUSED*/ |
| static void |
| i_mac_destructor(void *buf, void *arg) |
| { |
| mac_impl_t *mip = buf; |
| mac_cb_info_t *mcbi; |
| |
| ASSERT(mip->mi_ref == 0); |
| ASSERT(mip->mi_active == 0); |
| ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN); |
| ASSERT(mip->mi_devpromisc == 0); |
| ASSERT(mip->mi_ksp == NULL); |
| ASSERT(mip->mi_kstat_count == 0); |
| ASSERT(mip->mi_nclients == 0); |
| ASSERT(mip->mi_nactiveclients == 0); |
| ASSERT(mip->mi_single_active_client == NULL); |
| ASSERT(mip->mi_state_flags == 0); |
| ASSERT(mip->mi_factory_addr == NULL); |
| ASSERT(mip->mi_factory_addr_num == 0); |
| ASSERT(mip->mi_default_tx_ring == NULL); |
| |
| mcbi = &mip->mi_notify_cb_info; |
| ASSERT(mcbi->mcbi_del_cnt == 0 && mcbi->mcbi_walker_cnt == 0); |
| ASSERT(mip->mi_notify_bits == 0); |
| ASSERT(mip->mi_notify_thread == NULL); |
| ASSERT(mcbi->mcbi_lockp == &mip->mi_notify_lock); |
| mcbi->mcbi_lockp = NULL; |
| |
| mcbi = &mip->mi_promisc_cb_info; |
| ASSERT(mcbi->mcbi_del_cnt == 0 && mip->mi_promisc_list == NULL); |
| ASSERT(mip->mi_promisc_list == NULL); |
| ASSERT(mcbi->mcbi_lockp == &mip->mi_promisc_lock); |
| mcbi->mcbi_lockp = NULL; |
| |
| ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL); |
| ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0); |
| |
| rw_destroy(&mip->mi_rw_lock); |
| |
| mutex_destroy(&mip->mi_promisc_lock); |
| cv_destroy(&mip->mi_promisc_cb_info.mcbi_cv); |
| mutex_destroy(&mip->mi_notify_lock); |
| cv_destroy(&mip->mi_notify_cb_info.mcbi_cv); |
| mutex_destroy(&mip->mi_ring_lock); |
| |
| ASSERT(mip->mi_bridge_link == NULL); |
| } |
| |
| /* ARGSUSED */ |
| static int |
| i_mac_ring_ctor(void *buf, void *arg, int kmflag) |
| { |
| mac_ring_t *ring = (mac_ring_t *)buf; |
| |
| bzero(ring, sizeof (mac_ring_t)); |
| cv_init(&ring->mr_cv, NULL, CV_DEFAULT, NULL); |
| mutex_init(&ring->mr_lock, NULL, MUTEX_DEFAULT, NULL); |
| ring->mr_state = MR_FREE; |
| return (0); |
| } |
| |
| /* ARGSUSED */ |
| static void |
| i_mac_ring_dtor(void *buf, void *arg) |
| { |
| mac_ring_t *ring = (mac_ring_t *)buf; |
| |
| cv_destroy(&ring->mr_cv); |
| mutex_destroy(&ring->mr_lock); |
| } |
| |
| /* |
| * Common functions to do mac callback addition and deletion. Currently this is |
| * used by promisc callbacks and notify callbacks. List addition and deletion |
 * need to take care of list walkers. List walkers, in general, can't hold list
| * locks and make upcall callbacks due to potential lock order and recursive |
| * reentry issues. Instead list walkers increment the list walker count to mark |
| * the presence of a walker thread. Addition can be carefully done to ensure |
| * that the list walker always sees either the old list or the new list. |
| * However the deletion can't be done while the walker is active, instead the |
| * deleting thread simply marks the entry as logically deleted. The last walker |
| * physically deletes and frees up the logically deleted entries when the walk |
| * is complete. |
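 *
 * A walker therefore follows a pattern roughly like the one below (a sketch
 * only; the signalling of waiting deleters is omitted):
 *
 *	mac_cb_t *rmlist = NULL;
 *
 *	mutex_enter(mcbi->mcbi_lockp);
 *	mcbi->mcbi_walker_cnt++;
 *	mutex_exit(mcbi->mcbi_lockp);
 *
 *	for (mcb = *mcb_head; mcb != NULL; mcb = mcb->mcb_nextp) {
 *		if (!(mcb->mcb_flags & MCB_CONDEMNED))
 *			invoke the callback carried by mcb
 *	}
 *
 *	mutex_enter(mcbi->mcbi_lockp);
 *	if (--mcbi->mcbi_walker_cnt == 0 && mcbi->mcbi_del_cnt != 0)
 *		rmlist = mac_callback_walker_cleanup(mcbi, mcb_head);
 *	mutex_exit(mcbi->mcbi_lockp);
 *	mac_callback_free(rmlist);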
| */ |
| void |
| mac_callback_add(mac_cb_info_t *mcbi, mac_cb_t **mcb_head, |
| mac_cb_t *mcb_elem) |
| { |
| mac_cb_t *p; |
| mac_cb_t **pp; |
| |
| /* Verify it is not already in the list */ |
| for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) { |
| if (p == mcb_elem) |
| break; |
| } |
| VERIFY(p == NULL); |
| |
| /* |
| * Add it to the head of the callback list. The membar ensures that |
| * the following list pointer manipulations reach global visibility |
| * in exactly the program order below. |
| */ |
| ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); |
| |
| mcb_elem->mcb_nextp = *mcb_head; |
| membar_producer(); |
| *mcb_head = mcb_elem; |
| } |
| |
| /* |
 * Mark the entry as logically deleted. If there aren't any walkers, unlink it
| * from the list. In either case return the corresponding status. |
| */ |
| boolean_t |
| mac_callback_remove(mac_cb_info_t *mcbi, mac_cb_t **mcb_head, |
| mac_cb_t *mcb_elem) |
| { |
| mac_cb_t *p; |
| mac_cb_t **pp; |
| |
| ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); |
| /* |
| * Search the callback list for the entry to be removed |
| */ |
| for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) { |
| if (p == mcb_elem) |
| break; |
| } |
| VERIFY(p != NULL); |
| |
| /* |
 * If there are walkers, just mark it as deleted; the last walker
 * will remove it from the list and free it.
| */ |
| if (mcbi->mcbi_walker_cnt != 0) { |
| p->mcb_flags |= MCB_CONDEMNED; |
| mcbi->mcbi_del_cnt++; |
| return (B_FALSE); |
| } |
| |
| ASSERT(mcbi->mcbi_del_cnt == 0); |
| *pp = p->mcb_nextp; |
| p->mcb_nextp = NULL; |
| return (B_TRUE); |
| } |
| |
| /* |
| * Wait for all pending callback removals to be completed |
| */ |
| void |
| mac_callback_remove_wait(mac_cb_info_t *mcbi) |
| { |
| ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); |
| while (mcbi->mcbi_del_cnt != 0) { |
| DTRACE_PROBE1(need_wait, mac_cb_info_t *, mcbi); |
| cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp); |
| } |
| } |
| |
| /* |
 * The last mac callback walker does the cleanup. Walk the list and unlink
| * all the logically deleted entries and construct a temporary list of |
| * removed entries. Return the list of removed entries to the caller. |
| */ |
| mac_cb_t * |
| mac_callback_walker_cleanup(mac_cb_info_t *mcbi, mac_cb_t **mcb_head) |
| { |
| mac_cb_t *p; |
| mac_cb_t **pp; |
| mac_cb_t *rmlist = NULL; /* List of removed elements */ |
| int cnt = 0; |
| |
| ASSERT(MUTEX_HELD(mcbi->mcbi_lockp)); |
| ASSERT(mcbi->mcbi_del_cnt != 0 && mcbi->mcbi_walker_cnt == 0); |
| |
| pp = mcb_head; |
| while (*pp != NULL) { |
| if ((*pp)->mcb_flags & MCB_CONDEMNED) { |
| p = *pp; |
| *pp = p->mcb_nextp; |
| p->mcb_nextp = rmlist; |
| rmlist = p; |
| cnt++; |
| continue; |
| } |
| pp = &(*pp)->mcb_nextp; |
| } |
| |
| ASSERT(mcbi->mcbi_del_cnt == cnt); |
| mcbi->mcbi_del_cnt = 0; |
| return (rmlist); |
| } |
| |
| boolean_t |
| mac_callback_lookup(mac_cb_t **mcb_headp, mac_cb_t *mcb_elem) |
| { |
| mac_cb_t *mcb; |
| |
/* Check whether the element is in the list */
| for (mcb = *mcb_headp; mcb != NULL; mcb = mcb->mcb_nextp) { |
| if (mcb == mcb_elem) |
| return (B_TRUE); |
| } |
| |
| return (B_FALSE); |
| } |
| |
| boolean_t |
| mac_callback_find(mac_cb_info_t *mcbi, mac_cb_t **mcb_headp, mac_cb_t *mcb_elem) |
| { |
| boolean_t found; |
| |
| mutex_enter(mcbi->mcbi_lockp); |
| found = mac_callback_lookup(mcb_headp, mcb_elem); |
| mutex_exit(mcbi->mcbi_lockp); |
| |
| return (found); |
| } |
| |
| /* Free the list of removed callbacks */ |
| void |
| mac_callback_free(mac_cb_t *rmlist) |
| { |
| mac_cb_t *mcb; |
| mac_cb_t *mcb_next; |
| |
| for (mcb = rmlist; mcb != NULL; mcb = mcb_next) { |
| mcb_next = mcb->mcb_nextp; |
| kmem_free(mcb->mcb_objp, mcb->mcb_objsize); |
| } |
| } |
| |
| /* |
| * The promisc callbacks are in 2 lists, one off the 'mip' and another off the |
| * 'mcip' threaded by mpi_mi_link and mpi_mci_link respectively. However there |
| * is only a single shared total walker count, and an entry can't be physically |
| * unlinked if a walker is active on either list. The last walker does this |
| * cleanup of logically deleted entries. |
| */ |
| void |
| i_mac_promisc_walker_cleanup(mac_impl_t *mip) |
| { |
| mac_cb_t *rmlist; |
| mac_cb_t *mcb; |
| mac_cb_t *mcb_next; |
| mac_promisc_impl_t *mpip; |
| |
| /* |
 * Construct a temporary list of deleted callbacks by walking the
 * mi_promisc_list. Then for each entry in the temporary list,
| * remove it from the mci_promisc_list and free the entry. |
| */ |
| rmlist = mac_callback_walker_cleanup(&mip->mi_promisc_cb_info, |
| &mip->mi_promisc_list); |
| |
| for (mcb = rmlist; mcb != NULL; mcb = mcb_next) { |
| mcb_next = mcb->mcb_nextp; |
| mpip = (mac_promisc_impl_t *)mcb->mcb_objp; |
| VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info, |
| &mpip->mpi_mcip->mci_promisc_list, &mpip->mpi_mci_link)); |
| mcb->mcb_flags = 0; |
| mcb->mcb_nextp = NULL; |
| kmem_cache_free(mac_promisc_impl_cache, mpip); |
| } |
| } |
| |
| void |
| i_mac_notify(mac_impl_t *mip, mac_notify_type_t type) |
| { |
| mac_cb_info_t *mcbi; |
| |
| /* |
| * Signal the notify thread even after mi_ref has become zero and |
| * mi_disabled is set. The synchronization with the notify thread |
| * happens in mac_unregister and that implies the driver must make |
| * sure it is single-threaded (with respect to mac calls) and that |
 * all pending mac calls have returned before it calls mac_unregister.
| */ |
| rw_enter(&i_mac_impl_lock, RW_READER); |
| if (mip->mi_state_flags & MIS_DISABLED) |
| goto exit; |
| |
| /* |
| * Guard against incorrect notifications. (Running a newer |
| * mac client against an older implementation?) |
| */ |
| if (type >= MAC_NNOTE) |
| goto exit; |
| |
| mcbi = &mip->mi_notify_cb_info; |
| mutex_enter(mcbi->mcbi_lockp); |
| mip->mi_notify_bits |= (1 << type); |
| cv_broadcast(&mcbi->mcbi_cv); |
| mutex_exit(mcbi->mcbi_lockp); |
| |
| exit: |
| rw_exit(&i_mac_impl_lock); |
| } |
| |
| /* |
| * Mac serialization primitives. Please see the block comment at the |
| * top of the file. |
| */ |
| void |
| i_mac_perim_enter(mac_impl_t *mip) |
| { |
| mac_client_impl_t *mcip; |
| |
| if (mip->mi_state_flags & MIS_IS_VNIC) { |
| /* |
 * This is a VNIC. Use the lower mac since that is what
| * we want to serialize on. |
| */ |
| mcip = mac_vnic_lower(mip); |
| mip = mcip->mci_mip; |
| } |
| |
| mutex_enter(&mip->mi_perim_lock); |
| if (mip->mi_perim_owner == curthread) { |
| mip->mi_perim_ocnt++; |
| mutex_exit(&mip->mi_perim_lock); |
| return; |
| } |
| |
| while (mip->mi_perim_owner != NULL) |
| cv_wait(&mip->mi_perim_cv, &mip->mi_perim_lock); |
| |
| mip->mi_perim_owner = curthread; |
| ASSERT(mip->mi_perim_ocnt == 0); |
| mip->mi_perim_ocnt++; |
| #ifdef DEBUG |
| mip->mi_perim_stack_depth = getpcstack(mip->mi_perim_stack, |
| MAC_PERIM_STACK_DEPTH); |
| #endif |
| mutex_exit(&mip->mi_perim_lock); |
| } |
| |
| int |
| i_mac_perim_enter_nowait(mac_impl_t *mip) |
| { |
| /* |
| * The vnic is a special case, since the serialization is done based |
| * on the lower mac. If the lower mac is busy, it does not imply the |
| * vnic can't be unregistered. But in the case of other drivers, |
| * a busy perimeter or open mac handles implies that the mac is busy |
| * and can't be unregistered. |
| */ |
| if (mip->mi_state_flags & MIS_IS_VNIC) { |
| i_mac_perim_enter(mip); |
| return (0); |
| } |
| |
| mutex_enter(&mip->mi_perim_lock); |
| if (mip->mi_perim_owner != NULL) { |
| mutex_exit(&mip->mi_perim_lock); |
| return (EBUSY); |
| } |
| ASSERT(mip->mi_perim_ocnt == 0); |
| mip->mi_perim_owner = curthread; |
| mip->mi_perim_ocnt++; |
| mutex_exit(&mip->mi_perim_lock); |
| |
| return (0); |
| } |
| |
| void |
| i_mac_perim_exit(mac_impl_t *mip) |
| { |
| mac_client_impl_t *mcip; |
| |
| if (mip->mi_state_flags & MIS_IS_VNIC) { |
| /* |
 * This is a VNIC. Use the lower mac since that is what
| * we want to serialize on. |
| */ |
| mcip = mac_vnic_lower(mip); |
| mip = mcip->mci_mip; |
| } |
| |
| ASSERT(mip->mi_perim_owner == curthread && mip->mi_perim_ocnt != 0); |
| |
| mutex_enter(&mip->mi_perim_lock); |
| if (--mip->mi_perim_ocnt == 0) { |
| mip->mi_perim_owner = NULL; |
| cv_signal(&mip->mi_perim_cv); |
| } |
| mutex_exit(&mip->mi_perim_lock); |
| } |
| |
| /* |
| * Returns whether the current thread holds the mac perimeter. Used in making |
| * assertions. |
| */ |
| boolean_t |
| mac_perim_held(mac_handle_t mh) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| mac_client_impl_t *mcip; |
| |
| if (mip->mi_state_flags & MIS_IS_VNIC) { |
| /* |
 * This is a VNIC. Use the lower mac since that is what
| * we want to serialize on. |
| */ |
| mcip = mac_vnic_lower(mip); |
| mip = mcip->mci_mip; |
| } |
| return (mip->mi_perim_owner == curthread); |
| } |
| |
| /* |
| * mac client interfaces to enter the mac perimeter of a mac end point, given |
 * its mac handle, macname or linkid.
| */ |
| void |
| mac_perim_enter_by_mh(mac_handle_t mh, mac_perim_handle_t *mphp) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| |
| i_mac_perim_enter(mip); |
| /* |
| * The mac_perim_handle_t returned encodes the 'mip' and whether a |
| * mac_open has been done internally while entering the perimeter. |
| * This information is used in mac_perim_exit |
| */ |
| MAC_ENCODE_MPH(*mphp, mip, 0); |
| } |
| |
| int |
| mac_perim_enter_by_macname(const char *name, mac_perim_handle_t *mphp) |
| { |
| int err; |
| mac_handle_t mh; |
| |
| if ((err = mac_open(name, &mh)) != 0) |
| return (err); |
| |
| mac_perim_enter_by_mh(mh, mphp); |
| MAC_ENCODE_MPH(*mphp, mh, 1); |
| return (0); |
| } |
| |
| int |
| mac_perim_enter_by_linkid(datalink_id_t linkid, mac_perim_handle_t *mphp) |
| { |
| int err; |
| mac_handle_t mh; |
| |
| if ((err = mac_open_by_linkid(linkid, &mh)) != 0) |
| return (err); |
| |
| mac_perim_enter_by_mh(mh, mphp); |
| MAC_ENCODE_MPH(*mphp, mh, 1); |
| return (0); |
| } |
| |
| void |
| mac_perim_exit(mac_perim_handle_t mph) |
| { |
| mac_impl_t *mip; |
| boolean_t need_close; |
| |
| MAC_DECODE_MPH(mph, mip, need_close); |
| i_mac_perim_exit(mip); |
| if (need_close) |
| mac_close((mac_handle_t)mip); |
| } |
| |
| int |
| mac_hold(const char *macname, mac_impl_t **pmip) |
| { |
| mac_impl_t *mip; |
| int err; |
| |
| /* |
| * Check the device name length to make sure it won't overflow our |
| * buffer. |
| */ |
| if (strlen(macname) >= MAXNAMELEN) |
| return (EINVAL); |
| |
| /* |
| * Look up its entry in the global hash table. |
| */ |
| rw_enter(&i_mac_impl_lock, RW_WRITER); |
| err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname, |
| (mod_hash_val_t *)&mip); |
| |
| if (err != 0) { |
| rw_exit(&i_mac_impl_lock); |
| return (ENOENT); |
| } |
| |
| if (mip->mi_state_flags & MIS_DISABLED) { |
| rw_exit(&i_mac_impl_lock); |
| return (ENOENT); |
| } |
| |
| if (mip->mi_state_flags & MIS_EXCLUSIVE_HELD) { |
| rw_exit(&i_mac_impl_lock); |
| return (EBUSY); |
| } |
| |
| mip->mi_ref++; |
| rw_exit(&i_mac_impl_lock); |
| |
| *pmip = mip; |
| return (0); |
| } |
| |
| void |
| mac_rele(mac_impl_t *mip) |
| { |
| rw_enter(&i_mac_impl_lock, RW_WRITER); |
| ASSERT(mip->mi_ref != 0); |
| if (--mip->mi_ref == 0) { |
| ASSERT(mip->mi_nactiveclients == 0 && |
| !(mip->mi_state_flags & MIS_EXCLUSIVE)); |
| } |
| rw_exit(&i_mac_impl_lock); |
| } |
| |
| /* |
| * Private GLDv3 function to start a MAC instance. |
| */ |
| int |
| mac_start(mac_handle_t mh) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| int err = 0; |
| mac_group_t *defgrp; |
| |
| ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); |
| ASSERT(mip->mi_start != NULL); |
| |
| /* |
| * Check whether the device is already started. |
| */ |
| if (mip->mi_active++ == 0) { |
| mac_ring_t *ring = NULL; |
| |
| /* |
| * Start the device. |
| */ |
| err = mip->mi_start(mip->mi_driver); |
| if (err != 0) { |
| mip->mi_active--; |
| return (err); |
| } |
| |
| /* |
| * Start the default tx ring. |
| */ |
| if (mip->mi_default_tx_ring != NULL) { |
| |
| ring = (mac_ring_t *)mip->mi_default_tx_ring; |
| if (ring->mr_state != MR_INUSE) { |
| err = mac_start_ring(ring); |
| if (err != 0) { |
| mip->mi_active--; |
| return (err); |
| } |
| } |
| } |
| |
| if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) { |
| /* |
| * Start the default ring, since it will be needed |
| * to receive broadcast and multicast traffic for |
| * both primary and non-primary MAC clients. |
| */ |
| ASSERT(defgrp->mrg_state == MAC_GROUP_STATE_REGISTERED); |
| err = mac_start_group_and_rings(defgrp); |
| if (err != 0) { |
| mip->mi_active--; |
| if ((ring != NULL) && |
| (ring->mr_state == MR_INUSE)) |
| mac_stop_ring(ring); |
| return (err); |
| } |
| mac_set_group_state(defgrp, MAC_GROUP_STATE_SHARED); |
| } |
| } |
| |
| return (err); |
| } |
| |
| /* |
| * Private GLDv3 function to stop a MAC instance. |
| */ |
| void |
| mac_stop(mac_handle_t mh) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| mac_group_t *grp; |
| |
| ASSERT(mip->mi_stop != NULL); |
| ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); |
| |
| /* |
| * Check whether the device is still needed. |
| */ |
| ASSERT(mip->mi_active != 0); |
| if (--mip->mi_active == 0) { |
| if ((grp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) { |
| /* |
| * There should be no more active clients since the |
| * MAC is being stopped. Stop the default RX group |
| * and transition it back to registered state. |
| * |
 * When clients are torn down, the groups
 * are released via mac_release_rx_group, which
 * knows that the default group is always in
 * started mode since broadcast uses it. So
 * we can assert that there are no clients
 * (since mac_bcast_add doesn't register itself
 * as a client) and that the group is in SHARED state.
| */ |
| ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED); |
| ASSERT(MAC_GROUP_NO_CLIENT(grp) && |
| mip->mi_nactiveclients == 0); |
| mac_stop_group_and_rings(grp); |
| mac_set_group_state(grp, MAC_GROUP_STATE_REGISTERED); |
| } |
| |
| if (mip->mi_default_tx_ring != NULL) { |
| mac_ring_t *ring; |
| |
| ring = (mac_ring_t *)mip->mi_default_tx_ring; |
| if (ring->mr_state == MR_INUSE) { |
| mac_stop_ring(ring); |
| ring->mr_flag = 0; |
| } |
| } |
| |
| /* |
| * Stop the device. |
| */ |
| mip->mi_stop(mip->mi_driver); |
| } |
| } |
| |
| int |
| i_mac_promisc_set(mac_impl_t *mip, boolean_t on) |
| { |
| int err = 0; |
| |
| ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); |
| ASSERT(mip->mi_setpromisc != NULL); |
| |
| if (on) { |
| /* |
| * Enable promiscuous mode on the device if not yet enabled. |
| */ |
| if (mip->mi_devpromisc++ == 0) { |
| err = mip->mi_setpromisc(mip->mi_driver, B_TRUE); |
| if (err != 0) { |
| mip->mi_devpromisc--; |
| return (err); |
| } |
| i_mac_notify(mip, MAC_NOTE_DEVPROMISC); |
| } |
| } else { |
| if (mip->mi_devpromisc == 0) |
| return (EPROTO); |
| |
| /* |
| * Disable promiscuous mode on the device if this is the last |
| * enabling. |
| */ |
| if (--mip->mi_devpromisc == 0) { |
| err = mip->mi_setpromisc(mip->mi_driver, B_FALSE); |
| if (err != 0) { |
| mip->mi_devpromisc++; |
| return (err); |
| } |
| i_mac_notify(mip, MAC_NOTE_DEVPROMISC); |
| } |
| } |
| |
| return (0); |
| } |
| |
| /* |
| * The promiscuity state can change any time. If the caller needs to take |
| * actions that are atomic with the promiscuity state, then the caller needs |
| * to bracket the entire sequence with mac_perim_enter/exit |
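 *
 * For example (sketch):
 *
 *	mac_perim_enter_by_mh(mh, &mph);
 *	if (mac_promisc_get(mh)) {
 *		take an action that must be atomic with respect to
 *		the promiscuity state
 *	}
 *	mac_perim_exit(mph);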
| */ |
| boolean_t |
| mac_promisc_get(mac_handle_t mh) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| |
| /* |
| * Return the current promiscuity. |
| */ |
| return (mip->mi_devpromisc != 0); |
| } |
| |
| /* |
| * Invoked at MAC instance attach time to initialize the list |
| * of factory MAC addresses supported by a MAC instance. This function |
| * builds a local cache in the mac_impl_t for the MAC addresses |
| * supported by the underlying hardware. The MAC clients themselves |
| * use the mac_addr_factory*() functions to query and reserve |
| * factory MAC addresses. |
| */ |
| void |
| mac_addr_factory_init(mac_impl_t *mip) |
| { |
| mac_capab_multifactaddr_t capab; |
| uint8_t *addr; |
| int i; |
| |
| /* |
| * First round to see how many factory MAC addresses are available. |
| */ |
| bzero(&capab, sizeof (capab)); |
| if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_MULTIFACTADDR, |
| &capab) || (capab.mcm_naddr == 0)) { |
| /* |
| * The MAC instance doesn't support multiple factory |
| * MAC addresses, we're done here. |
| */ |
| return; |
| } |
| |
| /* |
| * Allocate the space and get all the factory addresses. |
| */ |
| addr = kmem_alloc(capab.mcm_naddr * MAXMACADDRLEN, KM_SLEEP); |
| capab.mcm_getaddr(mip->mi_driver, capab.mcm_naddr, addr); |
| |
| mip->mi_factory_addr_num = capab.mcm_naddr; |
| mip->mi_factory_addr = kmem_zalloc(mip->mi_factory_addr_num * |
| sizeof (mac_factory_addr_t), KM_SLEEP); |
| |
| for (i = 0; i < capab.mcm_naddr; i++) { |
| bcopy(addr + i * MAXMACADDRLEN, |
| mip->mi_factory_addr[i].mfa_addr, |
| mip->mi_type->mt_addr_length); |
| mip->mi_factory_addr[i].mfa_in_use = B_FALSE; |
| } |
| |
| kmem_free(addr, capab.mcm_naddr * MAXMACADDRLEN); |
| } |
| |
| void |
| mac_addr_factory_fini(mac_impl_t *mip) |
| { |
| if (mip->mi_factory_addr == NULL) { |
| ASSERT(mip->mi_factory_addr_num == 0); |
| return; |
| } |
| |
| kmem_free(mip->mi_factory_addr, mip->mi_factory_addr_num * |
| sizeof (mac_factory_addr_t)); |
| |
| mip->mi_factory_addr = NULL; |
| mip->mi_factory_addr_num = 0; |
| } |
| |
| /* |
| * Reserve a factory MAC address. If *slot is set to -1, the function |
| * attempts to reserve any of the available factory MAC addresses and |
| * returns the reserved slot id. If no slots are available, the function |
| * returns ENOSPC. If *slot is not set to -1, the function reserves |
 * the specified slot if it is available, or returns EBUSY if the slot
| * is already used. Returns ENOTSUP if the underlying MAC does not |
| * support multiple factory addresses. If the slot number is not -1 but |
| * is invalid, returns EINVAL. |
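 *
 * For example (sketch), a client that simply wants any available factory
 * address could do:
 *
 *	int slot = -1;
 *
 *	if (mac_addr_factory_reserve(mch, &slot) == 0) {
 *		use the address in the returned slot
 *		mac_addr_factory_release(mch, slot);
 *	}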
| */ |
| int |
| mac_addr_factory_reserve(mac_client_handle_t mch, int *slot) |
| { |
| mac_client_impl_t *mcip = (mac_client_impl_t *)mch; |
| mac_impl_t *mip = mcip->mci_mip; |
| int i, ret = 0; |
| |
| i_mac_perim_enter(mip); |
| /* |
| * Protect against concurrent readers that may need a self-consistent |
| * view of the factory addresses |
| */ |
| rw_enter(&mip->mi_rw_lock, RW_WRITER); |
| |
| if (mip->mi_factory_addr_num == 0) { |
| ret = ENOTSUP; |
| goto bail; |
| } |
| |
| if (*slot != -1) { |
| /* check the specified slot */ |
| if (*slot < 1 || *slot > mip->mi_factory_addr_num) { |
| ret = EINVAL; |
| goto bail; |
| } |
| if (mip->mi_factory_addr[*slot-1].mfa_in_use) { |
| ret = EBUSY; |
| goto bail; |
| } |
| } else { |
| /* pick the next available slot */ |
| for (i = 0; i < mip->mi_factory_addr_num; i++) { |
| if (!mip->mi_factory_addr[i].mfa_in_use) |
| break; |
| } |
| |
| if (i == mip->mi_factory_addr_num) { |
| ret = ENOSPC; |
| goto bail; |
| } |
| *slot = i+1; |
| } |
| |
| mip->mi_factory_addr[*slot-1].mfa_in_use = B_TRUE; |
| mip->mi_factory_addr[*slot-1].mfa_client = mcip; |
| |
| bail: |
| rw_exit(&mip->mi_rw_lock); |
| i_mac_perim_exit(mip); |
| return (ret); |
| } |
| |
| /* |
| * Release the specified factory MAC address slot. |
| */ |
| void |
| mac_addr_factory_release(mac_client_handle_t mch, uint_t slot) |
| { |
| mac_client_impl_t *mcip = (mac_client_impl_t *)mch; |
| mac_impl_t *mip = mcip->mci_mip; |
| |
| i_mac_perim_enter(mip); |
| /* |
| * Protect against concurrent readers that may need a self-consistent |
| * view of the factory addresses |
| */ |
| rw_enter(&mip->mi_rw_lock, RW_WRITER); |
| |
| ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num); |
| ASSERT(mip->mi_factory_addr[slot-1].mfa_in_use); |
| |
| mip->mi_factory_addr[slot-1].mfa_in_use = B_FALSE; |
| |
| rw_exit(&mip->mi_rw_lock); |
| i_mac_perim_exit(mip); |
| } |
| |
| /* |
 * Stores in mac_addr the value of the specified factory MAC address. The
 * mac_addr buffer must be at least MAXMACADDRLEN bytes, and if client_name
 * is non-NULL it must point to a string buffer of at least MAXNAMELEN bytes.
 * The slot number must be valid for the MAC (this is asserted).
| */ |
| void |
| mac_addr_factory_value(mac_handle_t mh, int slot, uchar_t *mac_addr, |
| uint_t *addr_len, char *client_name, boolean_t *in_use_arg) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| boolean_t in_use; |
| |
| ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num); |
| |
| /* |
| * Readers need to hold mi_rw_lock. Writers need to hold mac perimeter |
| * and mi_rw_lock |
| */ |
| rw_enter(&mip->mi_rw_lock, RW_READER); |
| bcopy(mip->mi_factory_addr[slot-1].mfa_addr, mac_addr, MAXMACADDRLEN); |
| *addr_len = mip->mi_type->mt_addr_length; |
| in_use = mip->mi_factory_addr[slot-1].mfa_in_use; |
| if (in_use && client_name != NULL) { |
| bcopy(mip->mi_factory_addr[slot-1].mfa_client->mci_name, |
| client_name, MAXNAMELEN); |
| } |
| if (in_use_arg != NULL) |
| *in_use_arg = in_use; |
| rw_exit(&mip->mi_rw_lock); |
| } |
| |
| /* |
| * Returns the number of factory MAC addresses (in addition to the |
| * primary MAC address), 0 if the underlying MAC doesn't support |
| * that feature. |
| */ |
| uint_t |
| mac_addr_factory_num(mac_handle_t mh) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| |
| return (mip->mi_factory_addr_num); |
| } |
| |
| |
| void |
| mac_rx_group_unmark(mac_group_t *grp, uint_t flag) |
| { |
| mac_ring_t *ring; |
| |
| for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next) |
| ring->mr_flag &= ~flag; |
| } |
| |
| /* |
| * The following mac_hwrings_xxx() functions are private mac client functions |
| * used by the aggr driver to access and control the underlying HW Rx group |
| * and rings. In this case, the aggr driver has exclusive control of the |
| * underlying HW Rx group/rings, it calls the following functions to |
 * start/stop the HW Rx rings, disable/enable polling, add/remove MAC
 * addresses, or set up the Rx callback.
| */ |
| /* ARGSUSED */ |
| static void |
| mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs, |
| mblk_t *mp_chain, boolean_t loopback) |
| { |
| mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)srs; |
| mac_srs_rx_t *srs_rx = &mac_srs->srs_rx; |
| mac_direct_rx_t proc; |
| void *arg1; |
| mac_resource_handle_t arg2; |
| |
| proc = srs_rx->sr_func; |
| arg1 = srs_rx->sr_arg1; |
| arg2 = mac_srs->srs_mrh; |
| |
| proc(arg1, arg2, mp_chain, NULL); |
| } |
| |
| /* |
| * This function is called to get the list of HW rings that are reserved by |
| * an exclusive mac client. |
| * |
| * Return value: the number of HW rings. |
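 *
 * For example (sketch), an exclusive client such as aggr might do:
 *
 *	mac_ring_handle_t	hwrh[MAX_RINGS_PER_GROUP];
 *	mac_group_handle_t	hwgh;
 *	int			nrings;
 *
 *	nrings = mac_hwrings_get(mch, &hwgh, hwrh, MAC_RING_TYPE_RX);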
| */ |
| int |
| mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh, |
| mac_ring_handle_t *hwrh, mac_ring_type_t rtype) |
| { |
| mac_client_impl_t *mcip = (mac_client_impl_t *)mch; |
| flow_entry_t *flent = mcip->mci_flent; |
| mac_group_t *grp; |
| mac_ring_t *ring; |
| int cnt = 0; |
| |
| if (rtype == MAC_RING_TYPE_RX) { |
| grp = flent->fe_rx_ring_group; |
| } else if (rtype == MAC_RING_TYPE_TX) { |
| grp = flent->fe_tx_ring_group; |
| } else { |
| ASSERT(B_FALSE); |
| return (-1); |
| } |
| /* |
 * If the mac client did not reserve any RX group, return directly.
| * This is probably because the underlying MAC does not support |
| * any groups. |
| */ |
| if (hwgh != NULL) |
| *hwgh = NULL; |
| if (grp == NULL) |
| return (0); |
| /* |
| * This group must be reserved by this mac client. |
| */ |
| ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) && |
| (mcip == MAC_GROUP_ONLY_CLIENT(grp))); |
| |
| for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next, cnt++) { |
| ASSERT(cnt < MAX_RINGS_PER_GROUP); |
| hwrh[cnt] = (mac_ring_handle_t)ring; |
| } |
| if (hwgh != NULL) |
| *hwgh = (mac_group_handle_t)grp; |
| |
| return (cnt); |
| } |
| |
| /* |
| * This function is called to get info about Tx/Rx rings. |
| * |
| * Return value: returns uint_t which will have various bits set |
 * that indicate different properties of the ring.
| */ |
| uint_t |
| mac_hwring_getinfo(mac_ring_handle_t rh) |
| { |
| mac_ring_t *ring = (mac_ring_t *)rh; |
| mac_ring_info_t *info = &ring->mr_info; |
| |
| return (info->mri_flags); |
| } |
| |
| /* |
| * Export ddi interrupt handles from the HW ring to the pseudo ring and |
 * set up the RX callback of the mac client which exclusively controls
 * the HW ring.
| */ |
| void |
| mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh, |
| mac_ring_handle_t pseudo_rh) |
| { |
| mac_ring_t *hw_ring = (mac_ring_t *)hwrh; |
| mac_ring_t *pseudo_ring; |
| mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; |
| |
| if (pseudo_rh != NULL) { |
| pseudo_ring = (mac_ring_t *)pseudo_rh; |
| /* Export the ddi handles to pseudo ring */ |
| pseudo_ring->mr_info.mri_intr.mi_ddi_handle = |
| hw_ring->mr_info.mri_intr.mi_ddi_handle; |
| pseudo_ring->mr_info.mri_intr.mi_ddi_shared = |
| hw_ring->mr_info.mri_intr.mi_ddi_shared; |
| /* |
| * Save a pointer to pseudo ring in the hw ring. If |
| * interrupt handle changes, the hw ring will be |
| * notified of the change (see mac_ring_intr_set()) |
| * and the appropriate change has to be made to |
| * the pseudo ring that has exported the ddi handle. |
| */ |
| hw_ring->mr_prh = pseudo_rh; |
| } |
| |
| if (hw_ring->mr_type == MAC_RING_TYPE_RX) { |
| ASSERT(!(mac_srs->srs_type & SRST_TX)); |
| mac_srs->srs_mrh = prh; |
| mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process; |
| } |
| } |
| |
| void |
| mac_hwring_teardown(mac_ring_handle_t hwrh) |
| { |
| mac_ring_t *hw_ring = (mac_ring_t *)hwrh; |
| mac_soft_ring_set_t *mac_srs; |
| |
| if (hw_ring == NULL) |
| return; |
| hw_ring->mr_prh = NULL; |
| if (hw_ring->mr_type == MAC_RING_TYPE_RX) { |
| mac_srs = hw_ring->mr_srs; |
| ASSERT(!(mac_srs->srs_type & SRST_TX)); |
| mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process; |
| mac_srs->srs_mrh = NULL; |
| } |
| } |
| |
| int |
| mac_hwring_disable_intr(mac_ring_handle_t rh) |
| { |
| mac_ring_t *rr_ring = (mac_ring_t *)rh; |
| mac_intr_t *intr = &rr_ring->mr_info.mri_intr; |
| |
| return (intr->mi_disable(intr->mi_handle)); |
| } |
| |
| int |
| mac_hwring_enable_intr(mac_ring_handle_t rh) |
| { |
| mac_ring_t *rr_ring = (mac_ring_t *)rh; |
| mac_intr_t *intr = &rr_ring->mr_info.mri_intr; |
| |
| return (intr->mi_enable(intr->mi_handle)); |
| } |
| |
| int |
| mac_hwring_start(mac_ring_handle_t rh) |
| { |
| mac_ring_t *rr_ring = (mac_ring_t *)rh; |
| |
| MAC_RING_UNMARK(rr_ring, MR_QUIESCE); |
| return (0); |
| } |
| |
| void |
| mac_hwring_stop(mac_ring_handle_t rh) |
| { |
| mac_ring_t *rr_ring = (mac_ring_t *)rh; |
| |
| mac_rx_ring_quiesce(rr_ring, MR_QUIESCE); |
| } |
| |
| mblk_t * |
| mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup) |
| { |
| mac_ring_t *rr_ring = (mac_ring_t *)rh; |
| mac_ring_info_t *info = &rr_ring->mr_info; |
| |
| return (info->mri_poll(info->mri_driver, bytes_to_pickup)); |
| } |
| |
| /* |
| * Send packets through a selected tx ring. |
| */ |
| mblk_t * |
| mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp) |
| { |
| mac_ring_t *ring = (mac_ring_t *)rh; |
| mac_ring_info_t *info = &ring->mr_info; |
| |
| ASSERT(ring->mr_type == MAC_RING_TYPE_TX && |
| ring->mr_state >= MR_INUSE); |
| return (info->mri_tx(info->mri_driver, mp)); |
| } |
| |
| /* |
| * Query stats for a particular rx/tx ring |
| */ |
| int |
| mac_hwring_getstat(mac_ring_handle_t rh, uint_t stat, uint64_t *val) |
| { |
| mac_ring_t *ring = (mac_ring_t *)rh; |
| mac_ring_info_t *info = &ring->mr_info; |
| |
| return (info->mri_stat(info->mri_driver, stat, val)); |
| } |
| |
| /* |
| * Private function that is only used by aggr to send packets through |
| * a port/Tx ring. Since aggr exposes a pseudo Tx ring even for ports |
 * that do not expose Tx rings, the aggr_ring_tx() entry point needs
 * access to the mac_impl_t to send packets through the m_tx() entry point.
| * It accomplishes this by calling mac_hwring_send_priv() function. |
| */ |
| mblk_t * |
| mac_hwring_send_priv(mac_client_handle_t mch, mac_ring_handle_t rh, mblk_t *mp) |
| { |
| mac_client_impl_t *mcip = (mac_client_impl_t *)mch; |
| mac_impl_t *mip = mcip->mci_mip; |
| |
| MAC_TX(mip, rh, mp, mcip); |
| return (mp); |
| } |
| |
| /* |
| * Private function that is only used by aggr to update the default transmission |
| * ring. Because aggr exposes a pseudo Tx ring even for ports that may |
| * temporarily be down, it may need to update the default ring that is used by |
| * MAC such that it refers to a link that can actively be used to send traffic. |
| * Note that this is different from the case where the port has been removed |
| * from the group. In those cases, all of the rings will be torn down because |
| * the ring will no longer exist. It's important to give aggr a case where the |
| * rings can still exist such that it may be able to continue to send LACP PDUs |
| * to potentially restore the link. |
| * |
| * Finally, we explicitly don't do anything if the ring hasn't been enabled yet. |
| * This is to help out aggr which doesn't really know the internal state that |
| * MAC does about the rings and can't know that it's not quite ready for use |
| * yet. |
| */ |
| void |
| mac_hwring_set_default(mac_handle_t mh, mac_ring_handle_t rh) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| mac_ring_t *ring = (mac_ring_t *)rh; |
| |
| ASSERT(MAC_PERIM_HELD(mh)); |
| VERIFY(mip->mi_state_flags & MIS_IS_AGGR); |
| |
| if (ring->mr_state != MR_INUSE) |
| return; |
| |
| mip->mi_default_tx_ring = rh; |
| } |
| |
| int |
| mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr) |
| { |
| mac_group_t *group = (mac_group_t *)gh; |
| |
| return (mac_group_addmac(group, addr)); |
| } |
| |
| int |
| mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr) |
| { |
| mac_group_t *group = (mac_group_t *)gh; |
| |
| return (mac_group_remmac(group, addr)); |
| } |
| |
| /* |
| * Set the RX group to be shared/reserved. Note that the group must be |
| * started/stopped outside of this function. |
| */ |
| void |
| mac_set_group_state(mac_group_t *grp, mac_group_state_t state) |
| { |
| /* |
| * If there is no change in the group state, just return. |
| */ |
| if (grp->mrg_state == state) |
| return; |
| |
| switch (state) { |
| case MAC_GROUP_STATE_RESERVED: |
| /* |
| * Successfully reserved the group. |
| * |
| * Given that there is an exclusive client controlling this |
| * group, we enable the group level polling when available, |
 * so that SRSs get to turn on/off individual rings they're
| * assigned to. |
| */ |
| ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); |
| |
| if (grp->mrg_type == MAC_RING_TYPE_RX && |
| GROUP_INTR_DISABLE_FUNC(grp) != NULL) { |
| GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); |
| } |
| break; |
| |
| case MAC_GROUP_STATE_SHARED: |
| /* |
| * Set all rings of this group to software classified. |
| * If the group has an overriding interrupt, then re-enable it. |
| */ |
| ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); |
| |
| if (grp->mrg_type == MAC_RING_TYPE_RX && |
| GROUP_INTR_ENABLE_FUNC(grp) != NULL) { |
| GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); |
| } |
| /* The ring is not available for reservations any more */ |
| break; |
| |
| case MAC_GROUP_STATE_REGISTERED: |
| /* Also callable from mac_register, perim is not held */ |
| break; |
| |
| default: |
| ASSERT(B_FALSE); |
| break; |
| } |
| |
| grp->mrg_state = state; |
| } |
| |
| /* |
| * Quiesce future hardware classified packets for the specified Rx ring |
| */ |
| static void |
| mac_rx_ring_quiesce(mac_ring_t *rx_ring, uint_t ring_flag) |
| { |
| ASSERT(rx_ring->mr_classify_type == MAC_HW_CLASSIFIER); |
| ASSERT(ring_flag == MR_CONDEMNED || ring_flag == MR_QUIESCE); |
| |
| mutex_enter(&rx_ring->mr_lock); |
| rx_ring->mr_flag |= ring_flag; |
| while (rx_ring->mr_refcnt != 0) |
| cv_wait(&rx_ring->mr_cv, &rx_ring->mr_lock); |
| mutex_exit(&rx_ring->mr_lock); |
| } |
| |
| /* |
| * Please see mac_tx() for details about the per-CPU locking scheme. |
| */ |
| static void |
| mac_tx_lock_all(mac_client_impl_t *mcip) |
| { |
| int i; |
| |
| for (i = 0; i <= mac_tx_percpu_cnt; i++) |
| mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); |
| } |
| |
| static void |
| mac_tx_unlock_all(mac_client_impl_t *mcip) |
| { |
| int i; |
| |
| for (i = mac_tx_percpu_cnt; i >= 0; i--) |
| mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); |
| } |
| |
| static void |
| mac_tx_unlock_allbutzero(mac_client_impl_t *mcip) |
| { |
| int i; |
| |
| for (i = mac_tx_percpu_cnt; i > 0; i--) |
| mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); |
| } |
| |
| static int |
| mac_tx_sum_refcnt(mac_client_impl_t *mcip) |
| { |
| int i; |
| int refcnt = 0; |
| |
| for (i = 0; i <= mac_tx_percpu_cnt; i++) |
| refcnt += mcip->mci_tx_pcpu[i].pcpu_tx_refcnt; |
| |
| return (refcnt); |
| } |
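| |
| /* |
| * A rough sketch of that per-CPU scheme (see mac_tx() for the authoritative |
| * version): a transmitting data thread only takes the lock of its own CPU's |
| * slot, |
| * |
| *	mytx = &mcip->mci_tx_pcpu[this cpu's slot]; |
| *	mutex_enter(&mytx->pcpu_tx_lock); |
| *	if (mcip->mci_tx_flag & MCI_TX_QUIESCE) |
| *		bail out, the client is being quiesced; |
| *	else |
| *		mytx->pcpu_tx_refcnt++; |
| *	mutex_exit(&mytx->pcpu_tx_lock); |
| *	... transmit ... |
| *	decrement pcpu_tx_refcnt under the same lock, and cv_signal |
| *	mci_tx_cv if the client is waiting to quiesce; |
| * |
| * while the control path (mac_tx_client_block() below) takes every per-CPU |
| * lock, sets MCI_TX_QUIESCE and waits for the summed refcnt to drain. |
| */ |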
| |
| /* |
| * Stop future Tx packets coming down from the client in preparation for |
| * quiescing the Tx side. This is needed for dynamic reclaim and reassignment |
| * of rings between clients |
| */ |
| void |
| mac_tx_client_block(mac_client_impl_t *mcip) |
| { |
| mac_tx_lock_all(mcip); |
| mcip->mci_tx_flag |= MCI_TX_QUIESCE; |
| while (mac_tx_sum_refcnt(mcip) != 0) { |
| mac_tx_unlock_allbutzero(mcip); |
| cv_wait(&mcip->mci_tx_cv, &mcip->mci_tx_pcpu[0].pcpu_tx_lock); |
| mutex_exit(&mcip->mci_tx_pcpu[0].pcpu_tx_lock); |
| mac_tx_lock_all(mcip); |
| } |
| mac_tx_unlock_all(mcip); |
| } |
| |
| void |
| mac_tx_client_unblock(mac_client_impl_t *mcip) |
| { |
| mac_tx_lock_all(mcip); |
| mcip->mci_tx_flag &= ~MCI_TX_QUIESCE; |
| mac_tx_unlock_all(mcip); |
| /* |
| * We may fail to disable flow control for the last MAC_NOTE_TX |
| * notification because the MAC client is quiesced. Send the |
| * notification again. |
| */ |
| i_mac_notify(mcip->mci_mip, MAC_NOTE_TX); |
| } |
| |
| /* |
| * Wait for an SRS to quiesce. The SRS worker will signal us when the |
| * quiesce is done. |
| */ |
| static void |
| mac_srs_quiesce_wait(mac_soft_ring_set_t *srs, uint_t srs_flag) |
| { |
| mutex_enter(&srs->srs_lock); |
| while (!(srs->srs_state & srs_flag)) |
| cv_wait(&srs->srs_quiesce_done_cv, &srs->srs_lock); |
| mutex_exit(&srs->srs_lock); |
| } |
| |
| /* |
| * Quiescing an Rx SRS is achieved by the following sequence. The protocol |
| * works bottom up by cutting off packet flow from the bottommost point in the |
| * mac, then the SRS, and then the soft rings. There are 2 use cases of this |
| * mechanism. One is a temporary quiesce of the SRS, for example while changing |
| * the Rx callbacks. The other is Rx SRS teardown. In the former case |
| * the QUIESCE prefix/suffix is used and in the latter the CONDEMNED is used |
| * for the SRS and MR flags. In the former case the threads pause waiting for |
| * a restart, while in the latter case the threads exit. The Tx SRS teardown |
| * is also mostly similar to the above. |
| * |
| * 1. Stop future hardware classified packets at the lowest level in the mac. |
| * Remove any hardware classification rule (CONDEMNED case) and mark the |
| * rings as CONDEMNED or QUIESCE as appropriate. This prevents the mr_refcnt |
| * from increasing. Upcalls from the driver that come through hardware |
| * classification will be dropped in mac_rx from now on. Then we wait for |
| * the mr_refcnt to drop to zero. When the mr_refcnt reaches zero we are |
| * sure there aren't any upcall threads from the driver through hardware |
| * classification. In the case of SRS teardown we also remove the |
| * classification rule in the driver. |
| * |
| * 2. Stop future software classified packets by marking the flow entry with |
| * FE_QUIESCE or FE_CONDEMNED as appropriate which prevents the refcnt from |
| * increasing. We also remove the flow entry from the table in the latter |
| * case. Then wait for the fe_refcnt to reach an appropriate quiescent value |
| * that indicates there aren't any active threads using that flow entry. |
| * |
| * 3. Quiesce the SRS and softrings by signaling the SRS. The SRS poll thread, |
| * SRS worker thread, and the soft ring threads are quiesced in sequence |
| * with the SRS worker thread serving as a master controller. This |
| * mechanism is explained in mac_srs_worker_quiesce(). |
| * |
| * The restart mechanism to reactivate the SRS and softrings is explained |
| * in mac_srs_worker_restart(). Here we just signal the SRS worker to start the |
| * restart sequence. |
| */ |
| void |
| mac_rx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) |
| { |
| flow_entry_t *flent = srs->srs_flent; |
| uint_t mr_flag, srs_done_flag; |
| |
| ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); |
| ASSERT(!(srs->srs_type & SRST_TX)); |
| |
| if (srs_quiesce_flag == SRS_CONDEMNED) { |
| mr_flag = MR_CONDEMNED; |
| srs_done_flag = SRS_CONDEMNED_DONE; |
| if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) |
| mac_srs_client_poll_disable(srs->srs_mcip, srs); |
| } else { |
| ASSERT(srs_quiesce_flag == SRS_QUIESCE); |
| mr_flag = MR_QUIESCE; |
| srs_done_flag = SRS_QUIESCE_DONE; |
| if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) |
| mac_srs_client_poll_quiesce(srs->srs_mcip, srs); |
| } |
| |
| if (srs->srs_ring != NULL) { |
| mac_rx_ring_quiesce(srs->srs_ring, mr_flag); |
| } else { |
| /* |
| * SRS is driven by software classification. In case |
| * of CONDEMNED, the top level teardown functions will |
| * deal with flow removal. |
| */ |
| if (srs_quiesce_flag != SRS_CONDEMNED) { |
| FLOW_MARK(flent, FE_QUIESCE); |
| mac_flow_wait(flent, FLOW_DRIVER_UPCALL); |
| } |
| } |
| |
| /* |
| * Signal the SRS to quiesce itself, and then cv_wait for the |
| * SRS quiesce to complete. The SRS worker thread will wake us |
| * up when the quiesce is complete |
| */ |
| mac_srs_signal(srs, srs_quiesce_flag); |
| mac_srs_quiesce_wait(srs, srs_done_flag); |
| } |
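| |
| /* |
| * For example, a temporary quiesce (say, while changing the Rx callbacks as |
| * described above) pairs with mac_rx_srs_restart() roughly as follows, with |
| * the mac perimeter held throughout: |
| * |
| *	mac_rx_srs_quiesce(srs, SRS_QUIESCE);	pause ring, flow and SRS |
| *	... update the Rx callback / flow state ... |
| *	mac_rx_srs_restart(srs);		resume packet flow |
| * |
| * The SRS_CONDEMNED variant is used on the teardown path instead, where the |
| * SRS is subsequently freed (see mac_rx_srs_remove() below). |
| */ |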
| |
| /* |
| * Remove an SRS. |
| */ |
| void |
| mac_rx_srs_remove(mac_soft_ring_set_t *srs) |
| { |
| flow_entry_t *flent = srs->srs_flent; |
| int i; |
| |
| mac_rx_srs_quiesce(srs, SRS_CONDEMNED); |
| /* |
| * Locate and remove our entry in the fe_rx_srs[] array, and |
| * adjust the fe_rx_srs array entries and array count by |
| * moving the last entry into the vacated spot. |
| */ |
| mutex_enter(&flent->fe_lock); |
| for (i = 0; i < flent->fe_rx_srs_cnt; i++) { |
| if (flent->fe_rx_srs[i] == srs) |
| break; |
| } |
| |
| ASSERT(i != 0 && i < flent->fe_rx_srs_cnt); |
| if (i != flent->fe_rx_srs_cnt - 1) { |
| flent->fe_rx_srs[i] = |
| flent->fe_rx_srs[flent->fe_rx_srs_cnt - 1]; |
| i = flent->fe_rx_srs_cnt - 1; |
| } |
| |
| flent->fe_rx_srs[i] = NULL; |
| flent->fe_rx_srs_cnt--; |
| mutex_exit(&flent->fe_lock); |
| |
| mac_srs_free(srs); |
| } |
| |
| static void |
| mac_srs_clear_flag(mac_soft_ring_set_t *srs, uint_t flag) |
| { |
| mutex_enter(&srs->srs_lock); |
| srs->srs_state &= ~flag; |
| mutex_exit(&srs->srs_lock); |
| } |
| |
| void |
| mac_rx_srs_restart(mac_soft_ring_set_t *srs) |
| { |
| flow_entry_t *flent = srs->srs_flent; |
| mac_ring_t *mr; |
| |
| ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); |
| ASSERT((srs->srs_type & SRST_TX) == 0); |
| |
| /* |
| * This handles a change in the number of SRSs between the quiesce and |
| * restart operation of a flow. |
| */ |
| if (!SRS_QUIESCED(srs)) |
| return; |
| |
| /* |
| * Signal the SRS to restart itself. Wait for the restart to complete. |
| * Note that we only restart the SRS if it is not marked as |
| * permanently quiesced. |
| */ |
| if (!SRS_QUIESCED_PERMANENT(srs)) { |
| mac_srs_signal(srs, SRS_RESTART); |
| mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); |
| mac_srs_clear_flag(srs, SRS_RESTART_DONE); |
| |
| mac_srs_client_poll_restart(srs->srs_mcip, srs); |
| } |
| |
| /* Finally clear the flags to let the packets in */ |
| mr = srs->srs_ring; |
| if (mr != NULL) { |
| MAC_RING_UNMARK(mr, MR_QUIESCE); |
| /* In case the ring was stopped, safely restart it */ |
| if (mr->mr_state != MR_INUSE) |
| (void) mac_start_ring(mr); |
| } else { |
| FLOW_UNMARK(flent, FE_QUIESCE); |
| } |
| } |
| |
| /* |
| * Temporary quiesce of a flow and associated Rx SRS. |
| * Please see block comment above mac_rx_classify_flow_rem. |
| */ |
| /* ARGSUSED */ |
| int |
| mac_rx_classify_flow_quiesce(flow_entry_t *flent, void *arg) |
| { |
| int i; |
| |
| for (i = 0; i < flent->fe_rx_srs_cnt; i++) { |
| mac_rx_srs_quiesce((mac_soft_ring_set_t *)flent->fe_rx_srs[i], |
| SRS_QUIESCE); |
| } |
| return (0); |
| } |
| |
| /* |
| * Restart a flow and associated Rx SRS that has been quiesced temporarily. |
| * Please see block comment above mac_rx_classify_flow_rem |
| */ |
| /* ARGSUSED */ |
| int |
| mac_rx_classify_flow_restart(flow_entry_t *flent, void *arg) |
| { |
| int i; |
| |
| for (i = 0; i < flent->fe_rx_srs_cnt; i++) |
| mac_rx_srs_restart((mac_soft_ring_set_t *)flent->fe_rx_srs[i]); |
| |
| return (0); |
| } |
| |
| void |
| mac_srs_perm_quiesce(mac_client_handle_t mch, boolean_t on) |
| { |
| mac_client_impl_t *mcip = (mac_client_impl_t *)mch; |
| flow_entry_t *flent = mcip->mci_flent; |
| mac_impl_t *mip = mcip->mci_mip; |
| mac_soft_ring_set_t *mac_srs; |
| int i; |
| |
| ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); |
| |
| if (flent == NULL) |
| return; |
| |
| for (i = 0; i < flent->fe_rx_srs_cnt; i++) { |
| mac_srs = flent->fe_rx_srs[i]; |
| mutex_enter(&mac_srs->srs_lock); |
| if (on) |
| mac_srs->srs_state |= SRS_QUIESCE_PERM; |
| else |
| mac_srs->srs_state &= ~SRS_QUIESCE_PERM; |
| mutex_exit(&mac_srs->srs_lock); |
| } |
| } |
| |
| void |
| mac_rx_client_quiesce(mac_client_handle_t mch) |
| { |
| mac_client_impl_t *mcip = (mac_client_impl_t *)mch; |
| mac_impl_t *mip = mcip->mci_mip; |
| |
| ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); |
| |
| if (MCIP_DATAPATH_SETUP(mcip)) { |
| (void) mac_rx_classify_flow_quiesce(mcip->mci_flent, |
| NULL); |
| (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, |
| mac_rx_classify_flow_quiesce, NULL); |
| } |
| } |
| |
| void |
| mac_rx_client_restart(mac_client_handle_t mch) |
| { |
| mac_client_impl_t *mcip = (mac_client_impl_t *)mch; |
| mac_impl_t *mip = mcip->mci_mip; |
| |
| ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); |
| |
| if (MCIP_DATAPATH_SETUP(mcip)) { |
| (void) mac_rx_classify_flow_restart(mcip->mci_flent, NULL); |
| (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, |
| mac_rx_classify_flow_restart, NULL); |
| } |
| } |
| |
| /* |
| * This function only quiesces the Tx SRS and softring worker threads. Callers |
| * need to make sure that there aren't any mac client threads doing current or |
| * future transmits in the mac before calling this function. |
| */ |
| void |
| mac_tx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) |
| { |
| mac_client_impl_t *mcip = srs->srs_mcip; |
| |
| ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); |
| |
| ASSERT(srs->srs_type & SRST_TX); |
| ASSERT(srs_quiesce_flag == SRS_CONDEMNED || |
| srs_quiesce_flag == SRS_QUIESCE); |
| |
| /* |
| * Signal the SRS to quiesce itself, and then cv_wait for the |
| * SRS quiesce to complete. The SRS worker thread will wake us |
| * up when the quiesce is complete |
| */ |
| mac_srs_signal(srs, srs_quiesce_flag); |
| mac_srs_quiesce_wait(srs, srs_quiesce_flag == SRS_QUIESCE ? |
| SRS_QUIESCE_DONE : SRS_CONDEMNED_DONE); |
| } |
| |
| void |
| mac_tx_srs_restart(mac_soft_ring_set_t *srs) |
| { |
| /* |
| * Resizing the fanout could result in creation of new SRSs. |
| * They may not necessarily be in the quiesced state, in which |
| * case they need not be restarted. |
| */ |
| if (!SRS_QUIESCED(srs)) |
| return; |
| |
| mac_srs_signal(srs, SRS_RESTART); |
| mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); |
| mac_srs_clear_flag(srs, SRS_RESTART_DONE); |
| } |
| |
| /* |
| * Temporary quiesce of a flow and associated Tx SRS. |
| * Please see block comment above mac_rx_srs_quiesce |
| */ |
| /* ARGSUSED */ |
| int |
| mac_tx_flow_quiesce(flow_entry_t *flent, void *arg) |
| { |
| /* |
| * The fe_tx_srs is null for a subflow on an interface that is |
| * not plumbed |
| */ |
| if (flent->fe_tx_srs != NULL) |
| mac_tx_srs_quiesce(flent->fe_tx_srs, SRS_QUIESCE); |
| return (0); |
| } |
| |
| /* ARGSUSED */ |
| int |
| mac_tx_flow_restart(flow_entry_t *flent, void *arg) |
| { |
| /* |
| * The fe_tx_srs is null for a subflow on an interface that is |
| * not plumbed |
| */ |
| if (flent->fe_tx_srs != NULL) |
| mac_tx_srs_restart(flent->fe_tx_srs); |
| return (0); |
| } |
| |
| static void |
| i_mac_tx_client_quiesce(mac_client_handle_t mch, uint_t srs_quiesce_flag) |
| { |
| mac_client_impl_t *mcip = (mac_client_impl_t *)mch; |
| |
| ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); |
| |
| mac_tx_client_block(mcip); |
| if (MCIP_TX_SRS(mcip) != NULL) { |
| mac_tx_srs_quiesce(MCIP_TX_SRS(mcip), srs_quiesce_flag); |
| (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, |
| mac_tx_flow_quiesce, NULL); |
| } |
| } |
| |
| void |
| mac_tx_client_quiesce(mac_client_handle_t mch) |
| { |
| i_mac_tx_client_quiesce(mch, SRS_QUIESCE); |
| } |
| |
| void |
| mac_tx_client_condemn(mac_client_handle_t mch) |
| { |
| i_mac_tx_client_quiesce(mch, SRS_CONDEMNED); |
| } |
| |
| void |
| mac_tx_client_restart(mac_client_handle_t mch) |
| { |
| mac_client_impl_t *mcip = (mac_client_impl_t *)mch; |
| |
| ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); |
| |
| mac_tx_client_unblock(mcip); |
| if (MCIP_TX_SRS(mcip) != NULL) { |
| mac_tx_srs_restart(MCIP_TX_SRS(mcip)); |
| (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, |
| mac_tx_flow_restart, NULL); |
| } |
| } |
| |
| void |
| mac_tx_client_flush(mac_client_impl_t *mcip) |
| { |
| ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); |
| |
| mac_tx_client_quiesce((mac_client_handle_t)mcip); |
| mac_tx_client_restart((mac_client_handle_t)mcip); |
| } |
| |
| void |
| mac_client_quiesce(mac_client_impl_t *mcip) |
| { |
| mac_rx_client_quiesce((mac_client_handle_t)mcip); |
| mac_tx_client_quiesce((mac_client_handle_t)mcip); |
| } |
| |
| void |
| mac_client_restart(mac_client_impl_t *mcip) |
| { |
| mac_rx_client_restart((mac_client_handle_t)mcip); |
| mac_tx_client_restart((mac_client_handle_t)mcip); |
| } |
| |
| /* |
| * Allocate a minor number. |
| */ |
| minor_t |
| mac_minor_hold(boolean_t sleep) |
| { |
| minor_t minor; |
| |
| /* |
| * Grab a value from the arena. |
| */ |
| atomic_inc_32(&minor_count); |
| |
| if (sleep) |
| minor = (uint_t)id_alloc(minor_ids); |
| else |
| minor = (uint_t)id_alloc_nosleep(minor_ids); |
| |
| if (minor == 0) { |
| atomic_dec_32(&minor_count); |
| return (0); |
| } |
| |
| return (minor); |
| } |
| |
| /* |
| * Release a previously allocated minor number. |
| */ |
| void |
| mac_minor_rele(minor_t minor) |
| { |
| /* |
| * Return the value to the arena. |
| */ |
| id_free(minor_ids, minor); |
| atomic_dec_32(&minor_count); |
| } |
| |
| uint32_t |
| mac_no_notification(mac_handle_t mh) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| |
| return (((mip->mi_state_flags & MIS_LEGACY) != 0) ? |
| mip->mi_capab_legacy.ml_unsup_note : 0); |
| } |
| |
| /* |
| * Prevent any new opens of this mac in preparation for unregister |
| */ |
| int |
| i_mac_disable(mac_impl_t *mip) |
| { |
| mac_client_impl_t *mcip; |
| |
| rw_enter(&i_mac_impl_lock, RW_WRITER); |
| if (mip->mi_state_flags & MIS_DISABLED) { |
| /* Already disabled, return success */ |
| rw_exit(&i_mac_impl_lock); |
| return (0); |
| } |
| /* |
| * See if there are any other references to this mac_t (e.g., VLAN's). |
| * If so return failure. If all the other checks below pass, then |
| * set mi_disabled atomically under the i_mac_impl_lock to prevent |
| * any new VLAN's from being created or new mac client opens of this |
| * mac end point. |
| */ |
| if (mip->mi_ref > 0) { |
| rw_exit(&i_mac_impl_lock); |
| return (EBUSY); |
| } |
| |
| /* |
| * MAC clients must delete all multicast groups they join before |
| * closing. Broadcast groups are reference counted; the last client |
| * to delete the group will wait until the group is physically |
| * deleted. Since all clients have closed this mac end point, |
| * mi_bcast_ngrps must be zero at this point. |
| */ |
| ASSERT(mip->mi_bcast_ngrps == 0); |
| |
| /* |
| * Don't let go of this if it has some flows. |
| * All other code guarantees no flows are added to a disabled |
| * mac, therefore it is sufficient to check for the flow table |
| * only here. |
| */ |
| mcip = mac_primary_client_handle(mip); |
| if ((mcip != NULL) && mac_link_has_flows((mac_client_handle_t)mcip)) { |
| rw_exit(&i_mac_impl_lock); |
| return (ENOTEMPTY); |
| } |
| |
| mip->mi_state_flags |= MIS_DISABLED; |
| rw_exit(&i_mac_impl_lock); |
| return (0); |
| } |
| |
| int |
| mac_disable_nowait(mac_handle_t mh) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| int err; |
| |
| if ((err = i_mac_perim_enter_nowait(mip)) != 0) |
| return (err); |
| err = i_mac_disable(mip); |
| i_mac_perim_exit(mip); |
| return (err); |
| } |
| |
| int |
| mac_disable(mac_handle_t mh) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| int err; |
| |
| i_mac_perim_enter(mip); |
| err = i_mac_disable(mip); |
| i_mac_perim_exit(mip); |
| |
| /* |
| * Clean up notification thread and wait for it to exit. |
| */ |
| if (err == 0) |
| i_mac_notify_exit(mip); |
| |
| return (err); |
| } |
| |
| /* |
| * Called when the MAC instance has a non-empty flow table, to de-multiplex |
| * incoming packets to the right flow. |
| * The MAC's rw lock is assumed held as a READER. |
| */ |
| /* ARGSUSED */ |
| static mblk_t * |
| mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp) |
| { |
| flow_entry_t *flent = NULL; |
| uint_t flags = FLOW_INBOUND; |
| int err; |
| |
| /* |
| * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN |
| * to mac_flow_lookup() so that the VLAN packets can be successfully |
| * passed to the non-VLAN aggregation flows. |
| * |
| * Note that there is possibly a race between this and |
| * mac_unicast_remove/add() and VLAN packets could be incorrectly |
| * classified to non-VLAN flows of non-aggregation mac clients. These |
| * VLAN packets will be then filtered out by the mac module. |
| */ |
| if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0) |
| flags |= FLOW_IGNORE_VLAN; |
| |
| err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent); |
| if (err != 0) { |
| /* no registered receive function */ |
| return (mp); |
| } else { |
| mac_client_impl_t *mcip; |
| |
| /* |
| * This flent might just be an additional one on the MAC client, |
| * i.e. for classification purposes (different fdesc); however, |
| * the resources, SRS et al., are in the mci_flent, so if |
| * this isn't the mci_flent, we need to get it. |
| */ |
| if ((mcip = flent->fe_mcip) != NULL && |
| mcip->mci_flent != flent) { |
| FLOW_REFRELE(flent); |
| flent = mcip->mci_flent; |
| FLOW_TRY_REFHOLD(flent, err); |
| if (err != 0) |
| return (mp); |
| } |
| (flent->fe_cb_fn)(flent->fe_cb_arg1, flent->fe_cb_arg2, mp, |
| B_FALSE); |
| FLOW_REFRELE(flent); |
| } |
| return (NULL); |
| } |
| |
| mblk_t * |
| mac_rx_flow(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| mblk_t *bp, *bp1, **bpp, *list = NULL; |
| |
| /* |
| * We walk the chain and attempt to classify each packet. |
| * The packets that couldn't be classified will be returned |
| * back to the caller. |
| */ |
| bp = mp_chain; |
| bpp = &list; |
| while (bp != NULL) { |
| bp1 = bp; |
| bp = bp->b_next; |
| bp1->b_next = NULL; |
| |
| if (mac_rx_classify(mip, mrh, bp1) != NULL) { |
| *bpp = bp1; |
| bpp = &bp1->b_next; |
| } |
| } |
| return (list); |
| } |
| |
| static int |
| mac_tx_flow_srs_wakeup(flow_entry_t *flent, void *arg) |
| { |
| mac_ring_handle_t ring = arg; |
| |
| if (flent->fe_tx_srs) |
| mac_tx_srs_wakeup(flent->fe_tx_srs, ring); |
| return (0); |
| } |
| |
| void |
| i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring) |
| { |
| mac_client_impl_t *cclient; |
| mac_soft_ring_set_t *mac_srs; |
| |
| /* |
| * After grabbing the mi_rw_lock, the list of clients can't change. |
| * If there are any clients, mi_disabled must be B_FALSE and can't |
| * be set while clients exist. If there aren't any clients we |
| * don't do anything. In any case the mip has to be valid. The driver |
| * must make sure that it goes single threaded (with respect to mac |
| * calls) and wait for all pending mac calls to finish before calling |
| * mac_unregister. |
| */ |
| rw_enter(&i_mac_impl_lock, RW_READER); |
| if (mip->mi_state_flags & MIS_DISABLED) { |
| rw_exit(&i_mac_impl_lock); |
| return; |
| } |
| |
| /* |
| * Get the Tx SRS of each MAC client by walking the mac client list. |
| */ |
| rw_enter(&mip->mi_rw_lock, RW_READER); |
| for (cclient = mip->mi_clients_list; cclient != NULL; |
| cclient = cclient->mci_client_next) { |
| if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) { |
| mac_tx_srs_wakeup(mac_srs, ring); |
| } else { |
| /* |
| * Aggr opens underlying ports in exclusive mode |
| * and registers flow control callbacks using |
| * mac_tx_client_notify(). When opened in |
| * exclusive mode, Tx SRS won't be created |
| * during mac_unicast_add(). |
| */ |
| if (cclient->mci_state_flags & MCIS_EXCLUSIVE) { |
| mac_tx_invoke_callbacks(cclient, |
| (mac_tx_cookie_t)ring); |
| } |
| } |
| (void) mac_flow_walk(cclient->mci_subflow_tab, |
| mac_tx_flow_srs_wakeup, ring); |
| } |
| rw_exit(&mip->mi_rw_lock); |
| rw_exit(&i_mac_impl_lock); |
| } |
| |
| /* ARGSUSED */ |
| void |
| mac_multicast_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg, |
| boolean_t add) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| |
| i_mac_perim_enter((mac_impl_t *)mh); |
| /* |
| * If no specific refresh function was given then default to the |
| * driver's m_multicst entry point. |
| */ |
| if (refresh == NULL) { |
| refresh = mip->mi_multicst; |
| arg = mip->mi_driver; |
| } |
| |
| mac_bcast_refresh(mip, refresh, arg, add); |
| i_mac_perim_exit((mac_impl_t *)mh); |
| } |
| |
| void |
| mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| |
| /* |
| * If no specific refresh function was given then default to the |
| * driver's m_promisc entry point. |
| */ |
| if (refresh == NULL) { |
| refresh = mip->mi_setpromisc; |
| arg = mip->mi_driver; |
| } |
| ASSERT(refresh != NULL); |
| |
| /* |
| * Call the refresh function with the current promiscuity. |
| */ |
| refresh(arg, (mip->mi_devpromisc != 0)); |
| } |
| |
| /* |
| * The mac client requests that the mac not change its margin size to |
| * less than the specified value. If "current" is B_TRUE, then the client |
| * requests that the mac not change its margin size to be smaller than the |
| * current size. Further, the current margin size value is returned in this |
| * case. |
| * |
| * We keep every requested size in an ordered list from largest to smallest. |
| */ |
| int |
| mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| mac_margin_req_t **pp, *p; |
| int err = 0; |
| |
| rw_enter(&(mip->mi_rw_lock), RW_WRITER); |
| if (current) |
| *marginp = mip->mi_margin; |
| |
| /* |
| * If the current margin value cannot satisfy the margin requested, |
| * return ENOTSUP directly. |
| */ |
| if (*marginp > mip->mi_margin) { |
| err = ENOTSUP; |
| goto done; |
| } |
| |
| /* |
| * Check whether the given margin is already in the list. If so, |
| * bump the reference count. |
| */ |
| for (pp = &mip->mi_mmrp; (p = *pp) != NULL; pp = &p->mmr_nextp) { |
| if (p->mmr_margin == *marginp) { |
| /* |
| * The margin requested is already in the list, |
| * so just bump the reference count. |
| */ |
| p->mmr_ref++; |
| goto done; |
| } |
| if (p->mmr_margin < *marginp) |
| break; |
| } |
| |
| |
| p = kmem_zalloc(sizeof (mac_margin_req_t), KM_SLEEP); |
| p->mmr_margin = *marginp; |
| p->mmr_ref++; |
| p->mmr_nextp = *pp; |
| *pp = p; |
| |
| done: |
| rw_exit(&(mip->mi_rw_lock)); |
| return (err); |
| } |
| |
| /* |
| * The mac client requests to cancel its previous mac_margin_add() request. |
| * We remove the requested margin size from the list. |
| */ |
| int |
| mac_margin_remove(mac_handle_t mh, uint32_t margin) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| mac_margin_req_t **pp, *p; |
| int err = 0; |
| |
| rw_enter(&(mip->mi_rw_lock), RW_WRITER); |
| /* |
| * Find the entry in the list for the given margin. |
| */ |
| for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) { |
| if (p->mmr_margin == margin) { |
| if (--p->mmr_ref == 0) |
| break; |
| |
| /* |
| * There is still a reference to this margin so |
| * there's nothing more to do. |
| */ |
| goto done; |
| } |
| } |
| |
| /* |
| * We did not find an entry for the given margin. |
| */ |
| if (p == NULL) { |
| err = ENOENT; |
| goto done; |
| } |
| |
| ASSERT(p->mmr_ref == 0); |
| |
| /* |
| * Remove it from the list. |
| */ |
| *pp = p->mmr_nextp; |
| kmem_free(p, sizeof (mac_margin_req_t)); |
| done: |
| rw_exit(&(mip->mi_rw_lock)); |
| return (err); |
| } |
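| |
| /* |
| * For example, a hypothetical client that must not have the margin shrink |
| * below its current value could do, roughly: |
| * |
| *	uint32_t margin; |
| * |
| *	if (mac_margin_add(mh, &margin, B_TRUE) == 0) { |
| *		... margin now holds the reserved (current) value ... |
| *		(void) mac_margin_remove(mh, margin); |
| *	} |
| */ |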
| |
| boolean_t |
| mac_margin_update(mac_handle_t mh, uint32_t margin) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| uint32_t margin_needed = 0; |
| |
| rw_enter(&(mip->mi_rw_lock), RW_WRITER); |
| |
| if (mip->mi_mmrp != NULL) |
| margin_needed = mip->mi_mmrp->mmr_margin; |
| |
| if (margin_needed <= margin) |
| mip->mi_margin = margin; |
| |
| rw_exit(&(mip->mi_rw_lock)); |
| |
| if (margin_needed <= margin) |
| i_mac_notify(mip, MAC_NOTE_MARGIN); |
| |
| return (margin_needed <= margin); |
| } |
| |
| /* |
| * MAC clients use this interface to request that a MAC device not change its |
| * MTU below the specified amount. At this time, that amount must be within the |
| * range of the device's current minimum and the device's current maximum; |
| * e.g., a client cannot request a 3000 byte MTU when the device's MTU is |
| * currently 2000. |
| * |
| * If "current" is set to B_TRUE, then the request is to simply to reserve the |
| * current underlying mac's maximum for this mac client and return it in mtup. |
| */ |
| int |
| mac_mtu_add(mac_handle_t mh, uint32_t *mtup, boolean_t current) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| mac_mtu_req_t *prev, *cur; |
| mac_propval_range_t mpr; |
| int err; |
| |
| i_mac_perim_enter(mip); |
| rw_enter(&mip->mi_rw_lock, RW_WRITER); |
| |
| if (current == B_TRUE) |
| *mtup = mip->mi_sdu_max; |
| mpr.mpr_count = 1; |
| err = mac_prop_info(mh, MAC_PROP_MTU, "mtu", NULL, 0, &mpr, NULL); |
| if (err != 0) { |
| rw_exit(&mip->mi_rw_lock); |
| i_mac_perim_exit(mip); |
| return (err); |
| } |
| |
| if (*mtup > mip->mi_sdu_max || |
| *mtup < mpr.mpr_range_uint32[0].mpur_min) { |
| rw_exit(&mip->mi_rw_lock); |
| i_mac_perim_exit(mip); |
| return (ENOTSUP); |
| } |
| |
| prev = NULL; |
| for (cur = mip->mi_mtrp; cur != NULL; cur = cur->mtr_nextp) { |
| if (*mtup == cur->mtr_mtu) { |
| cur->mtr_ref++; |
| rw_exit(&mip->mi_rw_lock); |
| i_mac_perim_exit(mip); |
| return (0); |
| } |
| |
| if (*mtup > cur->mtr_mtu) |
| break; |
| |
| prev = cur; |
| } |
| |
| cur = kmem_alloc(sizeof (mac_mtu_req_t), KM_SLEEP); |
| cur->mtr_mtu = *mtup; |
| cur->mtr_ref = 1; |
| if (prev != NULL) { |
| cur->mtr_nextp = prev->mtr_nextp; |
| prev->mtr_nextp = cur; |
| } else { |
| cur->mtr_nextp = mip->mi_mtrp; |
| mip->mi_mtrp = cur; |
| } |
| |
| rw_exit(&mip->mi_rw_lock); |
| i_mac_perim_exit(mip); |
| return (0); |
| } |
| |
| int |
| mac_mtu_remove(mac_handle_t mh, uint32_t mtu) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| mac_mtu_req_t *cur, *prev; |
| |
| i_mac_perim_enter(mip); |
| rw_enter(&mip->mi_rw_lock, RW_WRITER); |
| |
| prev = NULL; |
| for (cur = mip->mi_mtrp; cur != NULL; cur = cur->mtr_nextp) { |
| if (cur->mtr_mtu == mtu) { |
| ASSERT(cur->mtr_ref > 0); |
| cur->mtr_ref--; |
| if (cur->mtr_ref == 0) { |
| if (prev == NULL) { |
| mip->mi_mtrp = cur->mtr_nextp; |
| } else { |
| prev->mtr_nextp = cur->mtr_nextp; |
| } |
| kmem_free(cur, sizeof (mac_mtu_req_t)); |
| } |
| rw_exit(&mip->mi_rw_lock); |
| i_mac_perim_exit(mip); |
| return (0); |
| } |
| |
| prev = cur; |
| } |
| |
| rw_exit(&mip->mi_rw_lock); |
| i_mac_perim_exit(mip); |
| return (ENOENT); |
| } |
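| |
| /* |
| * For example, a hypothetical client that needs the MTU pinned at or above |
| * 9000 bytes (and whose underlying device already allows it) could do, |
| * roughly: |
| * |
| *	uint32_t mtu = 9000; |
| * |
| *	if (mac_mtu_add(mh, &mtu, B_FALSE) == 0) { |
| *		... the device's MTU can no longer drop below 9000 ... |
| *		(void) mac_mtu_remove(mh, mtu); |
| *	} |
| * |
| * Passing current == B_TRUE instead reserves the device's current maximum |
| * SDU and returns it in mtu. |
| */ |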
| |
| /* |
| * MAC Type Plugin functions. |
| */ |
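| |
| /* |
| * A plugin typically registers itself from its _init(9E) routine, roughly |
| * as follows (a sketch only; the fields and mandatory mtops_* callbacks are |
| * those consumed by mactype_register() below): |
| * |
| *	mactype_register_t *mtrp; |
| * |
| *	if ((mtrp = mactype_alloc(MACTYPE_VERSION)) == NULL) |
| *		fail; |
| *	mtrp->mtr_ident = "foo";		plugin name, used as the hash key |
| *	mtrp->mtr_ops = &foo_type_ops; |
| *	mtrp->mtr_mactype = DL_FOO;		hypothetical DLPI type |
| *	mtrp->mtr_nativetype = DL_FOO; |
| *	mtrp->mtr_addrlen = FOO_ADDRL; |
| *	mtrp->mtr_brdcst_addr = foo_brdcst_addr;	optional |
| *	err = mactype_register(mtrp); |
| *	mactype_free(mtrp); |
| * |
| * mactype_getplugin() below is what the framework uses to find (and, if |
| * needed, modload) a registered plugin and take a reference on it when a |
| * driver of that MAC type registers. |
| */ |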
| |
| mactype_t * |
| mactype_getplugin(const char *pname) |
| { |
| mactype_t *mtype = NULL; |
| boolean_t tried_modload = B_FALSE; |
| |
| mutex_enter(&i_mactype_lock); |
| |
| find_registered_mactype: |
| if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname, |
| (mod_hash_val_t *)&mtype) != 0) { |
| if (!tried_modload) { |
| /* |
| * If the plugin has not yet been loaded, then |
| * attempt to load it now. If modload() succeeds, |
| * the plugin should have registered using |
| * mactype_register(), in which case we can go back |
| * and attempt to find it again. |
| */ |
| if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) { |
| tried_modload = B_TRUE; |
| goto find_registered_mactype; |
| } |
| } |
| } else { |
| /* |
| * Note that there's no danger that the plugin we've loaded |
| * could be unloaded between the modload() step and the |
| * reference count bump here, as we're holding |
| * i_mactype_lock, which mactype_unregister() also holds. |
| */ |
| atomic_inc_32(&mtype->mt_ref); |
| } |
| |
| mutex_exit(&i_mactype_lock); |
| return (mtype); |
| } |
| |
| mactype_register_t * |
| mactype_alloc(uint_t mactype_version) |
| { |
| mactype_register_t *mtrp; |
| |
| /* |
| * Make sure there isn't a version mismatch between the plugin and |
| * the framework. In the future, if multiple versions are |
| * supported, this check could become more sophisticated. |
| */ |
| if (mactype_version != MACTYPE_VERSION) |
| return (NULL); |
| |
| mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP); |
| mtrp->mtr_version = mactype_version; |
| return (mtrp); |
| } |
| |
| void |
| mactype_free(mactype_register_t *mtrp) |
| { |
| kmem_free(mtrp, sizeof (mactype_register_t)); |
| } |
| |
| int |
| mactype_register(mactype_register_t *mtrp) |
| { |
| mactype_t *mtp; |
| mactype_ops_t *ops = mtrp->mtr_ops; |
| |
| /* Do some sanity checking before we register this MAC type. */ |
| if (mtrp->mtr_ident == NULL || ops == NULL) |
| return (EINVAL); |
| |
| /* |
| * Verify that all mandatory callbacks are set in the ops |
| * vector. |
| */ |
| if (ops->mtops_unicst_verify == NULL || |
| ops->mtops_multicst_verify == NULL || |
| ops->mtops_sap_verify == NULL || |
| ops->mtops_header == NULL || |
| ops->mtops_header_info == NULL) { |
| return (EINVAL); |
| } |
| |
| mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP); |
| mtp->mt_ident = mtrp->mtr_ident; |
| mtp->mt_ops = *ops; |
| mtp->mt_type = mtrp->mtr_mactype; |
| mtp->mt_nativetype = mtrp->mtr_nativetype; |
| mtp->mt_addr_length = mtrp->mtr_addrlen; |
| if (mtrp->mtr_brdcst_addr != NULL) { |
| mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP); |
| bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr, |
| mtrp->mtr_addrlen); |
| } |
| |
| mtp->mt_stats = mtrp->mtr_stats; |
| mtp->mt_statcount = mtrp->mtr_statcount; |
| |
| mtp->mt_mapping = mtrp->mtr_mapping; |
| mtp->mt_mappingcount = mtrp->mtr_mappingcount; |
| |
| if (mod_hash_insert(i_mactype_hash, |
| (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) { |
| kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); |
| kmem_free(mtp, sizeof (*mtp)); |
| return (EEXIST); |
| } |
| return (0); |
| } |
| |
| int |
| mactype_unregister(const char *ident) |
| { |
| mactype_t *mtp; |
| mod_hash_val_t val; |
| int err; |
| |
| /* |
| * Let's not allow MAC drivers to use this plugin while we're |
| * trying to unregister it. Holding i_mactype_lock also prevents a |
| * plugin from unregistering while a MAC driver is attempting to |
| * hold a reference to it in mactype_getplugin(). |
| */ |
| mutex_enter(&i_mactype_lock); |
| |
| if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident, |
| (mod_hash_val_t *)&mtp)) != 0) { |
| /* A plugin is trying to unregister, but it never registered. */ |
| err = ENXIO; |
| goto done; |
| } |
| |
| if (mtp->mt_ref != 0) { |
| err = EBUSY; |
| goto done; |
| } |
| |
| err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val); |
| ASSERT(err == 0); |
| if (err != 0) { |
| /* This should never happen, thus the ASSERT() above. */ |
| err = EINVAL; |
| goto done; |
| } |
| ASSERT(mtp == (mactype_t *)val); |
| |
| if (mtp->mt_brdcst_addr != |