| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| * |
| * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| */ |
| |
| #include <inet/arp.h> |
| #include <inet/ip.h> |
| #include <inet/ip6.h> |
| #include <inet/ip_if.h> |
| #include <inet/ip_ire.h> |
| #include <inet/ip_multi.h> |
| #include <inet/ip_rts.h> |
| #include <inet/mi.h> |
| #include <net/if_types.h> |
| #include <sys/dlpi.h> |
| #include <sys/kmem.h> |
| #include <sys/modhash.h> |
| #include <sys/sdt.h> |
| #include <sys/strsun.h> |
| #include <sys/sunddi.h> |
| #include <sys/types.h> |
| |
| /* |
| * Convenience macros for getting the ip_stack_t associated with an |
| * ipmp_illgrp_t or ipmp_grp_t. |
| */ |
| #define IPMP_GRP_TO_IPST(grp) PHYINT_TO_IPST((grp)->gr_phyint) |
| #define IPMP_ILLGRP_TO_IPST(illg) ((illg)->ig_ipmp_ill->ill_ipst) |
| |
| /* |
| * Assorted constants that aren't important enough to be tunable. |
| */ |
| #define IPMP_GRP_HASH_SIZE 64 |
| #define IPMP_ILL_REFRESH_TIMEOUT 120 /* seconds */ |
| |
| /* |
| * Templates for IPMP ARP messages. |
| */ |
| static const arie_t ipmp_aract_template = { |
| AR_IPMP_ACTIVATE, |
| sizeof (arie_t), /* Name offset */ |
| sizeof (arie_t) /* Name length (set by ill_arp_alloc) */ |
| }; |
| |
| static const arie_t ipmp_ardeact_template = { |
| AR_IPMP_DEACTIVATE, |
| sizeof (arie_t), /* Name offset */ |
| sizeof (arie_t) /* Name length (set by ill_arp_alloc) */ |
| }; |
| |
| /* |
| * IPMP meta-interface kstats (based on those in PSARC/1997/198). |
| */ |
| static const kstat_named_t ipmp_kstats[IPMP_KSTAT_MAX] = { |
| { "obytes", KSTAT_DATA_UINT32 }, |
| { "obytes64", KSTAT_DATA_UINT64 }, |
| { "rbytes", KSTAT_DATA_UINT32 }, |
| { "rbytes64", KSTAT_DATA_UINT64 }, |
| { "opackets", KSTAT_DATA_UINT32 }, |
| { "opackets64", KSTAT_DATA_UINT64 }, |
| { "oerrors", KSTAT_DATA_UINT32 }, |
| { "ipackets", KSTAT_DATA_UINT32 }, |
| { "ipackets64", KSTAT_DATA_UINT64 }, |
| { "ierrors", KSTAT_DATA_UINT32 }, |
| { "multircv", KSTAT_DATA_UINT32 }, |
| { "multixmt", KSTAT_DATA_UINT32 }, |
| { "brdcstrcv", KSTAT_DATA_UINT32 }, |
| { "brdcstxmt", KSTAT_DATA_UINT32 }, |
| { "link_up", KSTAT_DATA_UINT32 } |
| }; |
| |
/*
 * Forward declarations for the file-local helpers defined below.
 */
static void	ipmp_grp_insert(ipmp_grp_t *, mod_hash_hndl_t);
static int	ipmp_grp_create_kstats(ipmp_grp_t *);
static int	ipmp_grp_update_kstats(kstat_t *, int);
static void	ipmp_grp_destroy_kstats(ipmp_grp_t *);
static ill_t	*ipmp_illgrp_min_ill(ipmp_illgrp_t *);
static ill_t	*ipmp_illgrp_max_ill(ipmp_illgrp_t *);
static void	ipmp_illgrp_set_cast(ipmp_illgrp_t *, ill_t *);
static void	ipmp_illgrp_set_mtu(ipmp_illgrp_t *, uint_t);
static boolean_t	ipmp_ill_activate(ill_t *);
static void	ipmp_ill_deactivate(ill_t *);
static void	ipmp_ill_ire_mark_testhidden(ire_t *, char *);
static void	ipmp_ill_ire_clear_testhidden(ire_t *, char *);
static void	ipmp_ill_refresh_active_timer_start(ill_t *);
static void	ipmp_ill_rtsaddrmsg(ill_t *, int);
static void	ipmp_ill_bind_ipif(ill_t *, ipif_t *, enum ip_resolver_action);
static ipif_t	*ipmp_ill_unbind_ipif(ill_t *, ipif_t *, boolean_t);
static void	ipmp_phyint_get_kstats(phyint_t *, uint64_t *);
static boolean_t	ipmp_ipif_is_up_dataaddr(const ipif_t *);
| |
| /* |
| * Initialize IPMP state for IP stack `ipst'; called from ip_stack_init(). |
| */ |
| void |
| ipmp_init(ip_stack_t *ipst) |
| { |
| ipst->ips_ipmp_grp_hash = mod_hash_create_extended("ipmp_grp_hash", |
| IPMP_GRP_HASH_SIZE, mod_hash_null_keydtor, mod_hash_null_valdtor, |
| mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); |
| rw_init(&ipst->ips_ipmp_lock, NULL, RW_DEFAULT, 0); |
| } |
| |
| /* |
| * Destroy IPMP state for IP stack `ipst'; called from ip_stack_fini(). |
| */ |
| void |
| ipmp_destroy(ip_stack_t *ipst) |
| { |
| mod_hash_destroy_hash(ipst->ips_ipmp_grp_hash); |
| rw_destroy(&ipst->ips_ipmp_lock); |
| } |
| |
| /* |
| * Create an IPMP group named `grname', associate it with IPMP phyint `phyi', |
| * and add it to the hash. On success, return a pointer to the created group. |
| * Caller must ensure `grname' is not yet in the hash. Assumes that the IPMP |
| * meta-interface associated with the group also has the same name (but they |
| * may differ later via ipmp_grp_rename()). |
| */ |
| ipmp_grp_t * |
| ipmp_grp_create(const char *grname, phyint_t *phyi) |
| { |
| ipmp_grp_t *grp; |
| ip_stack_t *ipst = PHYINT_TO_IPST(phyi); |
| mod_hash_hndl_t mh; |
| |
| ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock)); |
| |
| if ((grp = kmem_zalloc(sizeof (ipmp_grp_t), KM_NOSLEEP)) == NULL) |
| return (NULL); |
| |
| (void) strlcpy(grp->gr_name, grname, sizeof (grp->gr_name)); |
| (void) strlcpy(grp->gr_ifname, grname, sizeof (grp->gr_ifname)); |
| |
| /* |
| * Cache the group's phyint. This is safe since a phyint_t will |
| * outlive its ipmp_grp_t. |
| */ |
| grp->gr_phyint = phyi; |
| |
| /* |
| * Create IPMP group kstats. |
| */ |
| if (ipmp_grp_create_kstats(grp) != 0) { |
| kmem_free(grp, sizeof (ipmp_grp_t)); |
| return (NULL); |
| } |
| |
| /* |
| * Insert the group into the hash. |
| */ |
| if (mod_hash_reserve_nosleep(ipst->ips_ipmp_grp_hash, &mh) != 0) { |
| ipmp_grp_destroy_kstats(grp); |
| kmem_free(grp, sizeof (ipmp_grp_t)); |
| return (NULL); |
| } |
| ipmp_grp_insert(grp, mh); |
| |
| return (grp); |
| } |
| |
| /* |
| * Create IPMP kstat structures for `grp'. Return an errno upon failure. |
| */ |
| static int |
| ipmp_grp_create_kstats(ipmp_grp_t *grp) |
| { |
| kstat_t *ksp; |
| netstackid_t id = IPMP_GRP_TO_IPST(grp)->ips_netstack->netstack_stackid; |
| |
| ksp = kstat_create_netstack("ipmp", 0, grp->gr_ifname, "net", |
| KSTAT_TYPE_NAMED, IPMP_KSTAT_MAX, 0, id); |
| if (ksp == NULL) |
| return (ENOMEM); |
| |
| ksp->ks_update = ipmp_grp_update_kstats; |
| ksp->ks_private = grp; |
| bcopy(ipmp_kstats, ksp->ks_data, sizeof (ipmp_kstats)); |
| |
| kstat_install(ksp); |
| grp->gr_ksp = ksp; |
| return (0); |
| } |
| |
| /* |
| * Update the IPMP kstats tracked by `ksp'; called by the kstats framework. |
| */ |
| static int |
| ipmp_grp_update_kstats(kstat_t *ksp, int rw) |
| { |
| uint_t i; |
| kstat_named_t *kn = KSTAT_NAMED_PTR(ksp); |
| ipmp_grp_t *grp = ksp->ks_private; |
| ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp); |
| ipsq_t *ipsq, *grp_ipsq = grp->gr_phyint->phyint_ipsq; |
| phyint_t *phyi; |
| uint64_t phyi_kstats[IPMP_KSTAT_MAX]; |
| |
| if (rw == KSTAT_WRITE) |
| return (EACCES); |
| |
| /* |
| * Start with the group's baseline values. |
| */ |
| for (i = 0; i < IPMP_KSTAT_MAX; i++) { |
| if (kn[i].data_type == KSTAT_DATA_UINT32) { |
| kn[i].value.ui32 = grp->gr_kstats0[i]; |
| } else { |
| ASSERT(kn[i].data_type == KSTAT_DATA_UINT64); |
| kn[i].value.ui64 = grp->gr_kstats0[i]; |
| } |
| } |
| |
| /* |
| * Add in the stats of each phyint currently in the group. Since we |
| * don't directly track the phyints in a group, we cheat by walking |
| * the IPSQ set under ill_g_lock. (The IPSQ list cannot change while |
| * ill_g_lock is held.) |
| */ |
| rw_enter(&ipst->ips_ill_g_lock, RW_READER); |
| ipsq = grp_ipsq->ipsq_next; |
| for (; ipsq != grp_ipsq; ipsq = ipsq->ipsq_next) { |
| phyi = ipsq->ipsq_phyint; |
| |
| /* |
| * If a phyint in a group is being unplumbed, it's possible |
| * that ill_glist_delete() -> phyint_free() already freed the |
| * phyint (and set ipsq_phyint to NULL), but the unplumb |
| * operation has yet to complete (and thus ipsq_dq() has yet |
| * to remove the phyint's IPSQ from the group IPSQ's phyint |
| * list). We skip those phyints here (note that their kstats |
| * have already been added to gr_kstats0[]). |
| */ |
| if (phyi == NULL) |
| continue; |
| |
| ipmp_phyint_get_kstats(phyi, phyi_kstats); |
| |
| for (i = 0; i < IPMP_KSTAT_MAX; i++) { |
| phyi_kstats[i] -= phyi->phyint_kstats0[i]; |
| if (kn[i].data_type == KSTAT_DATA_UINT32) |
| kn[i].value.ui32 += phyi_kstats[i]; |
| else |
| kn[i].value.ui64 += phyi_kstats[i]; |
| } |
| } |
| |
| kn[IPMP_KSTAT_LINK_UP].value.ui32 = |
| (grp->gr_phyint->phyint_flags & PHYI_RUNNING) != 0; |
| |
| rw_exit(&ipst->ips_ill_g_lock); |
| return (0); |
| } |
| |
| /* |
| * Destroy IPMP kstat structures for `grp'. |
| */ |
| static void |
| ipmp_grp_destroy_kstats(ipmp_grp_t *grp) |
| { |
| netstackid_t id = IPMP_GRP_TO_IPST(grp)->ips_netstack->netstack_stackid; |
| |
| kstat_delete_netstack(grp->gr_ksp, id); |
| bzero(grp->gr_kstats0, sizeof (grp->gr_kstats0)); |
| grp->gr_ksp = NULL; |
| } |
| |
| /* |
| * Look up an IPMP group named `grname' on IP stack `ipst'. Return NULL if it |
| * does not exist. |
| */ |
| ipmp_grp_t * |
| ipmp_grp_lookup(const char *grname, ip_stack_t *ipst) |
| { |
| ipmp_grp_t *grp; |
| |
| ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock)); |
| |
| if (mod_hash_find(ipst->ips_ipmp_grp_hash, (mod_hash_key_t)grname, |
| (mod_hash_val_t *)&grp) == 0) |
| return (grp); |
| |
| return (NULL); |
| } |
| |
| /* |
| * Place information about group `grp' into `lifgr'. |
| */ |
| void |
| ipmp_grp_info(const ipmp_grp_t *grp, lifgroupinfo_t *lifgr) |
| { |
| ill_t *ill; |
| ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp); |
| |
| ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock)); |
| |
| lifgr->gi_v4 = (grp->gr_v4 != NULL); |
| lifgr->gi_v6 = (grp->gr_v6 != NULL); |
| lifgr->gi_nv4 = grp->gr_nv4 + grp->gr_pendv4; |
| lifgr->gi_nv6 = grp->gr_nv6 + grp->gr_pendv6; |
| lifgr->gi_mactype = grp->gr_nif > 0 ? grp->gr_mactype : SUNW_DL_IPMP; |
| (void) strlcpy(lifgr->gi_grifname, grp->gr_ifname, LIFNAMSIZ); |
| lifgr->gi_m4ifname[0] = '\0'; |
| lifgr->gi_m6ifname[0] = '\0'; |
| lifgr->gi_bcifname[0] = '\0'; |
| |
| if (grp->gr_v4 != NULL && (ill = grp->gr_v4->ig_cast_ill) != NULL) { |
| (void) strlcpy(lifgr->gi_m4ifname, ill->ill_name, LIFNAMSIZ); |
| (void) strlcpy(lifgr->gi_bcifname, ill->ill_name, LIFNAMSIZ); |
| } |
| |
| if (grp->gr_v6 != NULL && (ill = grp->gr_v6->ig_cast_ill) != NULL) |
| (void) strlcpy(lifgr->gi_m6ifname, ill->ill_name, LIFNAMSIZ); |
| } |
| |
| /* |
| * Insert `grp' into the hash using the reserved hash entry `mh'. |
| * Caller must ensure `grp' is not yet in the hash. |
| */ |
| static void |
| ipmp_grp_insert(ipmp_grp_t *grp, mod_hash_hndl_t mh) |
| { |
| int err; |
| ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp); |
| |
| ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock)); |
| |
| /* |
| * Since grp->gr_name will exist at least as long as `grp' is in the |
| * hash, we use it directly as the key. |
| */ |
| err = mod_hash_insert_reserve(ipst->ips_ipmp_grp_hash, |
| (mod_hash_key_t)grp->gr_name, (mod_hash_val_t)grp, mh); |
| if (err != 0) { |
| /* |
| * This should never happen since `mh' was preallocated. |
| */ |
| panic("cannot insert IPMP group \"%s\" (err %d)", |
| grp->gr_name, err); |
| } |
| } |
| |
| /* |
| * Remove `grp' from the hash. Caller must ensure `grp' is in it. |
| */ |
| static void |
| ipmp_grp_remove(ipmp_grp_t *grp) |
| { |
| int err; |
| mod_hash_val_t val; |
| mod_hash_key_t key = (mod_hash_key_t)grp->gr_name; |
| ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp); |
| |
| ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock)); |
| |
| err = mod_hash_remove(ipst->ips_ipmp_grp_hash, key, &val); |
| if (err != 0 || val != grp) { |
| panic("cannot remove IPMP group \"%s\" (err %d)", |
| grp->gr_name, err); |
| } |
| } |
| |
| /* |
| * Attempt to rename `grp' to new name `grname'. Return an errno if the new |
| * group name already exists or is invalid, or if there isn't enough memory. |
| */ |
| int |
| ipmp_grp_rename(ipmp_grp_t *grp, const char *grname) |
| { |
| mod_hash_hndl_t mh; |
| ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp); |
| |
| ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock)); |
| |
| if (grname[0] == '\0') |
| return (EINVAL); |
| |
| if (mod_hash_find(ipst->ips_ipmp_grp_hash, (mod_hash_key_t)grname, |
| (mod_hash_val_t *)&grp) != MH_ERR_NOTFOUND) |
| return (EEXIST); |
| |
| /* |
| * Before we remove the group from the hash, ensure we'll be able to |
| * re-insert it by reserving space. |
| */ |
| if (mod_hash_reserve_nosleep(ipst->ips_ipmp_grp_hash, &mh) != 0) |
| return (ENOMEM); |
| |
| ipmp_grp_remove(grp); |
| (void) strlcpy(grp->gr_name, grname, sizeof (grp->gr_name)); |
| ipmp_grp_insert(grp, mh); |
| |
| return (0); |
| } |
| |
| /* |
| * Destroy `grp' and remove it from the hash. Caller must ensure `grp' is in |
| * the hash, and that there are no interfaces on it. |
| */ |
| void |
| ipmp_grp_destroy(ipmp_grp_t *grp) |
| { |
| ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp); |
| |
| ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock)); |
| |
| /* |
| * If there are still interfaces using this group, panic before things |
| * go really off the rails. |
| */ |
| if (grp->gr_nif != 0) |
| panic("cannot destroy IPMP group \"%s\": in use", grp->gr_name); |
| |
| ipmp_grp_remove(grp); |
| ipmp_grp_destroy_kstats(grp); |
| |
| ASSERT(grp->gr_v4 == NULL); |
| ASSERT(grp->gr_v6 == NULL); |
| ASSERT(grp->gr_nv4 == 0); |
| ASSERT(grp->gr_nv6 == 0); |
| ASSERT(grp->gr_nactif == 0); |
| ASSERT(grp->gr_linkdownmp == NULL); |
| grp->gr_phyint = NULL; |
| |
| kmem_free(grp, sizeof (ipmp_grp_t)); |
| } |
| |
| /* |
| * Check whether `ill' is suitable for inclusion into `grp', and return an |
| * errno describing the problem (if any). NOTE: many of these errno values |
| * are interpreted by ifconfig, which will take corrective action and retry |
| * the SIOCSLIFGROUPNAME, so please exercise care when changing them. |
| */ |
| static int |
| ipmp_grp_vet_ill(ipmp_grp_t *grp, ill_t *ill) |
| { |
| ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp); |
| |
| ASSERT(IAM_WRITER_ILL(ill)); |
| ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock)); |
| |
| /* |
| * To sidestep complicated address migration logic in the kernel and |
| * to force the kernel's all-hosts multicast memberships to be blown |
| * away, all addresses that had been brought up must be brought back |
| * down prior to adding an interface to a group. (This includes |
| * addresses currently down due to DAD.) Once the interface has been |
| * added to the group, its addresses can then be brought back up, at |
| * which point they will be moved to the IPMP meta-interface. |
| * NOTE: we do this before ill_appaddr_cnt() since bringing down the |
| * link-local causes in.ndpd to remove its ADDRCONF'd addresses. |
| */ |
| if (ill->ill_ipif_up_count + ill->ill_ipif_dup_count > 0) |
| return (EADDRINUSE); |
| |
| /* |
| * To avoid confusing applications by changing addresses that are |
| * under their control, all such control must be removed prior to |
| * adding an interface into a group. |
| */ |
| if (ill_appaddr_cnt(ill) != 0) |
| return (EADDRNOTAVAIL); |
| |
| /* |
| * Since PTP addresses do not share the same broadcast domain, they |
| * are not allowed to be in an IPMP group. |
| */ |
| if (ill_ptpaddr_cnt(ill) != 0) |
| return (EINVAL); |
| |
| /* |
| * An ill must support multicast to be allowed into a group. |
| */ |
| if (!(ill->ill_flags & ILLF_MULTICAST)) |
| return (ENOTSUP); |
| |
| /* |
| * An ill must strictly be using ARP and/or ND for address |
| * resolution for it to be allowed into a group. |
| */ |
| if (ill->ill_flags & (ILLF_NONUD | ILLF_NOARP | ILLF_XRESOLV)) |
| return (ENOTSUP); |
| |
| /* |
| * An ill cannot also be using usesrc groups. (Although usesrc uses |
| * ill_g_usesrc_lock, we don't need to grab it since usesrc also does |
| * all its modifications as writer.) |
| */ |
| if (IS_USESRC_ILL(ill) || IS_USESRC_CLI_ILL(ill)) |
| return (ENOTSUP); |
| |
| /* |
| * All ills in a group must be the same mactype. |
| */ |
| if (grp->gr_nif > 0 && grp->gr_mactype != ill->ill_mactype) |
| return (EINVAL); |
| |
| return (0); |
| } |
| |
| /* |
| * Check whether `phyi' is suitable for inclusion into `grp', and return an |
| * errno describing the problem (if any). See comment above ipmp_grp_vet_ill() |
| * regarding errno values. |
| */ |
| int |
| ipmp_grp_vet_phyint(ipmp_grp_t *grp, phyint_t *phyi) |
| { |
| int err = 0; |
| ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp); |
| |
| ASSERT(IAM_WRITER_IPSQ(phyi->phyint_ipsq)); |
| ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock)); |
| |
| /* |
| * An interface cannot have address families plumbed that are not |
| * configured in the group. |
| */ |
| if (phyi->phyint_illv4 != NULL && grp->gr_v4 == NULL || |
| phyi->phyint_illv6 != NULL && grp->gr_v6 == NULL) |
| return (EAFNOSUPPORT); |
| |
| if (phyi->phyint_illv4 != NULL) |
| err = ipmp_grp_vet_ill(grp, phyi->phyint_illv4); |
| if (err == 0 && phyi->phyint_illv6 != NULL) |
| err = ipmp_grp_vet_ill(grp, phyi->phyint_illv6); |
| |
| return (err); |
| } |
| |
| /* |
| * Create a new illgrp on IPMP meta-interface `ill'. |
| */ |
| ipmp_illgrp_t * |
| ipmp_illgrp_create(ill_t *ill) |
| { |
| uint_t mtu = ill->ill_isv6 ? IPV6_MIN_MTU : IP_MIN_MTU; |
| ipmp_illgrp_t *illg; |
| |
| ASSERT(IAM_WRITER_ILL(ill)); |
| ASSERT(IS_IPMP(ill)); |
| ASSERT(ill->ill_grp == NULL); |
| |
| if ((illg = kmem_zalloc(sizeof (ipmp_illgrp_t), KM_NOSLEEP)) == NULL) |
| return (NULL); |
| |
| list_create(&illg->ig_if, sizeof (ill_t), offsetof(ill_t, ill_grpnode)); |
| list_create(&illg->ig_actif, sizeof (ill_t), |
| offsetof(ill_t, ill_actnode)); |
| list_create(&illg->ig_arpent, sizeof (ipmp_arpent_t), |
| offsetof(ipmp_arpent_t, ia_node)); |
| |
| illg->ig_ipmp_ill = ill; |
| ill->ill_grp = illg; |
| ipmp_illgrp_set_mtu(illg, mtu); |
| |
| return (illg); |
| } |
| |
| /* |
| * Destroy illgrp `illg', and disconnect it from its IPMP meta-interface. |
| */ |
| void |
| ipmp_illgrp_destroy(ipmp_illgrp_t *illg) |
| { |
| ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill)); |
| ASSERT(IS_IPMP(illg->ig_ipmp_ill)); |
| |
| /* |
| * Verify `illg' is empty. |
| */ |
| ASSERT(illg->ig_next_ill == NULL); |
| ASSERT(illg->ig_cast_ill == NULL); |
| ASSERT(list_is_empty(&illg->ig_arpent)); |
| ASSERT(list_is_empty(&illg->ig_if)); |
| ASSERT(list_is_empty(&illg->ig_actif)); |
| ASSERT(illg->ig_nactif == 0); |
| |
| /* |
| * Destroy `illg'. |
| */ |
| illg->ig_ipmp_ill->ill_grp = NULL; |
| illg->ig_ipmp_ill = NULL; |
| list_destroy(&illg->ig_if); |
| list_destroy(&illg->ig_actif); |
| list_destroy(&illg->ig_arpent); |
| kmem_free(illg, sizeof (ipmp_illgrp_t)); |
| } |
| |
| /* |
| * Add `ipif' to the pool of usable data addresses on `illg' and attempt to |
| * bind it to an underlying ill, while keeping an even address distribution. |
| * If the bind is successful, return a pointer to the bound ill. |
| */ |
| ill_t * |
| ipmp_illgrp_add_ipif(ipmp_illgrp_t *illg, ipif_t *ipif) |
| { |
| ill_t *minill; |
| ipmp_arpent_t *entp; |
| |
| ASSERT(IAM_WRITER_IPIF(ipif)); |
| ASSERT(ipmp_ipif_is_dataaddr(ipif)); |
| |
| /* |
| * IPMP data address mappings are internally managed by IP itself, so |
| * delete any existing ARP entries associated with the address. |
| */ |
| if (!ipif->ipif_isv6) { |
| entp = ipmp_illgrp_lookup_arpent(illg, &ipif->ipif_lcl_addr); |
| if (entp != NULL) |
| ipmp_illgrp_destroy_arpent(illg, entp); |
| } |
| |
| if ((minill = ipmp_illgrp_min_ill(illg)) != NULL) |
| ipmp_ill_bind_ipif(minill, ipif, Res_act_none); |
| |
| return (ipif->ipif_bound ? ipif->ipif_bound_ill : NULL); |
| } |
| |
| /* |
| * Delete `ipif' from the pool of usable data addresses on `illg'. If it's |
| * bound, unbind it from the underlying ill while keeping an even address |
| * distribution. |
| */ |
| void |
| ipmp_illgrp_del_ipif(ipmp_illgrp_t *illg, ipif_t *ipif) |
| { |
| ill_t *maxill, *boundill = ipif->ipif_bound_ill; |
| |
| ASSERT(IAM_WRITER_IPIF(ipif)); |
| |
| if (boundill != NULL) { |
| (void) ipmp_ill_unbind_ipif(boundill, ipif, B_FALSE); |
| |
| maxill = ipmp_illgrp_max_ill(illg); |
| if (maxill->ill_bound_cnt > boundill->ill_bound_cnt + 1) { |
| ipif = ipmp_ill_unbind_ipif(maxill, NULL, B_TRUE); |
| ipmp_ill_bind_ipif(boundill, ipif, Res_act_rebind); |
| } |
| } |
| } |
| |
| /* |
| * Return the active ill with the greatest number of data addresses in `illg'. |
| */ |
| static ill_t * |
| ipmp_illgrp_max_ill(ipmp_illgrp_t *illg) |
| { |
| ill_t *ill, *bestill = NULL; |
| |
| ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill)); |
| |
| ill = list_head(&illg->ig_actif); |
| for (; ill != NULL; ill = list_next(&illg->ig_actif, ill)) { |
| if (bestill == NULL || |
| ill->ill_bound_cnt > bestill->ill_bound_cnt) { |
| bestill = ill; |
| } |
| } |
| return (bestill); |
| } |
| |
| /* |
| * Return the active ill with the fewest number of data addresses in `illg'. |
| */ |
| static ill_t * |
| ipmp_illgrp_min_ill(ipmp_illgrp_t *illg) |
| { |
| ill_t *ill, *bestill = NULL; |
| |
| ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill)); |
| |
| ill = list_head(&illg->ig_actif); |
| for (; ill != NULL; ill = list_next(&illg->ig_actif, ill)) { |
| if (bestill == NULL || |
| ill->ill_bound_cnt < bestill->ill_bound_cnt) { |
| if (ill->ill_bound_cnt == 0) |
| return (ill); /* can't get better */ |
| bestill = ill; |
| } |
| } |
| return (bestill); |
| } |
| |
| /* |
| * Return a pointer to IPMP meta-interface for `illg' (which must exist). |
| * Since ig_ipmp_ill never changes for a given illg, no locks are needed. |
| */ |
| ill_t * |
| ipmp_illgrp_ipmp_ill(ipmp_illgrp_t *illg) |
| { |
| return (illg->ig_ipmp_ill); |
| } |
| |
| /* |
| * Return a pointer to the next available underlying ill in `illg', or NULL if |
| * one doesn't exist. Caller must be inside the IPSQ. |
| */ |
| ill_t * |
| ipmp_illgrp_next_ill(ipmp_illgrp_t *illg) |
| { |
| ill_t *ill; |
| ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg); |
| |
| ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill)); |
| |
| rw_enter(&ipst->ips_ipmp_lock, RW_WRITER); |
| if ((ill = illg->ig_next_ill) != NULL) { |
| illg->ig_next_ill = list_next(&illg->ig_actif, ill); |
| if (illg->ig_next_ill == NULL) |
| illg->ig_next_ill = list_head(&illg->ig_actif); |
| } |
| rw_exit(&ipst->ips_ipmp_lock); |
| |
| return (ill); |
| } |
| |
| /* |
| * Return a held pointer to the next available underlying ill in `illg', or |
| * NULL if one doesn't exist. Caller need not be inside the IPSQ. |
| */ |
| ill_t * |
| ipmp_illgrp_hold_next_ill(ipmp_illgrp_t *illg) |
| { |
| ill_t *ill; |
| uint_t i; |
| ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg); |
| |
| rw_enter(&ipst->ips_ipmp_lock, RW_WRITER); |
| for (i = 0; i < illg->ig_nactif; i++) { |
| ill = illg->ig_next_ill; |
| illg->ig_next_ill = list_next(&illg->ig_actif, ill); |
| if (illg->ig_next_ill == NULL) |
| illg->ig_next_ill = list_head(&illg->ig_actif); |
| |
| if (ILL_CAN_LOOKUP(ill)) { |
| ill_refhold(ill); |
| rw_exit(&ipst->ips_ipmp_lock); |
| return (ill); |
| } |
| } |
| rw_exit(&ipst->ips_ipmp_lock); |
| |
| return (NULL); |
| } |
| |
| /* |
| * Return a pointer to the nominated multicast ill in `illg', or NULL if one |
| * doesn't exist. Caller must be inside the IPSQ. |
| */ |
| ill_t * |
| ipmp_illgrp_cast_ill(ipmp_illgrp_t *illg) |
| { |
| /* |
| * Since an IPMP ill's ill_grp gets cleared during I_PUNLINK but |
| * this function can get called after that point, handle NULL. |
| */ |
| if (illg == NULL) |
| return (NULL); |
| |
| ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill)); |
| return (illg->ig_cast_ill); |
| } |
| |
| /* |
| * Return a held pointer to the nominated multicast ill in `illg', or NULL if |
| * one doesn't exist. Caller need not be inside the IPSQ. |
| */ |
| ill_t * |
| ipmp_illgrp_hold_cast_ill(ipmp_illgrp_t *illg) |
| { |
| ill_t *castill; |
| ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg); |
| |
| rw_enter(&ipst->ips_ipmp_lock, RW_READER); |
| castill = illg->ig_cast_ill; |
| if (castill != NULL && ILL_CAN_LOOKUP(castill)) { |
| ill_refhold(castill); |
| rw_exit(&ipst->ips_ipmp_lock); |
| return (castill); |
| } |
| rw_exit(&ipst->ips_ipmp_lock); |
| return (NULL); |
| } |
| |
| /* |
| * Set the nominated cast ill on `illg' to `castill'. If `castill' is NULL, |
| * any existing nomination is removed. Caller must be inside the IPSQ. |
| */ |
| static void |
| ipmp_illgrp_set_cast(ipmp_illgrp_t *illg, ill_t *castill) |
| { |
| ill_t *ocastill = illg->ig_cast_ill; |
| ill_t *ipmp_ill = illg->ig_ipmp_ill; |
| ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg); |
| |
| ASSERT(IAM_WRITER_ILL(ipmp_ill)); |
| |
| /* |
| * Disable old nominated ill (if any). |
| */ |
| if (ocastill != NULL) { |
| DTRACE_PROBE2(ipmp__illgrp__cast__disable, ipmp_illgrp_t *, |
| illg, ill_t *, ocastill); |
| ASSERT(ocastill->ill_nom_cast); |
| ocastill->ill_nom_cast = B_FALSE; |
| /* |
| * If the IPMP meta-interface is down, we never did the join, |
| * so we must not try to leave. |
| */ |
| if (ipmp_ill->ill_dl_up) |
| ill_leave_multicast(ipmp_ill); |
| } |
| |
| /* |
| * Set new nomination. |
| */ |
| rw_enter(&ipst->ips_ipmp_lock, RW_WRITER); |
| illg->ig_cast_ill = castill; |
| rw_exit(&ipst->ips_ipmp_lock); |
| |
| if (ocastill != NULL) { |
| /* |
| * Delete any IREs tied to the old nomination. We must do |
| * this after the new castill is set and has reached global |
| * visibility since the datapath has not been quiesced. |
| */ |
| ire_walk_ill(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, |
| ill_stq_cache_delete, ocastill, ocastill); |
| } |
| |
| /* |
| * Enable new nominated ill (if any). |
| */ |
| if (castill != NULL) { |
| DTRACE_PROBE2(ipmp__illgrp__cast__enable, ipmp_illgrp_t *, |
| illg, ill_t *, castill); |
| ASSERT(!castill->ill_nom_cast); |
| castill->ill_nom_cast = B_TRUE; |
| /* |
| * If the IPMP meta-interface is down, the attempt to recover |
| * will silently fail but ill_need_recover_multicast will be |
| * erroneously cleared -- so check first. |
| */ |
| if (ipmp_ill->ill_dl_up) |
| ill_recover_multicast(ipmp_ill); |
| } |
| |
| /* |
| * For IPv4, refresh our broadcast IREs. This needs to be done even |
| * if there's no new nomination since ill_refresh_bcast() still must |
| * update the IPMP meta-interface's broadcast IREs to point back at |
| * the IPMP meta-interface itself. |
| */ |
| if (!ipmp_ill->ill_isv6) |
| ill_refresh_bcast(ipmp_ill); |
| } |
| |
| /* |
| * Create an IPMP ARP entry and add it to the set tracked on `illg'. If an |
| * entry for the same IP address already exists, destroy it first. Return the |
| * created IPMP ARP entry, or NULL on failure. |
| */ |
| ipmp_arpent_t * |
| ipmp_illgrp_create_arpent(ipmp_illgrp_t *illg, mblk_t *mp, boolean_t proxyarp) |
| { |
| uchar_t *addrp; |
| area_t *area = (area_t *)mp->b_rptr; |
| ipmp_arpent_t *entp, *oentp; |
| |
| ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill)); |
| ASSERT(area->area_proto_addr_length == sizeof (ipaddr_t)); |
| |
| if ((entp = kmem_zalloc(sizeof (ipmp_arpent_t), KM_NOSLEEP)) == NULL) |
| return (NULL); |
| |
| if ((mp = copyb(mp)) == NULL) { |
| kmem_free(entp, sizeof (ipmp_arpent_t)); |
| return (NULL); |
| } |
| |
| DB_TYPE(mp) = M_PROTO; |
| entp->ia_area_mp = mp; |
| entp->ia_proxyarp = proxyarp; |
| addrp = mi_offset_paramc(mp, area->area_proto_addr_offset, |
| sizeof (ipaddr_t)); |
| bcopy(addrp, &entp->ia_ipaddr, sizeof (ipaddr_t)); |
| |
| if ((oentp = ipmp_illgrp_lookup_arpent(illg, &entp->ia_ipaddr)) != NULL) |
| ipmp_illgrp_destroy_arpent(illg, oentp); |
| |
| list_insert_head(&illg->ig_arpent, entp); |
| return (entp); |
| } |
| |
| /* |
| * Remove IPMP ARP entry `entp' from the set tracked on `illg' and destroy it. |
| */ |
| void |
| ipmp_illgrp_destroy_arpent(ipmp_illgrp_t *illg, ipmp_arpent_t *entp) |
| { |
| ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill)); |
| |
| list_remove(&illg->ig_arpent, entp); |
| freeb(entp->ia_area_mp); |
| kmem_free(entp, sizeof (ipmp_arpent_t)); |
| } |
| |
| /* |
| * Mark that ARP has been notified about the IP address on `entp'; `illg' is |
| * taken as a debugging aid for DTrace FBT probes. |
| */ |
| /* ARGSUSED */ |
| void |
| ipmp_illgrp_mark_arpent(ipmp_illgrp_t *illg, ipmp_arpent_t *entp) |
| { |
| entp->ia_notified = B_TRUE; |
| } |
| |
| /* |
| * Look up the IPMP ARP entry for IP address `addrp' on `illg'; if `addrp' is |
| * NULL, any IPMP ARP entry is requested. Return NULL if it does not exist. |
| */ |
| ipmp_arpent_t * |
| ipmp_illgrp_lookup_arpent(ipmp_illgrp_t *illg, ipaddr_t *addrp) |
| { |
| ipmp_arpent_t *entp = list_head(&illg->ig_arpent); |
| |
| ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill)); |
| |
| if (addrp == NULL) |
| return (entp); |
| |
| for (; entp != NULL; entp = list_next(&illg->ig_arpent, entp)) |
| if (entp->ia_ipaddr == *addrp) |
| break; |
| return (entp); |
| } |
| |
| /* |
| * Refresh ARP entries on `illg' to be distributed across its active |
| * interfaces. Entries that cannot be refreshed (e.g., because there are no |
| * active interfaces) are marked so that subsequent calls can try again. |
| */ |
| void |
| ipmp_illgrp_refresh_arpent(ipmp_illgrp_t *illg) |
| { |
| ill_t *ill, *ipmp_ill = illg->ig_ipmp_ill; |
| uint_t paddrlen = ipmp_ill->ill_phys_addr_length; |
| area_t *area; |
| mblk_t *area_mp; |
| uchar_t *physaddr; |
| ipmp_arpent_t *entp; |
| |
| ASSERT(IAM_WRITER_ILL(ipmp_ill)); |
| ASSERT(!ipmp_ill->ill_isv6); |
| |
| ill = list_head(&illg->ig_actif); |
| entp = list_head(&illg->ig_arpent); |
| for (; entp != NULL; entp = list_next(&illg->ig_arpent, entp)) { |
| if (ill == NULL || ipmp_ill->ill_ipif_up_count == 0) { |
| entp->ia_notified = B_FALSE; |
| continue; |
| } |
| |
| area = (area_t *)entp->ia_area_mp->b_rptr; |
| ASSERT(paddrlen == ill->ill_phys_addr_length); |
| ASSERT(paddrlen == area->area_hw_addr_length); |
| physaddr = mi_offset_paramc(entp->ia_area_mp, |
| area->area_hw_addr_offset, paddrlen); |
| |
| /* |
| * If this is a proxy ARP entry, we can skip notifying ARP if |
| * the entry is already up-to-date. If it has changed, we |
| * update the entry's hardware address before notifying ARP. |
| */ |
| if (entp->ia_proxyarp) { |
| if (bcmp(ill->ill_phys_addr, physaddr, paddrlen) == 0 && |
| entp->ia_notified) |
| continue; |
| bcopy(ill->ill_phys_addr, physaddr, paddrlen); |
| } |
| |
| if ((area_mp = copyb(entp->ia_area_mp)) == NULL) { |
| entp->ia_notified = B_FALSE; |
| continue; |
| } |
| |
| putnext(ipmp_ill->ill_rq, area_mp); |
| ipmp_illgrp_mark_arpent(illg, entp); |
| |
| if ((ill = list_next(&illg->ig_actif, ill)) == NULL) |
| ill = list_head(&illg->ig_actif); |
| } |
| } |
| |
| /* |
| * Return an interface in `illg' with the specified `physaddr', or NULL if one |
| * doesn't exist. Caller must hold ill_g_lock if it's not inside the IPSQ. |
| */ |
| ill_t * |
| ipmp_illgrp_find_ill(ipmp_illgrp_t *illg, uchar_t *physaddr, uint_t paddrlen) |
| { |
| ill_t *ill; |
| ill_t *ipmp_ill = illg->ig_ipmp_ill; |
| ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg); |
| |
| ASSERT(IAM_WRITER_ILL(ipmp_ill) || RW_LOCK_HELD(&ipst->ips_ill_g_lock)); |
| |
| ill = list_head(&illg->ig_if); |
| for (; ill != NULL; ill = list_next(&illg->ig_if, ill)) { |
| if (ill->ill_phys_addr_length == paddrlen && |
| bcmp(ill->ill_phys_addr, physaddr, paddrlen) == 0) |
| return (ill); |
| } |
| return (NULL); |
| } |
| |
| /* |
| * Asynchronously update the MTU for an IPMP ill by injecting a DL_NOTIFY_IND. |
| * Caller must be inside the IPSQ unless this is initialization. |
| */ |
| static void |
| ipmp_illgrp_set_mtu(ipmp_illgrp_t *illg, uint_t mtu) |
| { |
| ill_t *ill = illg->ig_ipmp_ill; |
| mblk_t *mp; |
| |
| ASSERT(illg->ig_mtu == 0 || IAM_WRITER_ILL(ill)); |
| |
| /* |
| * If allocation fails, we have bigger problems than MTU. |
| */ |
| if ((mp = ip_dlnotify_alloc(DL_NOTE_SDU_SIZE, mtu)) != NULL) { |
| illg->ig_mtu = mtu; |
| put(ill->ill_rq, mp); |
| } |
| } |
| |
| /* |
| * Recalculate the IPMP group MTU for `illg', and update its associated IPMP |
| * ill MTU if necessary. |
| */ |
| void |
| ipmp_illgrp_refresh_mtu(ipmp_illgrp_t *illg) |
| { |
| ill_t *ill; |
| ill_t *ipmp_ill = illg->ig_ipmp_ill; |
| uint_t mtu = 0; |
| |
| ASSERT(IAM_WRITER_ILL(ipmp_ill)); |
| |
| /* |
| * Since ill_max_mtu can only change under ill_lock, we hold ill_lock |
| * for each ill as we iterate through the list. Any changes to the |
| * ill_max_mtu will also trigger an update, so even if we missed it |
| * this time around, the update will catch it. |
| */ |
| ill = list_head(&illg->ig_if); |
| for (; ill != NULL; ill = list_next(&illg->ig_if, ill)) { |
| mutex_enter(&ill->ill_lock); |
| if (mtu == 0 || ill->ill_max_mtu < mtu) |
| mtu = ill->ill_max_mtu; |
| mutex_exit(&ill->ill_lock); |
| } |
| |
| /* |
| * MTU must be at least the minimum MTU. |
| */ |
| mtu = MAX(mtu, ipmp_ill->ill_isv6 ? IPV6_MIN_MTU : IP_MIN_MTU); |
| |
| if (illg->ig_mtu != mtu) |
| ipmp_illgrp_set_mtu(illg, mtu); |
| } |
| |
| /* |
| * Link illgrp `illg' to IPMP group `grp'. To simplify the caller, silently |
| * allow the same link to be established more than once. |
| */ |
| void |
| ipmp_illgrp_link_grp(ipmp_illgrp_t *illg, ipmp_grp_t *grp) |
| { |
| ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg); |
| |
| ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock)); |
| |
| if (illg->ig_ipmp_ill->ill_isv6) { |
| ASSERT(grp->gr_v6 == NULL || grp->gr_v6 == illg); |
| grp->gr_v6 = illg; |
| } else { |
| ASSERT(grp->gr_v4 == NULL || grp->gr_v4 == illg); |
| grp->gr_v4 = illg; |
| } |
| } |
| |
| /* |
| * Unlink illgrp `illg' from its IPMP group. Return an errno if the illgrp |
| * cannot be unlinked (e.g., because there are still interfaces using it). |
| */ |
| int |
| ipmp_illgrp_unlink_grp(ipmp_illgrp_t *illg) |
| { |
| ipmp_grp_t *grp = illg->ig_ipmp_ill->ill_phyint->phyint_grp; |
| ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg); |
| |
| ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock)); |
| |
| if (illg->ig_ipmp_ill->ill_isv6) { |
| if (grp->gr_nv6 + grp->gr_pendv6 != 0) |
| return (EBUSY); |
| grp->gr_v6 = NULL; |
| } else { |
| if (grp->gr_nv4 + grp->gr_pendv4 != 0) |
| return (EBUSY); |
| grp->gr_v4 = NULL; |
| } |
| return (0); |
| } |
| |
| /* |
| * Place `ill' into `illg', and rebalance the data addresses on `illg' |
| * to be spread evenly across the ills now in it. Also, adjust the IPMP |
| * ill as necessary to account for `ill' (e.g., MTU). |
| */ |
void
ipmp_ill_join_illgrp(ill_t *ill, ipmp_illgrp_t *illg)
{
	ill_t *ipmp_ill;
	ipif_t *ipif;
	ip_stack_t *ipst = ill->ill_ipst;

	/* IS_UNDER_IPMP() requires ill_grp to be non-NULL */
	ASSERT(!IS_IPMP(ill) && ill->ill_phyint->phyint_grp != NULL);
	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(ill->ill_grp == NULL);

	ipmp_ill = illg->ig_ipmp_ill;

	/*
	 * Account for `ill' joining the illgrp.  The per-family counters
	 * (gr_nv4/gr_nv6) are protected by ips_ipmp_lock; they are what
	 * ipmp_illgrp_unlink_grp() checks before allowing an unlink.
	 */
	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
	if (ill->ill_isv6)
		ill->ill_phyint->phyint_grp->gr_nv6++;
	else
		ill->ill_phyint->phyint_grp->gr_nv4++;
	rw_exit(&ipst->ips_ipmp_lock);

	/*
	 * Ensure the ILLF_ROUTER flag remains consistent across the group.
	 * The IPMP meta-interface's setting wins; ill_flags is modified
	 * under ill_lock.
	 */
	mutex_enter(&ill->ill_lock);
	if (ipmp_ill->ill_flags & ILLF_ROUTER)
		ill->ill_flags |= ILLF_ROUTER;
	else
		ill->ill_flags &= ~ILLF_ROUTER;
	mutex_exit(&ill->ill_lock);

	/*
	 * Blow away all multicast memberships that currently exist on `ill'.
	 * This may seem odd, but it's consistent with the application view
	 * that `ill' no longer exists (e.g., due to ipmp_ill_rtsaddrmsg()).
	 */
	if (ill->ill_isv6) {
		reset_conn_ill(ill);
		reset_mrt_ill(ill);
	} else {
		/* v4 state is tracked per-ipif rather than per-ill */
		ipif = ill->ill_ipif;
		for (; ipif != NULL; ipif = ipif->ipif_next) {
			reset_conn_ipif(ipif);
			reset_mrt_vif_ipif(ipif);
		}
	}
	ip_purge_allmulti(ill);

	/*
	 * Borrow the first ill's ill_phys_addr_length value for the illgrp's
	 * physical address length.  All other ills must have the same value,
	 * since they are required to all be the same mactype.  Also update
	 * the IPMP ill's MTU and CoS marking, if necessary.
	 */
	if (list_is_empty(&illg->ig_if)) {
		ASSERT(ipmp_ill->ill_phys_addr_length == 0);
		/*
		 * NOTE: we leave ill_phys_addr NULL since the IPMP group
		 * doesn't have a physical address.  This means that code must
		 * not assume that ill_phys_addr is non-NULL just because
		 * ill_phys_addr_length is non-zero.  Likewise for ill_nd_lla.
		 */
		ipmp_ill->ill_phys_addr_length = ill->ill_phys_addr_length;
		ipmp_ill->ill_nd_lla_len = ill->ill_phys_addr_length;
		ipmp_ill->ill_type = ill->ill_type;

		if (ill->ill_flags & ILLF_COS_ENABLED) {
			mutex_enter(&ipmp_ill->ill_lock);
			ipmp_ill->ill_flags |= ILLF_COS_ENABLED;
			mutex_exit(&ipmp_ill->ill_lock);
		}
		/* first member: group MTU is simply this ill's MTU */
		ipmp_illgrp_set_mtu(illg, ill->ill_max_mtu);
	} else {
		ASSERT(ipmp_ill->ill_phys_addr_length ==
		    ill->ill_phys_addr_length);
		ASSERT(ipmp_ill->ill_type == ill->ill_type);

		/* CoS is only usable if *every* member supports it */
		if (!(ill->ill_flags & ILLF_COS_ENABLED)) {
			mutex_enter(&ipmp_ill->ill_lock);
			ipmp_ill->ill_flags &= ~ILLF_COS_ENABLED;
			mutex_exit(&ipmp_ill->ill_lock);
		}
		/* the group MTU can only shrink when a member joins */
		if (illg->ig_mtu > ill->ill_max_mtu)
			ipmp_illgrp_set_mtu(illg, ill->ill_max_mtu);
	}

	/* make the membership visible to readers holding ill_g_lock */
	rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
	list_insert_tail(&illg->ig_if, ill);
	ill->ill_grp = illg;
	rw_exit(&ipst->ips_ill_g_lock);

	/*
	 * Hide the IREs on `ill' so that we don't accidentally find them when
	 * sending data traffic.
	 */
	ire_walk_ill(MATCH_IRE_ILL, 0, ipmp_ill_ire_mark_testhidden, ill, ill);

	/*
	 * Merge any broadcast IREs, if need be.
	 */
	if (!ill->ill_isv6)
		ill_refresh_bcast(ill);

	ipmp_ill_refresh_active(ill);
}
| |
| /* |
| * Remove `ill' from its illgrp, and rebalance the data addresses in that |
| * illgrp to be spread evenly across the remaining ills. Also, adjust the |
| * IPMP ill as necessary now that `ill' is removed (e.g., MTU). |
| */ |
void
ipmp_ill_leave_illgrp(ill_t *ill)
{
	ill_t *ipmp_ill;
	ipif_t *ipif;
	ipmp_arpent_t *entp;
	ipmp_illgrp_t *illg = ill->ill_grp;
	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);

	ASSERT(IS_UNDER_IPMP(ill));
	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(illg != NULL);

	ipmp_ill = illg->ig_ipmp_ill;

	/*
	 * Cancel IPMP-specific ill timeouts.  (untimeout() of an already-
	 * fired or zero id is harmless.)
	 */
	(void) untimeout(ill->ill_refresh_tid);

	/*
	 * Expose any previously-hidden IREs on `ill'.
	 */
	ire_walk_ill(MATCH_IRE_ILL, 0, ipmp_ill_ire_clear_testhidden, ill, ill);

	/*
	 * Ensure the multicast state for each ipif on `ill' is down so that
	 * our ipif_multicast_up() (once `ill' leaves the group) will rejoin
	 * all eligible groups.
	 */
	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
		if (ipif->ipif_flags & IPIF_UP)
			ipif_multicast_down(ipif);

	/*
	 * Account for `ill' leaving the illgrp (mirrors the accounting done
	 * in ipmp_ill_join_illgrp()).
	 */
	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
	if (ill->ill_isv6)
		ill->ill_phyint->phyint_grp->gr_nv6--;
	else
		ill->ill_phyint->phyint_grp->gr_nv4--;
	rw_exit(&ipst->ips_ipmp_lock);

	/*
	 * Pull `ill' out of the interface lists.  Deactivate first (if
	 * active) so bound data addresses get rehomed to other members.
	 */
	if (list_link_active(&ill->ill_actnode))
		ipmp_ill_deactivate(ill);
	rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
	list_remove(&illg->ig_if, ill);
	ill->ill_grp = NULL;
	rw_exit(&ipst->ips_ill_g_lock);

	/*
	 * Recreate any broadcast IREs that had been shared, if need be.
	 */
	if (!ill->ill_isv6)
		ill_refresh_bcast(ill);

	/*
	 * Re-establish multicast memberships that were previously being
	 * handled by the IPMP meta-interface.
	 */
	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
		if (ipif->ipif_flags & IPIF_UP)
			ipif_multicast_up(ipif);

	/*
	 * Refresh the group MTU based on the new interface list.
	 */
	ipmp_illgrp_refresh_mtu(illg);

	if (list_is_empty(&illg->ig_if)) {
		/*
		 * No ills left in the illgrp; we no longer have a physical
		 * address length, nor can we support ARP, CoS, or anything
		 * else that depends on knowing the link layer type.
		 */
		while ((entp = ipmp_illgrp_lookup_arpent(illg, NULL)) != NULL)
			ipmp_illgrp_destroy_arpent(illg, entp);

		ipmp_ill->ill_phys_addr_length = 0;
		ipmp_ill->ill_nd_lla_len = 0;
		ipmp_ill->ill_type = IFT_OTHER;
		mutex_enter(&ipmp_ill->ill_lock);
		ipmp_ill->ill_flags &= ~ILLF_COS_ENABLED;
		mutex_exit(&ipmp_ill->ill_lock);
	} else {
		/*
		 * If `ill' didn't support CoS, see if it can now be enabled.
		 * NOTE: `ill' is reused below to walk the remaining members;
		 * the departing ill is no longer referenced past this point.
		 */
		if (!(ill->ill_flags & ILLF_COS_ENABLED)) {
			ASSERT(!(ipmp_ill->ill_flags & ILLF_COS_ENABLED));

			ill = list_head(&illg->ig_if);
			do {
				if (!(ill->ill_flags & ILLF_COS_ENABLED))
					break;
			} while ((ill = list_next(&illg->ig_if, ill)) != NULL);

			/* NULL here means every remaining member has CoS */
			if (ill == NULL) {
				mutex_enter(&ipmp_ill->ill_lock);
				ipmp_ill->ill_flags |= ILLF_COS_ENABLED;
				mutex_exit(&ipmp_ill->ill_lock);
			}
		}
	}
}
| |
| /* |
| * Check if `ill' should be active, and activate or deactivate if need be. |
| * Return B_FALSE if a refresh was necessary but could not be performed. |
| */ |
| static boolean_t |
| ipmp_ill_try_refresh_active(ill_t *ill) |
| { |
| boolean_t refreshed = B_TRUE; |
| |
| ASSERT(IAM_WRITER_ILL(ill)); |
| ASSERT(IS_UNDER_IPMP(ill)); |
| |
| if (ipmp_ill_is_active(ill)) { |
| if (!list_link_active(&ill->ill_actnode)) |
| refreshed = ipmp_ill_activate(ill); |
| } else { |
| if (list_link_active(&ill->ill_actnode)) |
| ipmp_ill_deactivate(ill); |
| } |
| |
| return (refreshed); |
| } |
| |
| /* |
| * Check if `ill' should be active, and activate or deactivate if need be. |
| * If the refresh fails, schedule a timer to try again later. |
| */ |
| void |
| ipmp_ill_refresh_active(ill_t *ill) |
| { |
| if (!ipmp_ill_try_refresh_active(ill)) |
| ipmp_ill_refresh_active_timer_start(ill); |
| } |
| |
| /* |
| * Retry ipmp_ill_try_refresh_active() on the ill named by `ill_arg'. |
| */ |
static void
ipmp_ill_refresh_active_timer(void *ill_arg)
{
	ill_t *ill = ill_arg;
	boolean_t refreshed = B_FALSE;

	/*
	 * Clear ill_refresh_tid to indicate that no timeout is pending
	 * (another thread could schedule a new timeout while we're still
	 * running, but that's harmless).  If the ill is going away, bail.
	 */
	mutex_enter(&ill->ill_lock);
	ill->ill_refresh_tid = 0;
	if (ill->ill_state_flags & ILL_CONDEMNED) {
		mutex_exit(&ill->ill_lock);
		return;
	}
	mutex_exit(&ill->ill_lock);

	/*
	 * We're in timeout() context and thus cannot block waiting for the
	 * IPSQ; if it's busy, `refreshed' stays B_FALSE and we reschedule.
	 */
	if (ipsq_try_enter(NULL, ill, NULL, NULL, NULL, NEW_OP, B_FALSE)) {
		refreshed = ipmp_ill_try_refresh_active(ill);
		ipsq_exit(ill->ill_phyint->phyint_ipsq);
	}

	/*
	 * If the refresh failed (or the IPSQ was busy), schedule another
	 * attempt.
	 */
	if (!refreshed)
		ipmp_ill_refresh_active_timer_start(ill);
}
| |
| /* |
| * Retry an ipmp_ill_try_refresh_active() on the ill named by `arg'. |
| */ |
static void
ipmp_ill_refresh_active_timer_start(ill_t *ill)
{
	/* ill_refresh_tid is protected by ill_lock */
	mutex_enter(&ill->ill_lock);

	/*
	 * If the ill is going away or a refresh is already scheduled, bail.
	 */
	if (ill->ill_refresh_tid != 0 ||
	    (ill->ill_state_flags & ILL_CONDEMNED)) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	/* arrange for ipmp_ill_refresh_active_timer() to run later */
	ill->ill_refresh_tid = timeout(ipmp_ill_refresh_active_timer, ill,
	    SEC_TO_TICK(IPMP_ILL_REFRESH_TIMEOUT));

	mutex_exit(&ill->ill_lock);
}
| |
| /* |
| * Activate `ill' so it will be used to send and receive data traffic. Return |
| * B_FALSE if `ill' cannot be activated. Note that we allocate any messages |
| * needed to deactivate `ill' here as well so that deactivation cannot fail. |
| */ |
static boolean_t
ipmp_ill_activate(ill_t *ill)
{
	ipif_t *ipif;
	mblk_t *actmp = NULL, *deactmp = NULL;
	mblk_t *linkupmp = NULL, *linkdownmp = NULL;
	ipmp_grp_t *grp = ill->ill_phyint->phyint_grp;
	const char *grifname = grp->gr_ifname;
	ipmp_illgrp_t *illg = ill->ill_grp;
	ill_t *maxill;
	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(IS_UNDER_IPMP(ill));

	/*
	 * If this will be the first active interface in the group, allocate
	 * the link-up and link-down messages.  The link-down message is
	 * stashed on the group (below) so that marking the group link down
	 * later cannot fail.
	 */
	if (grp->gr_nactif == 0) {
		linkupmp = ip_dlnotify_alloc(DL_NOTE_LINK_UP, 0);
		linkdownmp = ip_dlnotify_alloc(DL_NOTE_LINK_DOWN, 0);
		if (linkupmp == NULL || linkdownmp == NULL)
			goto fail;
	}

	/*
	 * For IPv4, allocate the activate/deactivate messages, and tell ARP.
	 * The deactivate message is stashed on the ill so that
	 * ipmp_ill_deactivate() cannot fail.
	 */
	if (!ill->ill_isv6) {
		actmp = ill_arie_alloc(ill, grifname, &ipmp_aract_template);
		deactmp = ill_arie_alloc(ill, grifname, &ipmp_ardeact_template);
		if (actmp == NULL || deactmp == NULL)
			goto fail;

		ASSERT(ill->ill_ardeact_mp == NULL);
		ill->ill_ardeact_mp = deactmp;
		putnext(illg->ig_ipmp_ill->ill_rq, actmp);
	}

	if (list_is_empty(&illg->ig_actif)) {
		/*
		 * Now that we have an active ill, nominate it for multicast
		 * and broadcast duties.  Do this before ipmp_ill_bind_ipif()
		 * since that may need to send multicast packets (e.g., IPv6
		 * neighbor discovery probes).
		 */
		ipmp_illgrp_set_cast(illg, ill);

		/*
		 * This is the first active ill in the illgrp -- add 'em all.
		 * We can access/walk ig_ipmp_ill's ipif list since we're
		 * writer on its IPSQ as well.
		 */
		ipif = illg->ig_ipmp_ill->ill_ipif;
		for (; ipif != NULL; ipif = ipif->ipif_next)
			if (ipmp_ipif_is_up_dataaddr(ipif))
				ipmp_ill_bind_ipif(ill, ipif, Res_act_initial);
	} else {
		/*
		 * Redistribute the addresses by moving them from the ill with
		 * the most addresses until the ill being activated is at the
		 * same level as the rest of the ills.
		 */
		for (;;) {
			maxill = ipmp_illgrp_max_ill(illg);
			ASSERT(maxill != NULL);
			if (ill->ill_bound_cnt + 1 >= maxill->ill_bound_cnt)
				break;
			ipif = ipmp_ill_unbind_ipif(maxill, NULL, B_TRUE);
			ipmp_ill_bind_ipif(ill, ipif, Res_act_rebind);
		}

		/*
		 * TODO: explore whether it's advantageous to flush IRE_CACHE
		 * bindings to force existing connections to be redistributed
		 * to the new ill.
		 */
	}

	/*
	 * Put the interface in the active list.  ig_actif and friends are
	 * protected by ips_ipmp_lock.
	 */
	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
	list_insert_tail(&illg->ig_actif, ill);
	illg->ig_nactif++;
	illg->ig_next_ill = ill;
	rw_exit(&ipst->ips_ipmp_lock);

	/*
	 * Refresh ARP entries to use `ill', if need be.
	 */
	if (!ill->ill_isv6)
		ipmp_illgrp_refresh_arpent(illg);

	/*
	 * Finally, mark the group link up, if necessary.  `linkupmp' is
	 * non-NULL exactly when gr_nactif was zero (allocated above).
	 */
	if (grp->gr_nactif++ == 0) {
		ASSERT(grp->gr_linkdownmp == NULL);
		grp->gr_linkdownmp = linkdownmp;
		put(illg->ig_ipmp_ill->ill_rq, linkupmp);
	}
	return (B_TRUE);
fail:
	/* freemsg(NULL) is a no-op, so unallocated messages are fine here */
	freemsg(actmp);
	freemsg(deactmp);
	freemsg(linkupmp);
	freemsg(linkdownmp);
	return (B_FALSE);
}
| |
| /* |
| * Deactivate `ill' so it will not be used to send or receive data traffic. |
| */ |
static void
ipmp_ill_deactivate(ill_t *ill)
{
	ill_t *minill;
	ipif_t *ipif, *ubnextipif, *ubheadipif = NULL;
	mblk_t *mp;
	ipmp_grp_t *grp = ill->ill_phyint->phyint_grp;
	ipmp_illgrp_t *illg = ill->ill_grp;
	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(IS_UNDER_IPMP(ill));

	/*
	 * Delete IRE_CACHE entries tied to this ill before they become stale.
	 */
	ire_walk_ill(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE,
	    ill_stq_cache_delete, ill, ill);

	/*
	 * Pull the interface out of the active list.  ig_actif and friends
	 * are protected by ips_ipmp_lock.
	 */
	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
	list_remove(&illg->ig_actif, ill);
	illg->ig_nactif--;
	illg->ig_next_ill = list_head(&illg->ig_actif);
	rw_exit(&ipst->ips_ipmp_lock);

	/*
	 * If the ill that's being deactivated had been nominated for
	 * multicast/broadcast, nominate a new one.
	 */
	if (ill == illg->ig_cast_ill)
		ipmp_illgrp_set_cast(illg, list_head(&illg->ig_actif));

	/*
	 * Unbind all of the ipifs bound to this ill, and save 'em in a list;
	 * we'll rebind them after we tell the resolver the ill is no longer
	 * active.  We must do things in this order or the resolver could
	 * accidentally rebind to the ill we're trying to remove if multiple
	 * ills in the group have the same hardware address (which is
	 * unsupported, but shouldn't lead to a wedged machine).
	 */
	while ((ipif = ipmp_ill_unbind_ipif(ill, NULL, B_TRUE)) != NULL) {
		/* build a singly-linked list through ipif_bound_next */
		ipif->ipif_bound_next = ubheadipif;
		ubheadipif = ipif;
	}

	if (!ill->ill_isv6) {
		/*
		 * Tell ARP `ill' is no longer active in the group.  The
		 * message was preallocated by ipmp_ill_activate(), so this
		 * step cannot fail.
		 */
		mp = ill->ill_ardeact_mp;
		ill->ill_ardeact_mp = NULL;
		ASSERT(mp != NULL);
		putnext(illg->ig_ipmp_ill->ill_rq, mp);

		/*
		 * Refresh any ARP entries that had been using `ill'.
		 */
		ipmp_illgrp_refresh_arpent(illg);
	}

	/*
	 * Rebind each ipif from the deactivated ill to the active ill with
	 * the fewest ipifs.  If there are no active ills, the ipifs will
	 * remain unbound.
	 */
	for (ipif = ubheadipif; ipif != NULL; ipif = ubnextipif) {
		ubnextipif = ipif->ipif_bound_next;
		ipif->ipif_bound_next = NULL;

		if ((minill = ipmp_illgrp_min_ill(illg)) != NULL)
			ipmp_ill_bind_ipif(minill, ipif, Res_act_rebind);
	}

	/*
	 * Finally, mark the group link down, if necessary.  gr_linkdownmp
	 * was preallocated when the group went up, so this cannot fail.
	 */
	if (--grp->gr_nactif == 0) {
		mp = grp->gr_linkdownmp;
		grp->gr_linkdownmp = NULL;
		ASSERT(mp != NULL);
		put(illg->ig_ipmp_ill->ill_rq, mp);
	}
}
| |
| /* |
| * Send the routing socket messages needed to make `ill' "appear" (RTM_ADD) |
| * or "disappear" (RTM_DELETE) to non-IPMP-aware routing socket listeners. |
| */ |
| static void |
| ipmp_ill_rtsaddrmsg(ill_t *ill, int cmd) |
| { |
| ipif_t *ipif; |
| |
| ASSERT(IAM_WRITER_ILL(ill)); |
| ASSERT(cmd == RTM_ADD || cmd == RTM_DELETE); |
| |
| /* |
| * If `ill' is truly down, there are no messages to generate since: |
| * |
| * 1. If cmd == RTM_DELETE, then we're supposed to hide the interface |
| * and its addresses by bringing them down. But that's already |
| * true, so there's nothing to hide. |
| * |
| * 2. If cmd == RTM_ADD, then we're supposed to generate messages |
| * indicating that any previously-hidden up addresses are again |
| * back up (along with the interface). But they aren't, so |
| * there's nothing to expose. |
| */ |
| if (ill->ill_ipif_up_count == 0) |
| return; |
| |
| if (cmd == RTM_ADD) |
| ip_rts_xifmsg(ill->ill_ipif, IPIF_UP, 0, RTSQ_NORMAL); |
| |
| for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) |
| if (ipif->ipif_flags & IPIF_UP) |
| ip_rts_newaddrmsg(cmd, 0, ipif, RTSQ_NORMAL); |
| |
| if (cmd == RTM_DELETE) |
| ip_rts_xifmsg(ill->ill_ipif, 0, IPIF_UP, RTSQ_NORMAL); |
| } |
| |
| /* |
| * Bind the address named by `ipif' to the underlying ill named by `ill'. |
| * If `act' is Res_act_none, don't notify the resolver. Otherwise, `act' |
| * will indicate to the resolver whether this is an initial bringup of |
| * `ipif', or just a rebind to another ill. |
| */ |
static void
ipmp_ill_bind_ipif(ill_t *ill, ipif_t *ipif, enum ip_resolver_action act)
{
	int err = 0;
	ip_stack_t *ipst = ill->ill_ipst;

	ASSERT(IAM_WRITER_ILL(ill) && IAM_WRITER_IPIF(ipif));
	ASSERT(IS_UNDER_IPMP(ill) && IS_IPMP(ipif->ipif_ill));
	ASSERT(act == Res_act_none || ipmp_ipif_is_up_dataaddr(ipif));
	ASSERT(ipif->ipif_bound_ill == NULL);
	ASSERT(ipif->ipif_bound_next == NULL);

	/* push `ipif' onto the ill's singly-linked list of bound ipifs */
	ipif->ipif_bound_next = ill->ill_bound_ipif;
	ill->ill_bound_ipif = ipif;
	ill->ill_bound_cnt++;
	/* ipif_bound_ill is read by others under ips_ipmp_lock */
	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
	ipif->ipif_bound_ill = ill;
	rw_exit(&ipst->ips_ipmp_lock);

	/*
	 * If necessary, tell ARP/NDP about the new mapping.  Note that
	 * ipif_resolver_up() cannot fail for non-XRESOLV IPv6 ills.
	 */
	if (act != Res_act_none) {
		if (ill->ill_isv6) {
			VERIFY(ipif_resolver_up(ipif, act) == 0);
			err = ipif_ndp_up(ipif, act == Res_act_initial);
		} else {
			err = ipif_resolver_up(ipif, act);
		}

		/*
		 * Since ipif_ndp_up() never returns EINPROGRESS and
		 * ipif_resolver_up() only returns EINPROGRESS when the
		 * associated ill is not up, we should never be here with
		 * EINPROGRESS.  We rely on this to simplify the design.
		 */
		ASSERT(err != EINPROGRESS);
	}
	/* TODO: retry binding on failure? when? */
	ipif->ipif_bound = (err == 0);
}
| |
| /* |
| * Unbind the address named by `ipif' from the underlying ill named by `ill'. |
| * If `ipif' is NULL, then an arbitrary ipif on `ill' is unbound and returned. |
| * If no ipifs are bound to `ill', NULL is returned. If `notifyres' is |
| * B_TRUE, notify the resolver about the change. |
| */ |
static ipif_t *
ipmp_ill_unbind_ipif(ill_t *ill, ipif_t *ipif, boolean_t notifyres)
{
	ill_t *ipmp_ill;
	ipif_t *previpif;
	ip_stack_t *ipst = ill->ill_ipst;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(IS_UNDER_IPMP(ill));

	ipmp_ill = ill->ill_grp->ig_ipmp_ill;

	/*
	 * If necessary, find an ipif to unbind (the head of the ill's bound
	 * list -- an arbitrary choice).
	 */
	if (ipif == NULL) {
		if ((ipif = ill->ill_bound_ipif) == NULL) {
			ASSERT(ill->ill_bound_cnt == 0);
			return (NULL);
		}
	}

	ASSERT(IAM_WRITER_IPIF(ipif));
	ASSERT(IS_IPMP(ipif->ipif_ill));
	ASSERT(ipif->ipif_bound_ill == ill);
	ASSERT(ill->ill_bound_cnt > 0);

	/*
	 * Unbind it.  ipif_bound_ill is read by others under ips_ipmp_lock.
	 */
	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
	ipif->ipif_bound_ill = NULL;
	rw_exit(&ipst->ips_ipmp_lock);
	ill->ill_bound_cnt--;

	/* remove `ipif' from the ill's singly-linked bound list */
	if (ill->ill_bound_ipif == ipif) {
		ill->ill_bound_ipif = ipif->ipif_bound_next;
	} else {
		previpif = ill->ill_bound_ipif;
		while (previpif->ipif_bound_next != ipif)
			previpif = previpif->ipif_bound_next;

		previpif->ipif_bound_next = ipif->ipif_bound_next;
	}
	ipif->ipif_bound_next = NULL;

	/*
	 * If requested, notify the resolvers (provided we're bound).  For
	 * v4, the ARP-delete message was preallocated so this cannot fail.
	 */
	if (notifyres && ipif->ipif_bound) {
		if (ill->ill_isv6) {
			ipif_ndp_down(ipif);
		} else {
			ASSERT(ipif->ipif_arp_del_mp != NULL);
			putnext(ipmp_ill->ill_rq, ipif->ipif_arp_del_mp);
			ipif->ipif_arp_del_mp = NULL;
		}
	}
	ipif->ipif_bound = B_FALSE;

	return (ipif);
}
| |
| /* |
| * Check if `ill' is active. Caller must hold ill_lock and phyint_lock if |
| * it's not inside the IPSQ. Since ipmp_ill_try_refresh_active() calls this |
| * to determine whether an ill should be considered active, other consumers |
| * may race and learn about an ill that should be deactivated/activated before |
| * IPMP has performed the activation/deactivation. This should be safe though |
| * since at worst e.g. ire_atomic_start() will prematurely delete an IRE that |
| * would've been cleaned up by ipmp_ill_deactivate(). |
| */ |
| boolean_t |
| ipmp_ill_is_active(ill_t *ill) |
| { |
| phyint_t *phyi = ill->ill_phyint; |
| |
| ASSERT(IS_UNDER_IPMP(ill)); |
| ASSERT(IAM_WRITER_ILL(ill) || |
| (MUTEX_HELD(&ill->ill_lock) && MUTEX_HELD(&phyi->phyint_lock))); |
| |
| /* |
| * Note that PHYI_RUNNING isn't checked since we rely on in.mpathd to |
| * set PHYI_FAILED whenever PHYI_RUNNING is cleared. This allows the |
| * link flapping logic to be just in in.mpathd and allows us to ignore |
| * changes to PHYI_RUNNING. |
| */ |
| return (!(ill->ill_ipif_up_count == 0 || |
| (phyi->phyint_flags & (PHYI_OFFLINE|PHYI_INACTIVE|PHYI_FAILED)))); |
| } |
| |
| /* |
| * IRE walker callback: set IRE_MARK_TESTHIDDEN on cache/interface/offsubnet |
| * IREs with a source address on `ill_arg'. |
| */ |
| static void |
| ipmp_ill_ire_mark_testhidden(ire_t *ire, char *ill_arg) |
| { |
| ill_t *ill = (ill_t *)ill_arg; |
| |
| ASSERT(IAM_WRITER_ILL(ill)); |
| ASSERT(!IS_IPMP(ill)); |
| |
| if (ire->ire_ipif->ipif_ill != ill) |
| return; |
| |
| switch (ire->ire_type) { |
| case IRE_HOST: |
| case IRE_PREFIX: |
| case IRE_DEFAULT: |
| case IRE_CACHE: |
| case IRE_IF_RESOLVER: |
| case IRE_IF_NORESOLVER: |
| DTRACE_PROBE1(ipmp__mark__testhidden, ire_t *, ire); |
| ire->ire_marks |= IRE_MARK_TESTHIDDEN; |
| break; |
| default: |
| break; |
| } |
| } |
| |
| /* |
| * IRE walker callback: clear IRE_MARK_TESTHIDDEN if the IRE has a source |
| * address on `ill_arg'. |
| */ |
| static void |
| ipmp_ill_ire_clear_testhidden(ire_t *ire, char *ill_arg) |
| { |
| ill_t *ill = (ill_t *)ill_arg; |
| |
| ASSERT(IAM_WRITER_ILL(ill)); |
| ASSERT(!IS_IPMP(ill)); |
| |
| if (ire->ire_ipif->ipif_ill == ill) { |
| DTRACE_PROBE1(ipmp__clear__testhidden, ire_t *, ire); |
| ire->ire_marks &= ~IRE_MARK_TESTHIDDEN; |
| } |
| } |
| |
| /* |
| * Return a held pointer to the IPMP ill for underlying interface `ill', or |
| * NULL if one doesn't exist. (Unfortunately, this function needs to take an |
| * underlying ill rather than an ipmp_illgrp_t because an underlying ill's |
| * ill_grp pointer may become stale when not under an IPSQ and not holding |
| * ipmp_lock.) Caller need not be inside the IPSQ. |
| */ |
| ill_t * |
| ipmp_ill_hold_ipmp_ill(ill_t *ill) |
| { |
| ip_stack_t *ipst = ill->ill_ipst; |
| ipmp_illgrp_t *illg; |
| |
| ASSERT(!IS_IPMP(ill)); |
| |
| rw_enter(&ipst->ips_ipmp_lock, RW_READER); |
| illg = ill->ill_grp; |
| if (illg != NULL && ILL_CAN_LOOKUP(illg->ig_ipmp_ill)) { |
| ill_refhold(illg->ig_ipmp_ill); |
| rw_exit(&ipst->ips_ipmp_lock); |
| return (illg->ig_ipmp_ill); |
| } |
| /* |
| * Assume `ill' was removed from the illgrp in the meantime. |
| */ |
| rw_exit(&ill->ill_ipst->ips_ipmp_lock); |
| return (NULL); |
| } |
| |
| /* |
| * Return the interface index for the IPMP ill tied to underlying interface |
| * `ill', or zero if one doesn't exist. Caller need not be inside the IPSQ. |
| */ |
| uint_t |
| ipmp_ill_get_ipmp_ifindex(const ill_t *ill) |
| { |
| uint_t ifindex = 0; |
| ip_stack_t *ipst = ill->ill_ipst; |
| ipmp_grp_t *grp; |
| |
| ASSERT(!IS_IPMP(ill)); |
| |
| rw_enter(&ipst->ips_ipmp_lock, RW_READER); |
| if ((grp = ill->ill_phyint->phyint_grp) != NULL) |
| ifindex = grp->gr_phyint->phyint_ifindex; |
| rw_exit(&ipst->ips_ipmp_lock); |
| return (ifindex); |
| } |
| |
| /* |
| * Place phyint `phyi' into IPMP group `grp'. |
| */ |
void
ipmp_phyint_join_grp(phyint_t *phyi, ipmp_grp_t *grp)
{
	ill_t *ill;
	ipsq_t *ipsq = phyi->phyint_ipsq;
	ipsq_t *grp_ipsq = grp->gr_phyint->phyint_ipsq;
	ip_stack_t *ipst = PHYINT_TO_IPST(phyi);

	ASSERT(IAM_WRITER_IPSQ(ipsq));
	ASSERT(phyi->phyint_illv4 != NULL || phyi->phyint_illv6 != NULL);

	/*
	 * Send routing socket messages indicating that the phyint's ills
	 * and ipifs vanished.  NOTE(review): `ill' ends up pointing at the
	 * last non-NULL ill below; the mactype check later relies on the
	 * ASSERT above guaranteeing at least one of them is non-NULL.
	 */
	if (phyi->phyint_illv4 != NULL) {
		ill = phyi->phyint_illv4;
		ipmp_ill_rtsaddrmsg(ill, RTM_DELETE);
	}

	if (phyi->phyint_illv6 != NULL) {
		ill = phyi->phyint_illv6;
		ipmp_ill_rtsaddrmsg(ill, RTM_DELETE);
	}

	/*
	 * Snapshot the phyint's initial kstats as a baseline.
	 */
	ipmp_phyint_get_kstats(phyi, phyi->phyint_kstats0);

	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);

	phyi->phyint_grp = grp;
	/* first member establishes the group's mactype; the rest must match */
	if (++grp->gr_nif == 1)
		grp->gr_mactype = ill->ill_mactype;
	else
		ASSERT(grp->gr_mactype == ill->ill_mactype);

	/*
	 * Now that we're in the group, request a switch to the group's xop
	 * when we ipsq_exit().  All future operations will be exclusive on
	 * the group xop until ipmp_phyint_leave_grp() is called.
	 */
	ASSERT(ipsq->ipsq_swxop == NULL);
	ASSERT(grp_ipsq->ipsq_xop == &grp_ipsq->ipsq_ownxop);
	ipsq->ipsq_swxop = &grp_ipsq->ipsq_ownxop;

	rw_exit(&ipst->ips_ipmp_lock);
}
| |
| /* |
| * Remove phyint `phyi' from its current IPMP group. |
| */ |
void
ipmp_phyint_leave_grp(phyint_t *phyi)
{
	uint_t i;
	ipsq_t *ipsq = phyi->phyint_ipsq;
	ip_stack_t *ipst = PHYINT_TO_IPST(phyi);
	uint64_t phyi_kstats[IPMP_KSTAT_MAX];

	ASSERT(IAM_WRITER_IPSQ(ipsq));

	/*
	 * If any of the phyint's ills are still in an illgrp, kick 'em out.
	 */
	if (phyi->phyint_illv4 != NULL && IS_UNDER_IPMP(phyi->phyint_illv4))
		ipmp_ill_leave_illgrp(phyi->phyint_illv4);
	if (phyi->phyint_illv6 != NULL && IS_UNDER_IPMP(phyi->phyint_illv6))
		ipmp_ill_leave_illgrp(phyi->phyint_illv6);

	/*
	 * Send routing socket messages indicating that the phyint's ills
	 * and ipifs have reappeared.
	 */
	if (phyi->phyint_illv4 != NULL)
		ipmp_ill_rtsaddrmsg(phyi->phyint_illv4, RTM_ADD);
	if (phyi->phyint_illv6 != NULL)
		ipmp_ill_rtsaddrmsg(phyi->phyint_illv6, RTM_ADD);

	/*
	 * Calculate the phyint's cumulative kstats while it was in the group
	 * (current counters minus the baseline snapshotted at join time),
	 * and add that to the group's baseline.
	 */
	ipmp_phyint_get_kstats(phyi, phyi_kstats);
	for (i = 0; i < IPMP_KSTAT_MAX; i++) {
		phyi_kstats[i] -= phyi->phyint_kstats0[i];
		atomic_add_64(&phyi->phyint_grp->gr_kstats0[i], phyi_kstats[i]);
	}

	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);

	phyi->phyint_grp->gr_nif--;
	phyi->phyint_grp = NULL;

	/*
	 * As our final act in leaving the group, request a switch back to our
	 * IPSQ's own xop when we ipsq_exit().
	 */
	ASSERT(ipsq->ipsq_swxop == NULL);
	ipsq->ipsq_swxop = &ipsq->ipsq_ownxop;

	rw_exit(&ipst->ips_ipmp_lock);
}
| |
| /* |
| * Store the IPMP-related kstats for `phyi' into the array named by `kstats'. |
| * Assumes that `kstats' has at least IPMP_KSTAT_MAX elements. |
| */ |
static void
ipmp_phyint_get_kstats(phyint_t *phyi, uint64_t kstats[])
{
	uint_t i, j;
	const char *name;
	kstat_t *ksp;
	kstat_named_t *kn;

	/* default all counters to zero in case the kstat can't be found */
	bzero(kstats, sizeof (kstats[0]) * IPMP_KSTAT_MAX);

	/*
	 * NOTE: ALL_ZONES here assumes that there's at most one link
	 * with a given name on a given system (safe for now).
	 */
	ksp = kstat_hold_byname("link", 0, phyi->phyint_name, ALL_ZONES);
	if (ksp == NULL)
		return;

	/* KSTAT_ENTER/KSTAT_EXIT bracket access to ks_data */
	KSTAT_ENTER(ksp);

	if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED) {
		/*
		 * Bring kstats up-to-date before recording.
		 */
		(void) KSTAT_UPDATE(ksp, KSTAT_READ);

		/*
		 * For each IPMP kstat of interest, linearly search the
		 * named-kstat array for a matching name and widen its
		 * value to 64 bits.  Unmatched or unhandled-type entries
		 * leave the counter at zero.
		 */
		kn = KSTAT_NAMED_PTR(ksp);
		for (i = 0; i < IPMP_KSTAT_MAX; i++) {
			name = ipmp_kstats[i].name;
			kstats[i] = 0;
			for (j = 0; j < ksp->ks_ndata; j++) {
				if (strcmp(kn[j].name, name) != 0)
					continue;

				switch (kn[j].data_type) {
				case KSTAT_DATA_INT32:
				case KSTAT_DATA_UINT32:
					kstats[i] = kn[j].value.ui32;
					break;
#ifdef	_LP64
				case KSTAT_DATA_LONG:
				case KSTAT_DATA_ULONG:
					kstats[i] = kn[j].value.ul;
					break;
#endif
				case KSTAT_DATA_INT64:
				case KSTAT_DATA_UINT64:
					kstats[i] = kn[j].value.ui64;
					break;
				}
				break;
			}
		}
	}

	KSTAT_EXIT(ksp);
	kstat_rele(ksp);
}
| |
| /* |
| * Refresh the active state of all ills on `phyi'. |
| */ |
| void |
| ipmp_phyint_refresh_active(phyint_t *phyi) |
| { |
| if (phyi->phyint_illv4 != NULL) |
| ipmp_ill_refresh_active(phyi->phyint_illv4); |
| if (phyi->phyint_illv6 != NULL) |
| ipmp_ill_refresh_active(phyi->phyint_illv6); |
| } |
| |
| /* |
| * Return a held pointer to the underlying ill bound to `ipif', or NULL if one |
| * doesn't exist. Caller need not be inside the IPSQ. |
| */ |
| ill_t * |
| ipmp_ipif_hold_bound_ill(const ipif_t *ipif) |
| { |
| ill_t *boundill; |
| ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; |
| |
| ASSERT(IS_IPMP(ipif->ipif_ill)); |
| |
| rw_enter(&ipst->ips_ipmp_lock, RW_READER); |
| boundill = ipif->ipif_bound_ill; |
| if (boundill != NULL && ILL_CAN_LOOKUP(boundill)) { |
| ill_refhold(boundill); |
| rw_exit(&ipst->ips_ipmp_lock); |
| return (boundill); |
| } |
| rw_exit(&ipst->ips_ipmp_lock); |
| return (NULL); |
| } |
| |
| /* |
| * Return a pointer to the underlying ill bound to `ipif', or NULL if one |
| * doesn't exist. Caller must be inside the IPSQ. |
| */ |
| ill_t * |
| ipmp_ipif_bound_ill(const ipif_t *ipif) |
| { |
| ASSERT(IAM_WRITER_ILL(ipif->ipif_ill)); |
| ASSERT(IS_IPMP(ipif->ipif_ill)); |
| |
| return (ipif->ipif_bound_ill); |
| } |
| |
| /* |
| * Check if `ipif' is a "stub" (placeholder address not being used). |
| */ |
| boolean_t |
| ipmp_ipif_is_stubaddr(const ipif_t *ipif) |
| { |
| if (ipif->ipif_flags & IPIF_UP) |
| return (B_FALSE); |
| if (ipif->ipif_ill->ill_isv6) |
| return (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)); |
| else |
| return (ipif->ipif_lcl_addr == INADDR_ANY); |
| } |
| |
| /* |
| * Check if `ipif' is an IPMP data address. |
| */ |
| boolean_t |
| ipmp_ipif_is_dataaddr(const ipif_t *ipif) |
| { |
| if (ipif->ipif_flags & IPIF_NOFAILOVER) |
| return (B_FALSE); |
| if (ipif->ipif_ill->ill_isv6) |
| return (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)); |
| else |
| return (ipif->ipif_lcl_addr != INADDR_ANY); |
| } |
| |
| /* |
| * Check if `ipif' is an IPIF_UP IPMP data address. |
| */ |
| static boolean_t |
| ipmp_ipif_is_up_dataaddr(const ipif_t *ipif) |
| { |
| return (ipmp_ipif_is_dataaddr(ipif) && (ipif->ipif_flags & IPIF_UP)); |
| } |