| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. |
| */ |
| |
| #include <sys/types.h> |
| #include <sys/stream.h> |
| #include <sys/stropts.h> |
| #include <sys/strsun.h> |
| #include <sys/sysmacros.h> |
| #include <sys/errno.h> |
| #include <sys/dlpi.h> |
| #include <sys/socket.h> |
| #include <sys/ddi.h> |
| #include <sys/sunddi.h> |
| #include <sys/cmn_err.h> |
| #include <sys/debug.h> |
| #include <sys/vtrace.h> |
| #include <sys/kmem.h> |
| #include <sys/zone.h> |
| #include <sys/ethernet.h> |
| #include <sys/sdt.h> |
| #include <sys/mac.h> |
| |
| #include <net/if.h> |
| #include <net/if_types.h> |
| #include <net/if_dl.h> |
| #include <net/route.h> |
| #include <netinet/in.h> |
| #include <netinet/ip6.h> |
| #include <netinet/icmp6.h> |
| |
| #include <inet/common.h> |
| #include <inet/mi.h> |
| #include <inet/mib2.h> |
| #include <inet/nd.h> |
| #include <inet/ip.h> |
| #include <inet/ip_impl.h> |
| #include <inet/ipclassifier.h> |
| #include <inet/ip_if.h> |
| #include <inet/ip_ire.h> |
| #include <inet/ip_rts.h> |
| #include <inet/ip6.h> |
| #include <inet/ip_ndp.h> |
| #include <inet/sctp_ip.h> |
| #include <inet/ip_arp.h> |
| #include <inet/ip2mac_impl.h> |
| |
/*
 * Interval between unsolicited announcements of a local address: the NDP
 * unsolicited-advertisement interval when `isv6' is true, the ARP publish
 * interval otherwise.  A variable named `ipst' must be in scope at the
 * point of use.  The argument is parenthesized so that any expression can
 * be passed safely.
 */
#define	ANNOUNCE_INTERVAL(isv6) \
	((isv6) ? ipst->ips_ip_ndp_unsolicit_interval : \
	ipst->ips_ip_arp_publish_interval)
| |
/*
 * Address-defense interval: the NDP value when `isv6' is true, the ARP value
 * otherwise.  A variable named `ipst' must be in scope at the point of use.
 * The argument is parenthesized so that any expression can be passed safely.
 */
#define	DEFENSE_INTERVAL(isv6) \
	((isv6) ? ipst->ips_ndp_defend_interval : \
	ipst->ips_arp_defend_interval)
| |
/*
 * Fixed (non-tunable) probe interval chosen from the link's capabilities:
 * 150 when the ill has ill_note_link set, 1500 when it does not.
 */
#define	ILL_PROBE_INTERVAL(ill)	(!(ill)->ill_note_link ? 1500 : 150)
| |
/*
 * The IPv4 Link Local address space (169.254/16) is special; we do extra
 * duplicate checking there, as the entire assignment mechanism rests on
 * random numbers.
 *
 * `ptr' points at the first (most significant) byte of the address.  The
 * argument is parenthesized before the cast so that pointer expressions
 * such as `p + 1' are evaluated in their own type first.
 */
#define	IS_IPV4_LL_SPACE(ptr)	(((uchar_t *)(ptr))[0] == 169 && \
	((uchar_t *)(ptr))[1] == 254)
| |
/*
 * NCE_EXTERNAL_FLAGS_MASK is the set of ncec_flags that callers are allowed
 * to hand to the ncec*add* functions.
 *
 * Two of them deserve a note:
 *   NCE_F_AUTHORITY - incoming adverts for the mapping are ignored (DAD is
 *                     still performed for the mapping).
 *   NCE_F_PUBLISH   - we respond to requests for the protocol address.
 */
#define	NCE_EXTERNAL_FLAGS_MASK						\
	(NCE_F_AUTHORITY | NCE_F_PUBLISH | NCE_F_STATIC |		\
	NCE_F_MYADDR | NCE_F_ISROUTER | NCE_F_NONUD | NCE_F_ANYCAST |	\
	NCE_F_UNSOL_ADV | NCE_F_BCAST | NCE_F_MCAST)
| |
| /* |
| * Lock ordering: |
| * |
| * ndp_g_lock -> ill_lock -> ncec_lock |
| * |
| * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and |
| * ncec_next. ncec_lock protects the contents of the NCE (particularly |
| * ncec_refcnt). |
| */ |
| |
| static void nce_cleanup_list(ncec_t *ncec); |
| static void nce_set_ll(ncec_t *ncec, uchar_t *ll_addr); |
| static ncec_t *ncec_lookup_illgrp(ill_t *, const in6_addr_t *, |
| ncec_t *); |
| static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *); |
| static int nce_set_multicast_v6(ill_t *ill, const in6_addr_t *addr, |
| uint16_t ncec_flags, nce_t **newnce); |
| static int nce_set_multicast_v4(ill_t *ill, const in_addr_t *dst, |
| uint16_t ncec_flags, nce_t **newnce); |
| static boolean_t ndp_xmit(ill_t *ill, uint32_t operation, |
| uint8_t *hwaddr, uint_t hwaddr_len, const in6_addr_t *sender, |
| const in6_addr_t *target, int flag); |
| static void ncec_refhold_locked(ncec_t *); |
| static boolean_t ill_defend_rate_limit(ill_t *, ncec_t *); |
| static void nce_queue_mp_common(ncec_t *, mblk_t *, boolean_t); |
| static int nce_add_common(ill_t *, uchar_t *, uint_t, const in6_addr_t *, |
| uint16_t, uint16_t, nce_t **); |
| static nce_t *nce_add_impl(ill_t *, ncec_t *, nce_t *, mblk_t *); |
| static nce_t *nce_add(ill_t *, ncec_t *); |
| static void nce_inactive(nce_t *); |
| extern nce_t *nce_lookup(ill_t *, const in6_addr_t *); |
| static nce_t *nce_ill_lookup_then_add(ill_t *, ncec_t *); |
| static int nce_add_v6(ill_t *, uchar_t *, uint_t, const in6_addr_t *, |
| uint16_t, uint16_t, nce_t **); |
| static int nce_add_v4(ill_t *, uchar_t *, uint_t, const in_addr_t *, |
| uint16_t, uint16_t, nce_t **); |
| static int nce_add_v6_postprocess(nce_t *); |
| static int nce_add_v4_postprocess(nce_t *); |
| static ill_t *nce_resolve_src(ncec_t *, in6_addr_t *); |
| static clock_t nce_fuzz_interval(clock_t, boolean_t); |
| static void nce_resolv_ipmp_ok(ncec_t *); |
| static void nce_walk_common(ill_t *, pfi_t, void *); |
| static void nce_start_timer(ncec_t *, uint_t); |
| static nce_t *nce_fastpath_create(ill_t *, ncec_t *); |
| static void nce_fastpath_trigger(nce_t *); |
| static nce_t *nce_fastpath(ncec_t *, boolean_t, nce_t *); |
| |
| #ifdef DEBUG |
| static void ncec_trace_cleanup(const ncec_t *); |
| #endif |
| |
/*
 * Address of the hash bucket (within the per-stack IPv4 or IPv6 neighbor
 * cache table) that `addr' hashes to.
 */
#define	NCE_HASH_PTR_V4(ipst, addr)					\
	(&(ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr,		\
	NCE_TABLE_SIZE)])

#define	NCE_HASH_PTR_V6(ipst, addr)					\
	(&(ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr,		\
	NCE_TABLE_SIZE)])
| |
| extern kmem_cache_t *ncec_cache; |
| extern kmem_cache_t *nce_cache; |
| |
| /* |
| * Send out a IPv6 (unicast) or IPv4 (broadcast) DAD probe |
| * If src_ill is not null, the ncec_addr is bound to src_ill. The |
| * src_ill is ignored by nce_dad for IPv4 Neighbor Cache entries where |
| * the probe is sent on the ncec_ill (in the non-IPMP case) or the |
| * IPMP cast_ill (in the IPMP case). |
| * |
| * Note that the probe interval is based on the src_ill for IPv6, and |
| * the ncec_xmit_interval for IPv4. |
| */ |
| static void |
| nce_dad(ncec_t *ncec, ill_t *src_ill, boolean_t send_probe) |
| { |
| boolean_t dropped; |
| uint32_t probe_interval; |
| |
| ASSERT(!(ncec->ncec_flags & NCE_F_MCAST)); |
| ASSERT(!(ncec->ncec_flags & NCE_F_BCAST)); |
| if (ncec->ncec_ipversion == IPV6_VERSION) { |
| dropped = ndp_xmit(src_ill, ND_NEIGHBOR_SOLICIT, |
| ncec->ncec_lladdr, ncec->ncec_lladdr_length, |
| &ipv6_all_zeros, &ncec->ncec_addr, NDP_PROBE); |
| probe_interval = ILL_PROBE_INTERVAL(src_ill); |
| } else { |
| /* IPv4 DAD delay the initial probe. */ |
| if (send_probe) |
| dropped = arp_probe(ncec); |
| else |
| dropped = B_TRUE; |
| probe_interval = nce_fuzz_interval(ncec->ncec_xmit_interval, |
| !send_probe); |
| } |
| if (!dropped) { |
| mutex_enter(&ncec->ncec_lock); |
| ncec->ncec_pcnt--; |
| mutex_exit(&ncec->ncec_lock); |
| } |
| nce_restart_timer(ncec, probe_interval); |
| } |
| |
| /* |
| * Compute default flags to use for an advertisement of this ncec's address. |
| */ |
| static int |
| nce_advert_flags(const ncec_t *ncec) |
| { |
| int flag = 0; |
| |
| if (ncec->ncec_flags & NCE_F_ISROUTER) |
| flag |= NDP_ISROUTER; |
| if (!(ncec->ncec_flags & NCE_F_ANYCAST)) |
| flag |= NDP_ORIDE; |
| |
| return (flag); |
| } |
| |
| /* |
| * NDP Cache Entry creation routine. |
| * This routine must always be called with ndp6->ndp_g_lock held. |
| */ |
| int |
| nce_add_v6(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len, |
| const in6_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce) |
| { |
| int err; |
| nce_t *nce; |
| |
| ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp6->ndp_g_lock)); |
| ASSERT(ill != NULL && ill->ill_isv6); |
| |
| err = nce_add_common(ill, hw_addr, hw_addr_len, addr, flags, state, |
| &nce); |
| if (err != 0) |
| return (err); |
| ASSERT(newnce != NULL); |
| *newnce = nce; |
| return (err); |
| } |
| |
| /* |
| * Post-processing routine to be executed after nce_add_v6(). This function |
| * triggers fastpath (if appropriate) and DAD on the newly added nce entry |
| * and must be called without any locks held. |
| */ |
| int |
| nce_add_v6_postprocess(nce_t *nce) |
| { |
| ncec_t *ncec = nce->nce_common; |
| boolean_t dropped = B_FALSE; |
| uchar_t *hw_addr = ncec->ncec_lladdr; |
| uint_t hw_addr_len = ncec->ncec_lladdr_length; |
| ill_t *ill = ncec->ncec_ill; |
| int err = 0; |
| uint16_t flags = ncec->ncec_flags; |
| ip_stack_t *ipst = ill->ill_ipst; |
| boolean_t trigger_fastpath = B_TRUE; |
| |
| /* |
| * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then |
| * we call nce_fastpath as soon as the ncec is resolved in nce_process. |
| * We call nce_fastpath from nce_update if the link layer address of |
| * the peer changes from nce_update |
| */ |
| if (NCE_PUBLISH(ncec) || !NCE_ISREACHABLE(ncec) || |
| (hw_addr == NULL && ill->ill_net_type != IRE_IF_NORESOLVER)) |
| trigger_fastpath = B_FALSE; |
| |
| if (trigger_fastpath) |
| nce_fastpath_trigger(nce); |
| if (NCE_PUBLISH(ncec) && ncec->ncec_state == ND_PROBE) { |
| ill_t *hwaddr_ill; |
| /* |
| * Unicast entry that needs DAD. |
| */ |
| if (IS_IPMP(ill)) { |
| hwaddr_ill = ipmp_illgrp_find_ill(ill->ill_grp, |
| hw_addr, hw_addr_len); |
| } else { |
| hwaddr_ill = ill; |
| } |
| nce_dad(ncec, hwaddr_ill, B_TRUE); |
| err = EINPROGRESS; |
| } else if (flags & NCE_F_UNSOL_ADV) { |
| /* |
| * We account for the transmit below by assigning one |
| * less than the ndd variable. Subsequent decrements |
| * are done in nce_timer. |
| */ |
| mutex_enter(&ncec->ncec_lock); |
| ncec->ncec_unsolicit_count = |
| ipst->ips_ip_ndp_unsolicit_count - 1; |
| mutex_exit(&ncec->ncec_lock); |
| dropped = ndp_xmit(ill, |
| ND_NEIGHBOR_ADVERT, |
| hw_addr, |
| hw_addr_len, |
| &ncec->ncec_addr, /* Source and target of the adv */ |
| &ipv6_all_hosts_mcast, /* Destination of the packet */ |
| nce_advert_flags(ncec)); |
| mutex_enter(&ncec->ncec_lock); |
| if (dropped) |
| ncec->ncec_unsolicit_count++; |
| else |
| ncec->ncec_last_time_defended = ddi_get_lbolt(); |
| if (ncec->ncec_unsolicit_count != 0) { |
| nce_start_timer(ncec, |
| ipst->ips_ip_ndp_unsolicit_interval); |
| } |
| mutex_exit(&ncec->ncec_lock); |
| } |
| return (err); |
| } |
| |
| /* |
| * Atomically lookup and add (if needed) Neighbor Cache information for |
| * an address. |
| * |
| * IPMP notes: the ncec for non-local (i.e., !NCE_MYADDR(ncec) addresses |
| * are always added pointing at the ipmp_ill. Thus, when the ill passed |
| * to nce_add_v6 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t |
| * entries will be created, both pointing at the same ncec_t. The nce_t |
| * entries will have their nce_ill set to the ipmp_ill and the under_ill |
| * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill. |
| * Local addresses are always created on the ill passed to nce_add_v6. |
| */ |
| int |
| nce_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len, |
| const in6_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce) |
| { |
| int err = 0; |
| ip_stack_t *ipst = ill->ill_ipst; |
| nce_t *nce, *upper_nce = NULL; |
| ill_t *in_ill = ill; |
| boolean_t need_ill_refrele = B_FALSE; |
| |
| if (flags & NCE_F_MCAST) { |
| /* |
| * hw_addr will be figured out in nce_set_multicast_v6; |
| * caller has to select the cast_ill |
| */ |
| ASSERT(hw_addr == NULL); |
| ASSERT(!IS_IPMP(ill)); |
| err = nce_set_multicast_v6(ill, addr, flags, newnce); |
| return (err); |
| } |
| ASSERT(ill->ill_isv6); |
| if (IS_UNDER_IPMP(ill) && !(flags & NCE_F_MYADDR)) { |
| ill = ipmp_ill_hold_ipmp_ill(ill); |
| if (ill == NULL) |
| return (ENXIO); |
| need_ill_refrele = B_TRUE; |
| } |
| |
| mutex_enter(&ipst->ips_ndp6->ndp_g_lock); |
| nce = nce_lookup_addr(ill, addr); |
| if (nce == NULL) { |
| err = nce_add_v6(ill, hw_addr, hw_addr_len, addr, flags, state, |
| &nce); |
| } else { |
| err = EEXIST; |
| } |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| if (err == 0) |
| err = nce_add_v6_postprocess(nce); |
| if (in_ill != ill && nce != NULL) { |
| nce_t *under_nce = NULL; |
| |
| /* |
| * in_ill was the under_ill. Try to create the under_nce. |
| * Hold the ill_g_lock to prevent changes to group membership |
| * until we are done. |
| */ |
| rw_enter(&ipst->ips_ill_g_lock, RW_READER); |
| if (!IS_IN_SAME_ILLGRP(in_ill, ill)) { |
| DTRACE_PROBE2(ill__not__in__group, nce_t *, nce, |
| ill_t *, ill); |
| rw_exit(&ipst->ips_ill_g_lock); |
| err = ENXIO; |
| nce_refrele(nce); |
| nce = NULL; |
| goto bail; |
| } |
| under_nce = nce_fastpath_create(in_ill, nce->nce_common); |
| if (under_nce == NULL) { |
| rw_exit(&ipst->ips_ill_g_lock); |
| err = EINVAL; |
| nce_refrele(nce); |
| nce = NULL; |
| goto bail; |
| } |
| rw_exit(&ipst->ips_ill_g_lock); |
| upper_nce = nce; |
| nce = under_nce; /* will be returned to caller */ |
| if (NCE_ISREACHABLE(nce->nce_common)) |
| nce_fastpath_trigger(under_nce); |
| } |
| /* nce_refrele is deferred until the lock is dropped */ |
| if (nce != NULL) { |
| if (newnce != NULL) |
| *newnce = nce; |
| else |
| nce_refrele(nce); |
| } |
| bail: |
| if (upper_nce != NULL) |
| nce_refrele(upper_nce); |
| if (need_ill_refrele) |
| ill_refrele(ill); |
| return (err); |
| } |
| |
| /* |
| * Remove all the CONDEMNED nces from the appropriate hash table. |
| * We create a private list of NCEs, these may have ires pointing |
| * to them, so the list will be passed through to clean up dependent |
| * ires and only then we can do ncec_refrele() which can make NCE inactive. |
| */ |
| static void |
| nce_remove(ndp_g_t *ndp, ncec_t *ncec, ncec_t **free_nce_list) |
| { |
| ncec_t *ncec1; |
| ncec_t **ptpn; |
| |
| ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); |
| ASSERT(ndp->ndp_g_walker == 0); |
| for (; ncec; ncec = ncec1) { |
| ncec1 = ncec->ncec_next; |
| mutex_enter(&ncec->ncec_lock); |
| if (NCE_ISCONDEMNED(ncec)) { |
| ptpn = ncec->ncec_ptpn; |
| ncec1 = ncec->ncec_next; |
| if (ncec1 != NULL) |
| ncec1->ncec_ptpn = ptpn; |
| *ptpn = ncec1; |
| ncec->ncec_ptpn = NULL; |
| ncec->ncec_next = NULL; |
| ncec->ncec_next = *free_nce_list; |
| *free_nce_list = ncec; |
| } |
| mutex_exit(&ncec->ncec_lock); |
| } |
| } |
| |
| /* |
| * 1. Mark the entry CONDEMNED. This ensures that no new nce_lookup() |
| * will return this NCE. Also no new timeouts will |
| * be started (See nce_restart_timer). |
| * 2. Cancel any currently running timeouts. |
| * 3. If there is an ndp walker, return. The walker will do the cleanup. |
| * This ensures that walkers see a consistent list of NCEs while walking. |
| * 4. Otherwise remove the NCE from the list of NCEs |
| */ |
| void |
| ncec_delete(ncec_t *ncec) |
| { |
| ncec_t **ptpn; |
| ncec_t *ncec1; |
| int ipversion = ncec->ncec_ipversion; |
| ndp_g_t *ndp; |
| ip_stack_t *ipst = ncec->ncec_ipst; |
| |
| if (ipversion == IPV4_VERSION) |
| ndp = ipst->ips_ndp4; |
| else |
| ndp = ipst->ips_ndp6; |
| |
| /* Serialize deletes */ |
| mutex_enter(&ncec->ncec_lock); |
| if (NCE_ISCONDEMNED(ncec)) { |
| /* Some other thread is doing the delete */ |
| mutex_exit(&ncec->ncec_lock); |
| return; |
| } |
| /* |
| * Caller has a refhold. Also 1 ref for being in the list. Thus |
| * refcnt has to be >= 2 |
| */ |
| ASSERT(ncec->ncec_refcnt >= 2); |
| ncec->ncec_flags |= NCE_F_CONDEMNED; |
| mutex_exit(&ncec->ncec_lock); |
| |
| /* Count how many condemned ires for kmem_cache callback */ |
| atomic_inc_32(&ipst->ips_num_nce_condemned); |
| nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL); |
| |
| /* Complete any waiting callbacks */ |
| ncec_cb_dispatch(ncec); |
| |
| /* |
| * Cancel any running timer. Timeout can't be restarted |
| * since CONDEMNED is set. Can't hold ncec_lock across untimeout. |
| * Passing invalid timeout id is fine. |
| */ |
| if (ncec->ncec_timeout_id != 0) { |
| (void) untimeout(ncec->ncec_timeout_id); |
| ncec->ncec_timeout_id = 0; |
| } |
| |
| mutex_enter(&ndp->ndp_g_lock); |
| if (ncec->ncec_ptpn == NULL) { |
| /* |
| * The last ndp walker has already removed this ncec from |
| * the list after we marked the ncec CONDEMNED and before |
| * we grabbed the global lock. |
| */ |
| mutex_exit(&ndp->ndp_g_lock); |
| return; |
| } |
| if (ndp->ndp_g_walker > 0) { |
| /* |
| * Can't unlink. The walker will clean up |
| */ |
| ndp->ndp_g_walker_cleanup = B_TRUE; |
| mutex_exit(&ndp->ndp_g_lock); |
| return; |
| } |
| |
| /* |
| * Now remove the ncec from the list. nce_restart_timer won't restart |
| * the timer since it is marked CONDEMNED. |
| */ |
| ptpn = ncec->ncec_ptpn; |
| ncec1 = ncec->ncec_next; |
| if (ncec1 != NULL) |
| ncec1->ncec_ptpn = ptpn; |
| *ptpn = ncec1; |
| ncec->ncec_ptpn = NULL; |
| ncec->ncec_next = NULL; |
| mutex_exit(&ndp->ndp_g_lock); |
| |
| /* Removed from ncec_ptpn/ncec_next list */ |
| ncec_refrele_notr(ncec); |
| } |
| |
| void |
| ncec_inactive(ncec_t *ncec) |
| { |
| mblk_t **mpp; |
| ill_t *ill = ncec->ncec_ill; |
| ip_stack_t *ipst = ncec->ncec_ipst; |
| |
| ASSERT(ncec->ncec_refcnt == 0); |
| ASSERT(MUTEX_HELD(&ncec->ncec_lock)); |
| |
| /* Count how many condemned nces for kmem_cache callback */ |
| if (NCE_ISCONDEMNED(ncec)) |
| atomic_add_32(&ipst->ips_num_nce_condemned, -1); |
| |
| /* Free all allocated messages */ |
| mpp = &ncec->ncec_qd_mp; |
| while (*mpp != NULL) { |
| mblk_t *mp; |
| |
| mp = *mpp; |
| *mpp = mp->b_next; |
| |
| inet_freemsg(mp); |
| } |
| /* |
| * must have been cleaned up in ncec_delete |
| */ |
| ASSERT(list_is_empty(&ncec->ncec_cb)); |
| list_destroy(&ncec->ncec_cb); |
| /* |
| * free the ncec_lladdr if one was allocated in nce_add_common() |
| */ |
| if (ncec->ncec_lladdr_length > 0) |
| kmem_free(ncec->ncec_lladdr, ncec->ncec_lladdr_length); |
| |
| #ifdef DEBUG |
| ncec_trace_cleanup(ncec); |
| #endif |
| |
| mutex_enter(&ill->ill_lock); |
| DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, |
| (char *), "ncec", (void *), ncec); |
| ill->ill_ncec_cnt--; |
| ncec->ncec_ill = NULL; |
| /* |
| * If the number of ncec's associated with this ill have dropped |
| * to zero, check whether we need to restart any operation that |
| * is waiting for this to happen. |
| */ |
| if (ILL_DOWN_OK(ill)) { |
| /* ipif_ill_refrele_tail drops the ill_lock */ |
| ipif_ill_refrele_tail(ill); |
| } else { |
| mutex_exit(&ill->ill_lock); |
| } |
| |
| mutex_destroy(&ncec->ncec_lock); |
| kmem_cache_free(ncec_cache, ncec); |
| } |
| |
| /* |
| * ncec_walk routine. Delete the ncec if it is associated with the ill |
| * that is going away. Always called as a writer. |
| */ |
| void |
| ncec_delete_per_ill(ncec_t *ncec, void *arg) |
| { |
| if ((ncec != NULL) && ncec->ncec_ill == arg) { |
| ncec_delete(ncec); |
| } |
| } |
| |
| /* |
| * Neighbor Cache cleanup logic for a list of ncec_t entries. |
| */ |
| static void |
| nce_cleanup_list(ncec_t *ncec) |
| { |
| ncec_t *ncec_next; |
| |
| ASSERT(ncec != NULL); |
| while (ncec != NULL) { |
| ncec_next = ncec->ncec_next; |
| ncec->ncec_next = NULL; |
| |
| /* |
| * It is possible for the last ndp walker (this thread) |
| * to come here after ncec_delete has marked the ncec CONDEMNED |
| * and before it has removed the ncec from the fastpath list |
| * or called untimeout. So we need to do it here. It is safe |
| * for both ncec_delete and this thread to do it twice or |
| * even simultaneously since each of the threads has a |
| * reference on the ncec. |
| */ |
| nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL); |
| /* |
| * Cancel any running timer. Timeout can't be restarted |
| * since CONDEMNED is set. The ncec_lock can't be |
| * held across untimeout though passing invalid timeout |
| * id is fine. |
| */ |
| if (ncec->ncec_timeout_id != 0) { |
| (void) untimeout(ncec->ncec_timeout_id); |
| ncec->ncec_timeout_id = 0; |
| } |
| /* Removed from ncec_ptpn/ncec_next list */ |
| ncec_refrele_notr(ncec); |
| ncec = ncec_next; |
| } |
| } |
| |
| /* |
| * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. |
| */ |
| boolean_t |
| nce_restart_dad(ncec_t *ncec) |
| { |
| boolean_t started; |
| ill_t *ill, *hwaddr_ill; |
| |
| if (ncec == NULL) |
| return (B_FALSE); |
| ill = ncec->ncec_ill; |
| mutex_enter(&ncec->ncec_lock); |
| if (ncec->ncec_state == ND_PROBE) { |
| mutex_exit(&ncec->ncec_lock); |
| started = B_TRUE; |
| } else if (ncec->ncec_state == ND_REACHABLE) { |
| ASSERT(ncec->ncec_lladdr != NULL); |
| ncec->ncec_state = ND_PROBE; |
| ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT; |
| /* |
| * Slight cheat here: we don't use the initial probe delay |
| * for IPv4 in this obscure case. |
| */ |
| mutex_exit(&ncec->ncec_lock); |
| if (IS_IPMP(ill)) { |
| hwaddr_ill = ipmp_illgrp_find_ill(ill->ill_grp, |
| ncec->ncec_lladdr, ncec->ncec_lladdr_length); |
| } else { |
| hwaddr_ill = ill; |
| } |
| nce_dad(ncec, hwaddr_ill, B_TRUE); |
| started = B_TRUE; |
| } else { |
| mutex_exit(&ncec->ncec_lock); |
| started = B_FALSE; |
| } |
| return (started); |
| } |
| |
| /* |
| * IPv6 Cache entry lookup. Try to find an ncec matching the parameters passed. |
| * If one is found, the refcnt on the ncec will be incremented. |
| */ |
| ncec_t * |
| ncec_lookup_illgrp_v6(ill_t *ill, const in6_addr_t *addr) |
| { |
| ncec_t *ncec; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| rw_enter(&ipst->ips_ill_g_lock, RW_READER); |
| mutex_enter(&ipst->ips_ndp6->ndp_g_lock); |
| |
| /* Get head of v6 hash table */ |
| ncec = *((ncec_t **)NCE_HASH_PTR_V6(ipst, *addr)); |
| ncec = ncec_lookup_illgrp(ill, addr, ncec); |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| rw_exit(&ipst->ips_ill_g_lock); |
| return (ncec); |
| } |
| /* |
| * IPv4 Cache entry lookup. Try to find an ncec matching the parameters passed. |
| * If one is found, the refcnt on the ncec will be incremented. |
| */ |
| ncec_t * |
| ncec_lookup_illgrp_v4(ill_t *ill, const in_addr_t *addr) |
| { |
| ncec_t *ncec = NULL; |
| in6_addr_t addr6; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| rw_enter(&ipst->ips_ill_g_lock, RW_READER); |
| mutex_enter(&ipst->ips_ndp4->ndp_g_lock); |
| |
| /* Get head of v4 hash table */ |
| ncec = *((ncec_t **)NCE_HASH_PTR_V4(ipst, *addr)); |
| IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); |
| ncec = ncec_lookup_illgrp(ill, &addr6, ncec); |
| mutex_exit(&ipst->ips_ndp4->ndp_g_lock); |
| rw_exit(&ipst->ips_ill_g_lock); |
| return (ncec); |
| } |
| |
| /* |
| * Cache entry lookup. Try to find an ncec matching the parameters passed. |
| * If an ncec is found, increment the hold count on that ncec. |
| * The caller passes in the start of the appropriate hash table, and must |
| * be holding the appropriate global lock (ndp_g_lock). In addition, since |
| * this function matches ncec_t entries across the illgrp, the ips_ill_g_lock |
| * must be held as reader. |
| * |
| * This function always matches across the ipmp group. |
| */ |
| ncec_t * |
| ncec_lookup_illgrp(ill_t *ill, const in6_addr_t *addr, ncec_t *ncec) |
| { |
| ndp_g_t *ndp; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| if (ill->ill_isv6) |
| ndp = ipst->ips_ndp6; |
| else |
| ndp = ipst->ips_ndp4; |
| |
| ASSERT(ill != NULL); |
| ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); |
| if (IN6_IS_ADDR_UNSPECIFIED(addr)) |
| return (NULL); |
| for (; ncec != NULL; ncec = ncec->ncec_next) { |
| if (ncec->ncec_ill == ill || |
| IS_IN_SAME_ILLGRP(ill, ncec->ncec_ill)) { |
| if (IN6_ARE_ADDR_EQUAL(&ncec->ncec_addr, addr)) { |
| mutex_enter(&ncec->ncec_lock); |
| if (!NCE_ISCONDEMNED(ncec)) { |
| ncec_refhold_locked(ncec); |
| mutex_exit(&ncec->ncec_lock); |
| break; |
| } |
| mutex_exit(&ncec->ncec_lock); |
| } |
| } |
| } |
| return (ncec); |
| } |
| |
| /* |
| * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t |
| * entries for ill only, i.e., when ill is part of an ipmp group, |
| * nce_lookup_v4 will never try to match across the group. |
| */ |
| nce_t * |
| nce_lookup_v4(ill_t *ill, const in_addr_t *addr) |
| { |
| nce_t *nce; |
| in6_addr_t addr6; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| mutex_enter(&ipst->ips_ndp4->ndp_g_lock); |
| IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); |
| nce = nce_lookup_addr(ill, &addr6); |
| mutex_exit(&ipst->ips_ndp4->ndp_g_lock); |
| return (nce); |
| } |
| |
| /* |
| * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t |
| * entries for ill only, i.e., when ill is part of an ipmp group, |
| * nce_lookup_v6 will never try to match across the group. |
| */ |
| nce_t * |
| nce_lookup_v6(ill_t *ill, const in6_addr_t *addr6) |
| { |
| nce_t *nce; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| mutex_enter(&ipst->ips_ndp6->ndp_g_lock); |
| nce = nce_lookup_addr(ill, addr6); |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| return (nce); |
| } |
| |
| static nce_t * |
| nce_lookup_addr(ill_t *ill, const in6_addr_t *addr) |
| { |
| nce_t *nce; |
| |
| ASSERT(ill != NULL); |
| #ifdef DEBUG |
| if (ill->ill_isv6) |
| ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp6->ndp_g_lock)); |
| else |
| ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp4->ndp_g_lock)); |
| #endif |
| mutex_enter(&ill->ill_lock); |
| nce = nce_lookup(ill, addr); |
| mutex_exit(&ill->ill_lock); |
| return (nce); |
| } |
| |
| |
| /* |
| * Router turned to host. We need to make sure that cached copies of the ncec |
| * are not used for forwarding packets if they were derived from the default |
| * route, and that the default route itself is removed, as required by |
| * section 7.2.5 of RFC 2461. |
| * |
| * Note that the ncec itself probably has valid link-layer information for the |
| * nexthop, so that there is no reason to delete the ncec, as long as the |
| * ISROUTER flag is turned off. |
| */ |
| static void |
| ncec_router_to_host(ncec_t *ncec) |
| { |
| ire_t *ire; |
| ip_stack_t *ipst = ncec->ncec_ipst; |
| |
| mutex_enter(&ncec->ncec_lock); |
| ncec->ncec_flags &= ~NCE_F_ISROUTER; |
| mutex_exit(&ncec->ncec_lock); |
| |
| ire = ire_ftable_lookup_v6(&ipv6_all_zeros, &ipv6_all_zeros, |
| &ncec->ncec_addr, IRE_DEFAULT, ncec->ncec_ill, ALL_ZONES, NULL, |
| MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW, 0, ipst, NULL); |
| if (ire != NULL) { |
| ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); |
| ire_delete(ire); |
| ire_refrele(ire); |
| } |
| } |
| |
| /* |
| * Process passed in parameters either from an incoming packet or via |
| * user ioctl. |
| */ |
| void |
| nce_process(ncec_t *ncec, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) |
| { |
| ill_t *ill = ncec->ncec_ill; |
| uint32_t hw_addr_len = ill->ill_phys_addr_length; |
| boolean_t ll_updated = B_FALSE; |
| boolean_t ll_changed; |
| nce_t *nce; |
| |
| ASSERT(ncec->ncec_ipversion == IPV6_VERSION); |
| /* |
| * No updates of link layer address or the neighbor state is |
| * allowed, when the cache is in NONUD state. This still |
| * allows for responding to reachability solicitation. |
| */ |
| mutex_enter(&ncec->ncec_lock); |
| if (ncec->ncec_state == ND_INCOMPLETE) { |
| if (hw_addr == NULL) { |
| mutex_exit(&ncec->ncec_lock); |
| return; |
| } |
| nce_set_ll(ncec, hw_addr); |
| /* |
| * Update ncec state and send the queued packets |
| * back to ip this time ire will be added. |
| */ |
| if (flag & ND_NA_FLAG_SOLICITED) { |
| nce_update(ncec, ND_REACHABLE, NULL); |
| } else { |
| nce_update(ncec, ND_STALE, NULL); |
| } |
| mutex_exit(&ncec->ncec_lock); |
| nce = nce_fastpath(ncec, B_TRUE, NULL); |
| nce_resolv_ok(ncec); |
| if (nce != NULL) |
| nce_refrele(nce); |
| return; |
| } |
| ll_changed = nce_cmp_ll_addr(ncec, hw_addr, hw_addr_len); |
| if (!is_adv) { |
| /* If this is a SOLICITATION request only */ |
| if (ll_changed) |
| nce_update(ncec, ND_STALE, hw_addr); |
| mutex_exit(&ncec->ncec_lock); |
| ncec_cb_dispatch(ncec); |
| return; |
| } |
| if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { |
| /* If in any other state than REACHABLE, ignore */ |
| if (ncec->ncec_state == ND_REACHABLE) { |
| nce_update(ncec, ND_STALE, NULL); |
| } |
| mutex_exit(&ncec->ncec_lock); |
| ncec_cb_dispatch(ncec); |
| return; |
| } else { |
| if (ll_changed) { |
| nce_update(ncec, ND_UNCHANGED, hw_addr); |
| ll_updated = B_TRUE; |
| } |
| if (flag & ND_NA_FLAG_SOLICITED) { |
| nce_update(ncec, ND_REACHABLE, NULL); |
| } else { |
| if (ll_updated) { |
| nce_update(ncec, ND_STALE, NULL); |
| } |
| } |
| mutex_exit(&ncec->ncec_lock); |
| if (!(flag & ND_NA_FLAG_ROUTER) && (ncec->ncec_flags & |
| NCE_F_ISROUTER)) { |
| ncec_router_to_host(ncec); |
| } else { |
| ncec_cb_dispatch(ncec); |
| } |
| } |
| } |
| |
| /* |
| * Pass arg1 to the cbf supplied, along with each ncec in existence. |
| * ncec_walk() places a REFHOLD on the ncec and drops the lock when |
| * walking the hash list. |
| */ |
| void |
| ncec_walk_common(ndp_g_t *ndp, ill_t *ill, ncec_walk_cb_t cbf, |
| void *arg1, boolean_t trace) |
| { |
| ncec_t *ncec; |
| ncec_t *ncec1; |
| ncec_t **ncep; |
| ncec_t *free_nce_list = NULL; |
| |
| mutex_enter(&ndp->ndp_g_lock); |
| /* Prevent ncec_delete from unlink and free of NCE */ |
| ndp->ndp_g_walker++; |
| mutex_exit(&ndp->ndp_g_lock); |
| for (ncep = ndp->nce_hash_tbl; |
| ncep < A_END(ndp->nce_hash_tbl); ncep++) { |
| for (ncec = *ncep; ncec != NULL; ncec = ncec1) { |
| ncec1 = ncec->ncec_next; |
| if (ill == NULL || ncec->ncec_ill == ill) { |
| if (trace) { |
| ncec_refhold(ncec); |
| (*cbf)(ncec, arg1); |
| ncec_refrele(ncec); |
| } else { |
| ncec_refhold_notr(ncec); |
| (*cbf)(ncec, arg1); |
| ncec_refrele_notr(ncec); |
| } |
| } |
| } |
| } |
| mutex_enter(&ndp->ndp_g_lock); |
| ndp->ndp_g_walker--; |
| if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { |
| /* Time to delete condemned entries */ |
| for (ncep = ndp->nce_hash_tbl; |
| ncep < A_END(ndp->nce_hash_tbl); ncep++) { |
| ncec = *ncep; |
| if (ncec != NULL) { |
| nce_remove(ndp, ncec, &free_nce_list); |
| } |
| } |
| ndp->ndp_g_walker_cleanup = B_FALSE; |
| } |
| |
| mutex_exit(&ndp->ndp_g_lock); |
| |
| if (free_nce_list != NULL) { |
| nce_cleanup_list(free_nce_list); |
| } |
| } |
| |
| /* |
| * Walk everything. |
| * Note that ill can be NULL hence can't derive the ipst from it. |
| */ |
| void |
| ncec_walk(ill_t *ill, ncec_walk_cb_t cbf, void *arg1, ip_stack_t *ipst) |
| { |
| ncec_walk_common(ipst->ips_ndp4, ill, cbf, arg1, B_TRUE); |
| ncec_walk_common(ipst->ips_ndp6, ill, cbf, arg1, B_TRUE); |
| } |
| |
| /* |
| * For each interface an entry is added for the unspecified multicast group. |
| * Here that mapping is used to form the multicast cache entry for a particular |
| * multicast destination. |
| */ |
| static int |
| nce_set_multicast_v6(ill_t *ill, const in6_addr_t *dst, |
| uint16_t flags, nce_t **newnce) |
| { |
| uchar_t *hw_addr; |
| int err = 0; |
| ip_stack_t *ipst = ill->ill_ipst; |
| nce_t *nce; |
| |
| ASSERT(ill != NULL); |
| ASSERT(ill->ill_isv6); |
| ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); |
| |
| mutex_enter(&ipst->ips_ndp6->ndp_g_lock); |
| nce = nce_lookup_addr(ill, dst); |
| if (nce != NULL) { |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| goto done; |
| } |
| if (ill->ill_net_type == IRE_IF_RESOLVER) { |
| /* |
| * For IRE_IF_RESOLVER a hardware mapping can be |
| * generated. |
| */ |
| hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); |
| if (hw_addr == NULL) { |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| return (ENOMEM); |
| } |
| ip_mcast_mapping(ill, (uchar_t *)dst, hw_addr); |
| } else { |
| /* No hw_addr is needed for IRE_IF_NORESOLVER. */ |
| hw_addr = NULL; |
| } |
| ASSERT((flags & NCE_F_MCAST) != 0); |
| ASSERT((flags & NCE_F_NONUD) != 0); |
| /* nce_state will be computed by nce_add_common() */ |
| err = nce_add_v6(ill, hw_addr, ill->ill_phys_addr_length, dst, flags, |
| ND_UNCHANGED, &nce); |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| if (err == 0) |
| err = nce_add_v6_postprocess(nce); |
| if (hw_addr != NULL) |
| kmem_free(hw_addr, ill->ill_nd_lla_len); |
| if (err != 0) { |
| ip1dbg(("nce_set_multicast_v6: create failed" "%d\n", err)); |
| return (err); |
| } |
| done: |
| ASSERT(nce->nce_common->ncec_state == ND_REACHABLE); |
| if (newnce != NULL) |
| *newnce = nce; |
| else |
| nce_refrele(nce); |
| return (0); |
| } |
| |
| /* |
| * Return the link layer address, and any flags of a ncec. |
| */ |
| int |
| ndp_query(ill_t *ill, struct lif_nd_req *lnr) |
| { |
| ncec_t *ncec; |
| in6_addr_t *addr; |
| sin6_t *sin6; |
| |
| ASSERT(ill != NULL && ill->ill_isv6); |
| sin6 = (sin6_t *)&lnr->lnr_addr; |
| addr = &sin6->sin6_addr; |
| |
| /* |
| * NOTE: if the ill is an IPMP interface, then match against the whole |
| * illgrp. This e.g. allows in.ndpd to retrieve the link layer |
| * addresses for the data addresses on an IPMP interface even though |
| * ipif_ndp_up() created them with an ncec_ill of ipif_bound_ill. |
| */ |
| ncec = ncec_lookup_illgrp_v6(ill, addr); |
| if (ncec == NULL) |
| return (ESRCH); |
| /* If no link layer address is available yet, return ESRCH */ |
| if (!NCE_ISREACHABLE(ncec)) { |
| ncec_refrele(ncec); |
| return (ESRCH); |
| } |
| lnr->lnr_hdw_len = ill->ill_phys_addr_length; |
| bcopy(ncec->ncec_lladdr, (uchar_t *)&lnr->lnr_hdw_addr, |
| lnr->lnr_hdw_len); |
| if (ncec->ncec_flags & NCE_F_ISROUTER) |
| lnr->lnr_flags = NDF_ISROUTER_ON; |
| if (ncec->ncec_flags & NCE_F_ANYCAST) |
| lnr->lnr_flags |= NDF_ANYCAST_ON; |
| if (ncec->ncec_flags & NCE_F_STATIC) |
| lnr->lnr_flags |= NDF_STATIC; |
| ncec_refrele(ncec); |
| return (0); |
| } |
| |
| /* |
| * Finish setting up the Enable/Disable multicast for the driver. |
| */ |
| mblk_t * |
| ndp_mcastreq(ill_t *ill, const in6_addr_t *v6group, uint32_t hw_addr_len, |
| uint32_t hw_addr_offset, mblk_t *mp) |
| { |
| uchar_t *hw_addr; |
| ipaddr_t v4group; |
| uchar_t *addr; |
| |
| ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); |
| if (IN6_IS_ADDR_V4MAPPED(v6group)) { |
| IN6_V4MAPPED_TO_IPADDR(v6group, v4group); |
| |
| ASSERT(CLASSD(v4group)); |
| ASSERT(!(ill->ill_isv6)); |
| |
| addr = (uchar_t *)&v4group; |
| } else { |
| ASSERT(IN6_IS_ADDR_MULTICAST(v6group)); |
| ASSERT(ill->ill_isv6); |
| |
| addr = (uchar_t *)v6group; |
| } |
| hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); |
| if (hw_addr == NULL) { |
| ip0dbg(("ndp_mcastreq NULL hw_addr\n")); |
| freemsg(mp); |
| return (NULL); |
| } |
| |
| ip_mcast_mapping(ill, addr, hw_addr); |
| return (mp); |
| } |
| |
| void |
| ip_ndp_resolve(ncec_t *ncec) |
| { |
| in_addr_t sender4 = INADDR_ANY; |
| in6_addr_t sender6 = ipv6_all_zeros; |
| ill_t *src_ill; |
| uint32_t ms; |
| |
| src_ill = nce_resolve_src(ncec, &sender6); |
| if (src_ill == NULL) { |
| /* Make sure we try again later */ |
| ms = ncec->ncec_ill->ill_reachable_retrans_time; |
| nce_restart_timer(ncec, (clock_t)ms); |
| return; |
| } |
| if (ncec->ncec_ipversion == IPV4_VERSION) |
| IN6_V4MAPPED_TO_IPADDR(&sender6, sender4); |
| mutex_enter(&ncec->ncec_lock); |
| if (ncec->ncec_ipversion == IPV6_VERSION) |
| ms = ndp_solicit(ncec, sender6, src_ill); |
| else |
| ms = arp_request(ncec, sender4, src_ill); |
| mutex_exit(&ncec->ncec_lock); |
| if (ms == 0) { |
| if (ncec->ncec_state != ND_REACHABLE) { |
| if (ncec->ncec_ipversion == IPV6_VERSION) |
| ndp_resolv_failed(ncec); |
| else |
| arp_resolv_failed(ncec); |
| ASSERT((ncec->ncec_flags & NCE_F_STATIC) == 0); |
| nce_make_unreachable(ncec); |
| ncec_delete(ncec); |
| } |
| } else { |
| nce_restart_timer(ncec, (clock_t)ms); |
| } |
| done: |
| ill_refrele(src_ill); |
| } |
| |
| /* |
| * Send an IPv6 neighbor solicitation. |
| * Returns number of milliseconds after which we should either rexmit or abort. |
| * Return of zero means we should abort. |
| * The caller holds the ncec_lock to protect ncec_qd_mp and ncec_rcnt. |
| * The optional source address is used as a hint to ndp_solicit for |
| * which source to use in the packet. |
| * |
| * NOTE: This routine drops ncec_lock (and later reacquires it) when sending |
| * the packet. |
| */ |
| uint32_t |
| ndp_solicit(ncec_t *ncec, in6_addr_t src, ill_t *ill) |
| { |
| in6_addr_t dst; |
| boolean_t dropped = B_FALSE; |
| |
| ASSERT(ncec->ncec_ipversion == IPV6_VERSION); |
| ASSERT(MUTEX_HELD(&ncec->ncec_lock)); |
| |
| if (ncec->ncec_rcnt == 0) |
| return (0); |
| |
| dst = ncec->ncec_addr; |
| ncec->ncec_rcnt--; |
| mutex_exit(&ncec->ncec_lock); |
| dropped = ndp_xmit(ill, ND_NEIGHBOR_SOLICIT, ill->ill_phys_addr, |
| ill->ill_phys_addr_length, &src, &dst, 0); |
| mutex_enter(&ncec->ncec_lock); |
| if (dropped) |
| ncec->ncec_rcnt++; |
| return (ncec->ncec_ill->ill_reachable_retrans_time); |
| } |
| |
| /* |
| * Attempt to recover an address on an interface that's been marked as a |
| * duplicate. Because NCEs are destroyed when the interface goes down, there's |
| * no easy way to just probe the address and have the right thing happen if |
| * it's no longer in use. Instead, we just bring it up normally and allow the |
| * regular interface start-up logic to probe for a remaining duplicate and take |
| * us back down if necessary. |
| * Neither DHCP nor temporary addresses arrive here; they're excluded by |
| * ip_ndp_excl. |
| */ |
| /* ARGSUSED */ |
| void |
| ip_addr_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) |
| { |
| ill_t *ill = rq->q_ptr; |
| ipif_t *ipif; |
| in6_addr_t *addr6 = (in6_addr_t *)mp->b_rptr; |
| in_addr_t *addr4 = (in_addr_t *)mp->b_rptr; |
| boolean_t addr_equal; |
| |
| for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { |
| /* |
| * We do not support recovery of proxy ARP'd interfaces, |
| * because the system lacks a complete proxy ARP mechanism. |
| */ |
| if (ill->ill_isv6) { |
| addr_equal = IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, |
| addr6); |
| } else { |
| addr_equal = (ipif->ipif_lcl_addr == *addr4); |
| } |
| |
| if ((ipif->ipif_flags & IPIF_POINTOPOINT) || !addr_equal) |
| continue; |
| |
| /* |
| * If we have already recovered or if the interface is going |
| * away, then ignore. |
| */ |
| mutex_enter(&ill->ill_lock); |
| if (!(ipif->ipif_flags & IPIF_DUPLICATE) || |
| (ipif->ipif_state_flags & IPIF_CONDEMNED)) { |
| mutex_exit(&ill->ill_lock); |
| continue; |
| } |
| |
| ipif->ipif_flags &= ~IPIF_DUPLICATE; |
| ill->ill_ipif_dup_count--; |
| mutex_exit(&ill->ill_lock); |
| ipif->ipif_was_dup = B_TRUE; |
| |
| if (ill->ill_isv6) { |
| VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS); |
| (void) ipif_up_done_v6(ipif); |
| } else { |
| VERIFY(ipif_arp_up(ipif, Res_act_initial, B_TRUE) != |
| EINPROGRESS); |
| (void) ipif_up_done(ipif); |
| } |
| } |
| freeb(mp); |
| } |
| |
| /* |
| * Attempt to recover an IPv6 interface that's been shut down as a duplicate. |
| * As long as someone else holds the address, the interface will stay down. |
| * When that conflict goes away, the interface is brought back up. This is |
| * done so that accidental shutdowns of addresses aren't made permanent. Your |
| * server will recover from a failure. |
| * |
| * For DHCP and temporary addresses, recovery is not done in the kernel. |
| * Instead, it's handled by user space processes (dhcpagent and in.ndpd). |
| * |
| * This function is entered on a timer expiry; the ID is in ipif_recovery_id. |
| */ |
| void |
| ipif_dup_recovery(void *arg) |
| { |
| ipif_t *ipif = arg; |
| |
| ipif->ipif_recovery_id = 0; |
| if (!(ipif->ipif_flags & IPIF_DUPLICATE)) |
| return; |
| |
| /* |
| * No lock, because this is just an optimization. |
| */ |
| if (ipif->ipif_state_flags & IPIF_CONDEMNED) |
| return; |
| |
| /* If the link is down, we'll retry this later */ |
| if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) |
| return; |
| |
| ipif_do_recovery(ipif); |
| } |
| |
| /* |
| * Perform interface recovery by forcing the duplicate interfaces up and |
| * allowing the system to determine which ones should stay up. |
| * |
| * Called both by recovery timer expiry and link-up notification. |
| */ |
| void |
| ipif_do_recovery(ipif_t *ipif) |
| { |
| ill_t *ill = ipif->ipif_ill; |
| mblk_t *mp; |
| ip_stack_t *ipst = ill->ill_ipst; |
| size_t mp_size; |
| |
| if (ipif->ipif_isv6) |
| mp_size = sizeof (ipif->ipif_v6lcl_addr); |
| else |
| mp_size = sizeof (ipif->ipif_lcl_addr); |
| mp = allocb(mp_size, BPRI_MED); |
| if (mp == NULL) { |
| mutex_enter(&ill->ill_lock); |
| if (ipst->ips_ip_dup_recovery > 0 && |
| ipif->ipif_recovery_id == 0 && |
| !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { |
| ipif->ipif_recovery_id = timeout(ipif_dup_recovery, |
| ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); |
| } |
| mutex_exit(&ill->ill_lock); |
| } else { |
| /* |
| * A recovery timer may still be running if we got here from |
| * ill_restart_dad(); cancel that timer. |
| */ |
| if (ipif->ipif_recovery_id != 0) |
| (void) untimeout(ipif->ipif_recovery_id); |
| ipif->ipif_recovery_id = 0; |
| |
| if (ipif->ipif_isv6) { |
| bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, |
| sizeof (ipif->ipif_v6lcl_addr)); |
| } else { |
| bcopy(&ipif->ipif_lcl_addr, mp->b_rptr, |
| sizeof (ipif->ipif_lcl_addr)); |
| } |
| ill_refhold(ill); |
| qwriter_ip(ill, ill->ill_rq, mp, ip_addr_recover, NEW_OP, |
| B_FALSE); |
| } |
| } |
| |
| /* |
| * Find the MAC and IP addresses in an NA/NS message. |
| */ |
| static void |
| ip_ndp_find_addresses(mblk_t *mp, ip_recv_attr_t *ira, ill_t *ill, |
| in6_addr_t *targp, uchar_t **haddr, uint_t *haddrlenp) |
| { |
| icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); |
| nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; |
| uchar_t *addr; |
| int alen; |
| |
| /* icmp_inbound_v6 ensures this */ |
| ASSERT(ira->ira_flags & IRAF_L2SRC_SET); |
| |
| addr = ira->ira_l2src; |
| alen = ill->ill_phys_addr_length; |
| if (alen > 0) { |
| *haddr = addr; |
| *haddrlenp = alen; |
| } else { |
| *haddr = NULL; |
| *haddrlenp = 0; |
| } |
| |
| /* nd_ns_target and nd_na_target are at the same offset, so we cheat */ |
| *targp = ns->nd_ns_target; |
| } |
| |
| /* |
| * This is for exclusive changes due to NDP duplicate address detection |
| * failure. |
| */ |
| /* ARGSUSED */ |
| static void |
| ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) |
| { |
| ill_t *ill = rq->q_ptr; |
| ipif_t *ipif; |
| uchar_t *haddr; |
| uint_t haddrlen; |
| ip_stack_t *ipst = ill->ill_ipst; |
| in6_addr_t targ; |
| ip_recv_attr_t iras; |
| mblk_t *attrmp; |
| |
| attrmp = mp; |
| mp = mp->b_cont; |
| attrmp->b_cont = NULL; |
| if (!ip_recv_attr_from_mblk(attrmp, &iras)) { |
| /* The ill or ip_stack_t disappeared on us */ |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| ip_drop_input("ip_recv_attr_from_mblk", mp, ill); |
| freemsg(mp); |
| ira_cleanup(&iras, B_TRUE); |
| return; |
| } |
| |
| ASSERT(ill == iras.ira_rill); |
| |
| ip_ndp_find_addresses(mp, &iras, ill, &targ, &haddr, &haddrlen); |
| if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) { |
| /* |
| * Ignore conflicts generated by misbehaving switches that |
| * just reflect our own messages back to us. For IPMP, we may |
| * see reflections across any ill in the illgrp. |
| * |
| * RFC2462 and revisions tried to detect both the case |
| * when a statically configured IPv6 address is a duplicate, |
| * and the case when the L2 address itself is a duplicate. The |
| * later is important because, with stateles address autoconf, |
| * if the L2 address is a duplicate, the resulting IPv6 |
| * address(es) would also be duplicates. We rely on DAD of the |
| * IPv6 address itself to detect the latter case. |
| */ |
| /* For an under ill_grp can change under lock */ |
| rw_enter(&ipst->ips_ill_g_lock, RW_READER); |
| if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || |
| IS_UNDER_IPMP(ill) && |
| ipmp_illgrp_find_ill(ill->ill_grp, haddr, |
| haddrlen) != NULL) { |
| rw_exit(&ipst->ips_ill_g_lock); |
| goto ignore_conflict; |
| } |
| rw_exit(&ipst->ips_ill_g_lock); |
| } |
| |
| /* |
| * Look up the appropriate ipif. |
| */ |
| ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, ipst); |
| if (ipif == NULL) |
| goto ignore_conflict; |
| |
| /* Reload the ill to match the ipif */ |
| ill = ipif->ipif_ill; |
| |
| /* If it's already duplicate or ineligible, then don't do anything. */ |
| if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { |
| ipif_refrele(ipif); |
| goto ignore_conflict; |
| } |
| |
| /* |
| * If this is a failure during duplicate recovery, then don't |
| * complain. It may take a long time to recover. |
| */ |
| if (!ipif->ipif_was_dup) { |
| char ibuf[LIFNAMSIZ]; |
| char hbuf[MAC_STR_LEN]; |
| char sbuf[INET6_ADDRSTRLEN]; |
| |
| ipif_get_name(ipif, ibuf, sizeof (ibuf)); |
| cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" |
| " disabled", ibuf, |
| inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), |
| mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf))); |
| } |
| mutex_enter(&ill->ill_lock); |
| ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); |
| ipif->ipif_flags |= IPIF_DUPLICATE; |
| ill->ill_ipif_dup_count++; |
| mutex_exit(&ill->ill_lock); |
| (void) ipif_down(ipif, NULL, NULL); |
| (void) ipif_down_tail(ipif); |
| mutex_enter(&ill->ill_lock); |
| if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && |
| ill->ill_net_type == IRE_IF_RESOLVER && |
| !(ipif->ipif_state_flags & IPIF_CONDEMNED) && |
| ipst->ips_ip_dup_recovery > 0) { |
| ASSERT(ipif->ipif_recovery_id == 0); |
| ipif->ipif_recovery_id = timeout(ipif_dup_recovery, |
| ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); |
| } |
| mutex_exit(&ill->ill_lock); |
| ipif_refrele(ipif); |
| |
| ignore_conflict: |
| freemsg(mp); |
| ira_cleanup(&iras, B_TRUE); |
| } |
| |
| /* |
| * Handle failure by tearing down the ipifs with the specified address. Note |
| * that tearing down the ipif also means deleting the ncec through ipif_down, so |
| * it's not possible to do recovery by just restarting the ncec timer. Instead, |
| * we start a timer on the ipif. |
| * Caller has to free mp; |
| */ |
| static void |
| ndp_failure(mblk_t *mp, ip_recv_attr_t *ira) |
| { |
| const uchar_t *haddr; |
| ill_t *ill = ira->ira_rill; |
| |
| /* |
| * Ignore conflicts generated by misbehaving switches that just |
| * reflect our own messages back to us. |
| */ |
| |
| /* icmp_inbound_v6 ensures this */ |
| ASSERT(ira->ira_flags & IRAF_L2SRC_SET); |
| haddr = ira->ira_l2src; |
| if (haddr != NULL && |
| bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { |
| return; |
| } |
| |
| if ((mp = copymsg(mp)) != NULL) { |
| mblk_t *attrmp; |
| |
| attrmp = ip_recv_attr_to_mblk(ira); |
| if (attrmp == NULL) { |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| ip_drop_input("ipIfStatsInDiscards", mp, ill); |
| freemsg(mp); |
| } else { |
| ASSERT(attrmp->b_cont == NULL); |
| attrmp->b_cont = mp; |
| mp = attrmp; |
| ill_refhold(ill); |
| qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_excl, NEW_OP, |
| B_FALSE); |
| } |
| } |
| } |
| |
| /* |
| * Handle a discovered conflict: some other system is advertising that it owns |
| * one of our IP addresses. We need to defend ourselves, or just shut down the |
| * interface. |
| * |
| * Handles both IPv4 and IPv6 |
| */ |
| boolean_t |
| ip_nce_conflict(mblk_t *mp, ip_recv_attr_t *ira, ncec_t *ncec) |
| { |
| ipif_t *ipif; |
| clock_t now; |
| uint_t maxdefense; |
| uint_t defs; |
| ill_t *ill = ira->ira_ill; |
| ip_stack_t *ipst = ill->ill_ipst; |
| uint32_t elapsed; |
| boolean_t isv6 = ill->ill_isv6; |
| ipaddr_t ncec_addr; |
| |
| if (isv6) { |
| ipif = ipif_lookup_addr_v6(&ncec->ncec_addr, ill, ALL_ZONES, |
| ipst); |
| } else { |
| if (arp_no_defense) { |
| /* |
| * Yes, there is a conflict, but no, we do not |
| * defend ourself. |
| */ |
| return (B_TRUE); |
| } |
| IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, ncec_addr); |
| ipif = ipif_lookup_addr(ncec_addr, ill, ALL_ZONES, |
| ipst); |
| } |
| if (ipif == NULL) |
| return (B_FALSE); |
| |
| /* |
| * First, figure out if this address is disposable. |
| */ |
| if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) |
| maxdefense = ipst->ips_ip_max_temp_defend; |
| else |
| maxdefense = ipst->ips_ip_max_defend; |
| |
| /* |
| * Now figure out how many times we've defended ourselves. Ignore |
| * defenses that happened long in the past. |
| */ |
| now = ddi_get_lbolt(); |
| elapsed = (drv_hztousec(now - ncec->ncec_last_time_defended))/1000000; |
| mutex_enter(&ncec->ncec_lock); |
| if ((defs = ncec->ncec_defense_count) > 0 && |
| elapsed > ipst->ips_ip_defend_interval) { |
| /* |
| * ip_defend_interval has elapsed. |
| * reset the defense count. |
| */ |
| ncec->ncec_defense_count = defs = 0; |
| } |
| ncec->ncec_defense_count++; |
| ncec->ncec_last_time_defended = now; |
| mutex_exit(&ncec->ncec_lock); |
| ipif_refrele(ipif); |
| |
| /* |
| * If we've defended ourselves too many times already, then give up and |
| * tear down the interface(s) using this address. |
| * Otherwise, caller has to defend by sending out an announce. |
| */ |
| if (defs >= maxdefense) { |
| if (isv6) |
| ndp_failure(mp, ira); |
| else |
| arp_failure(mp, ira); |
| } else { |
| return (B_TRUE); /* caller must defend this address */ |
| } |
| return (B_FALSE); |
| } |
| |
| /* |
| * Handle reception of Neighbor Solicitation messages. |
| */ |
| static void |
| ndp_input_solicit(mblk_t *mp, ip_recv_attr_t *ira) |
| { |
| ill_t *ill = ira->ira_ill, *under_ill; |
| nd_neighbor_solicit_t *ns; |
| uint32_t hlen = ill->ill_phys_addr_length; |
| uchar_t *haddr = NULL; |
| icmp6_t *icmp_nd; |
| ip6_t *ip6h; |
| ncec_t *our_ncec = NULL; |
| in6_addr_t target; |
| in6_addr_t src; |
| int len; |
| int flag = 0; |
| nd_opt_hdr_t *opt = NULL; |
| boolean_t bad_solicit = B_FALSE; |
| mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; |
| boolean_t need_ill_refrele = B_FALSE; |
| |
| ip6h = (ip6_t *)mp->b_rptr; |
| icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); |
| len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; |
| src = ip6h->ip6_src; |
| ns = (nd_neighbor_solicit_t *)icmp_nd; |
| target = ns->nd_ns_target; |
| if (IN6_IS_ADDR_MULTICAST(&target) || IN6_IS_ADDR_V4MAPPED(&target) || |
| IN6_IS_ADDR_LOOPBACK(&target)) { |
| if (ip_debug > 2) { |
| /* ip1dbg */ |
| pr_addr_dbg("ndp_input_solicit: Martian Target %s\n", |
| AF_INET6, &target); |
| } |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| if (len > sizeof (nd_neighbor_solicit_t)) { |
| /* Options present */ |
| opt = (nd_opt_hdr_t *)&ns[1]; |
| len -= sizeof (nd_neighbor_solicit_t); |
| if (!ndp_verify_optlen(opt, len)) { |
| ip1dbg(("ndp_input_solicit: Bad opt len\n")); |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| } |
| if (IN6_IS_ADDR_UNSPECIFIED(&src)) { |
| /* Check to see if this is a valid DAD solicitation */ |
| if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { |
| if (ip_debug > 2) { |
| /* ip1dbg */ |
| pr_addr_dbg("ndp_input_solicit: IPv6 " |
| "Destination is not solicited node " |
| "multicast %s\n", AF_INET6, |
| &ip6h->ip6_dst); |
| } |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| } |
| |
| /* |
| * NOTE: with IPMP, it's possible the nominated multicast ill (which |
| * received this packet if it's multicast) is not the ill tied to |
| * e.g. the IPMP ill's data link-local. So we match across the illgrp |
| * to ensure we find the associated NCE. |
| */ |
| our_ncec = ncec_lookup_illgrp_v6(ill, &target); |
| /* |
| * If this is a valid Solicitation for an address we are publishing, |
| * then a PUBLISH entry should exist in the cache |
| */ |
| if (our_ncec == NULL || !NCE_PUBLISH(our_ncec)) { |
| ip1dbg(("ndp_input_solicit: Wrong target in NS?!" |
| "ifname=%s ", ill->ill_name)); |
| if (ip_debug > 2) { |
| /* ip1dbg */ |
| pr_addr_dbg(" dst %s\n", AF_INET6, &target); |
| } |
| if (our_ncec == NULL) |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| |
| /* At this point we should have a verified NS per spec */ |
| if (opt != NULL) { |
| opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); |
| if (opt != NULL) { |
| haddr = (uchar_t *)&opt[1]; |
| if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || |
| hlen == 0) { |
| ip1dbg(("ndp_input_advert: bad SLLA\n")); |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| } |
| } |
| |
| /* If sending directly to peer, set the unicast flag */ |
| if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) |
| flag |= NDP_UNICAST; |
| |
| /* |
| * Create/update the entry for the soliciting node on the ipmp_ill. |
| * or respond to outstanding queries, don't if |
| * the source is unspecified address. |
| */ |
| if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { |
| int err; |
| nce_t *nnce; |
| |
| ASSERT(ill->ill_isv6); |
| /* |
| * Regular solicitations *must* include the Source Link-Layer |
| * Address option. Ignore messages that do not. |
| */ |
| if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { |
| ip1dbg(("ndp_input_solicit: source link-layer address " |
| "option missing with a specified source.\n")); |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| |
| /* |
| * This is a regular solicitation. If we're still in the |
| * process of verifying the address, then don't respond at all |
| * and don't keep track of the sender. |
| */ |
| if (our_ncec->ncec_state == ND_PROBE) |
| goto done; |
| |
| /* |
| * If the solicitation doesn't have sender hardware address |
| * (legal for unicast solicitation), then process without |
| * installing the return NCE. Either we already know it, or |
| * we'll be forced to look it up when (and if) we reply to the |
| * packet. |
| */ |
| if (haddr == NULL) |
| goto no_source; |
| |
| under_ill = ill; |
| if (IS_UNDER_IPMP(under_ill)) { |
| ill = ipmp_ill_hold_ipmp_ill(under_ill); |
| if (ill == NULL) |
| ill = under_ill; |
| else |
| need_ill_refrele = B_TRUE; |
| } |
| err = nce_lookup_then_add_v6(ill, |
| haddr, hlen, |
| &src, /* Soliciting nodes address */ |
| 0, |
| ND_STALE, |
| &nnce); |
| |
| if (need_ill_refrele) { |
| ill_refrele(ill); |
| ill = under_ill; |
| need_ill_refrele = B_FALSE; |
| } |
| switch (err) { |
| case 0: |
| /* done with this entry */ |
| nce_refrele(nnce); |
| break; |
| case EEXIST: |
| /* |
| * B_FALSE indicates this is not an an advertisement. |
| */ |
| nce_process(nnce->nce_common, haddr, 0, B_FALSE); |
| nce_refrele(nnce); |
| break; |
| default: |
| ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", |
| err)); |
| goto done; |
| } |
| no_source: |
| flag |= NDP_SOLICITED; |
| } else { |
| /* |
| * No source link layer address option should be present in a |
| * valid DAD request. |
| */ |
| if (haddr != NULL) { |
| ip1dbg(("ndp_input_solicit: source link-layer address " |
| "option present with an unspecified source.\n")); |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| if (our_ncec->ncec_state == ND_PROBE) { |
| /* |
| * Internally looped-back probes will have |
| * IRAF_L2SRC_LOOPBACK set so we can ignore our own |
| * transmissions. |
| */ |
| if (!(ira->ira_flags & IRAF_L2SRC_LOOPBACK)) { |
| /* |
| * If someone else is probing our address, then |
| * we've crossed wires. Declare failure. |
| */ |
| ndp_failure(mp, ira); |
| } |
| goto done; |
| } |
| /* |
| * This is a DAD probe. Multicast the advertisement to the |
| * all-nodes address. |
| */ |
| src = ipv6_all_hosts_mcast; |
| } |
| flag |= nce_advert_flags(our_ncec); |
| (void) ndp_xmit(ill, |
| ND_NEIGHBOR_ADVERT, |
| our_ncec->ncec_lladdr, |
| our_ncec->ncec_lladdr_length, |
| &target, /* Source and target of the advertisement pkt */ |
| &src, /* IP Destination (source of original pkt) */ |
| flag); |
| done: |
| if (bad_solicit) |
| BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); |
| if (our_ncec != NULL) |
| ncec_refrele(our_ncec); |
| } |
| |
| /* |
| * Handle reception of Neighbor Solicitation messages |
| */ |
| void |
| ndp_input_advert(mblk_t *mp, ip_recv_attr_t *ira) |
| { |
| ill_t *ill = ira->ira_ill; |
| nd_neighbor_advert_t *na; |
| uint32_t hlen = ill->ill_phys_addr_length; |
| uchar_t *haddr = NULL; |
| icmp6_t *icmp_nd; |
| ip6_t *ip6h; |
| ncec_t *dst_ncec = NULL; |
| in6_addr_t target; |
| nd_opt_hdr_t *opt = NULL; |
| int len; |
| ip_stack_t *ipst = ill->ill_ipst; |
| mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; |
| |
| ip6h = (ip6_t *)mp->b_rptr; |
| icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); |
| len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; |
| na = (nd_neighbor_advert_t *)icmp_nd; |
| |
| if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && |
| (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { |
| ip1dbg(("ndp_input_advert: Target is multicast but the " |
| "solicited flag is not zero\n")); |
| BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); |
| return; |
| } |
| target = na->nd_na_target; |
| if (IN6_IS_ADDR_MULTICAST(&target) || IN6_IS_ADDR_V4MAPPED(&target) || |
| IN6_IS_ADDR_LOOPBACK(&target)) { |
| if (ip_debug > 2) { |
| /* ip1dbg */ |
| pr_addr_dbg("ndp_input_solicit: Martian Target %s\n", |
| AF_INET6, &target); |
| } |
| BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); |
| return; |
| } |
| if (len > sizeof (nd_neighbor_advert_t)) { |
| opt = (nd_opt_hdr_t *)&na[1]; |
| if (!ndp_verify_optlen(opt, |
| len - sizeof (nd_neighbor_advert_t))) { |
| ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); |
| BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); |
| return; |
| } |
| /* At this point we have a verified NA per spec */ |
| len -= sizeof (nd_neighbor_advert_t); |
| opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); |
| if (opt != NULL) { |
| haddr = (uchar_t *)&opt[1]; |
| if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || |
| hlen == 0) { |
| ip1dbg(("ndp_input_advert: bad SLLA\n")); |
| BUMP_MIB(mib, |
| ipv6IfIcmpInBadNeighborAdvertisements); |
| return; |
| } |
| } |
| } |
| |
| /* |
| * NOTE: we match across the illgrp since we need to do DAD for all of |
| * our local addresses, and those are spread across all the active |
| * ills in the group. |
| */ |
| if ((dst_ncec = ncec_lookup_illgrp_v6(ill, &target)) == NULL) |
| return; |
| |
| if (NCE_PUBLISH(dst_ncec)) { |
| /* |
| * Someone just advertised an addresses that we publish. First, |
| * check it it was us -- if so, we can safely ignore it. |
| * We don't get the haddr from the ira_l2src because, in the |
| * case that the packet originated from us, on an IPMP group, |
| * the ira_l2src may would be the link-layer address of the |
| * cast_ill used to send the packet, which may not be the same |
| * as the dst_ncec->ncec_lladdr of the address. |
| */ |
| if (haddr != NULL) { |
| if (ira->ira_flags & IRAF_L2SRC_LOOPBACK) |
| goto out; |
| |
| if (!nce_cmp_ll_addr(dst_ncec, haddr, hlen)) |
| goto out; /* from us -- no conflict */ |
| |
| /* |
| * If we're in an IPMP group, check if this is an echo |
| * from another ill in the group. Use the double- |
| * checked locking pattern to avoid grabbing |
| * ill_g_lock in the non-IPMP case. |
| */ |
| if (IS_UNDER_IPMP(ill)) { |
| rw_enter(&ipst->ips_ill_g_lock, RW_READER); |
| if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill( |
| ill->ill_grp, haddr, hlen) != NULL) { |
| rw_exit(&ipst->ips_ill_g_lock); |
| goto out; |
| } |
| rw_exit(&ipst->ips_ill_g_lock); |
| } |
| } |
| |
| /* |
| * This appears to be a real conflict. If we're trying to |
| * configure this NCE (ND_PROBE), then shut it down. |
| * Otherwise, handle the discovered conflict. |
| */ |
| if (dst_ncec->ncec_state == ND_PROBE) { |
| ndp_failure(mp, ira); |
| } else { |
| if (ip_nce_conflict(mp, ira, dst_ncec)) { |
| char hbuf[MAC_STR_LEN]; |
| char sbuf[INET6_ADDRSTRLEN]; |
| |
| cmn_err(CE_WARN, |
| "node '%s' is using %s on %s", |
| inet_ntop(AF_INET6, &target, sbuf, |
| sizeof (sbuf)), |
| haddr == NULL ? "<none>" : |
| mac_colon_addr(haddr, hlen, hbuf, |
| sizeof (hbuf)), ill->ill_name); |
| /* |
| * RFC 4862, Section 5.4.4 does not mandate |
| * any specific behavior when an NA matches |
| * a non-tentative address assigned to the |
| * receiver. We make the choice of defending |
| * our address, based on the assumption that |
| * the sender has not detected the Duplicate. |
| * |
| * ncec_last_time_defended has been adjusted |
| * in ip_nce_conflict() |
| */ |
| (void) ndp_announce(dst_ncec); |
| } |
| } |
| } else { |
| if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER) |
| dst_ncec->ncec_flags |= NCE_F_ISROUTER; |
| |
| /* B_TRUE indicates this an advertisement */ |
| nce_process(dst_ncec, haddr, na->nd_na_flags_reserved, B_TRUE); |
| } |
| out: |
| ncec_refrele(dst_ncec); |
| } |
| |
| /* |
| * Process NDP neighbor solicitation/advertisement messages. |
| * The checksum has already checked o.k before reaching here. |
| * Information about the datalink header is contained in ira_l2src, but |
| * that should be ignored for loopback packets. |
| */ |
| void |
| ndp_input(mblk_t *mp, ip_recv_attr_t *ira) |
| { |
| ill_t *ill = ira->ira_rill; |
| icmp6_t *icmp_nd; |
| ip6_t *ip6h; |
| int len; |
| mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; |
| ill_t *orig_ill = NULL; |
| |
| /* |
| * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill |
| * and make it be the IPMP upper so avoid being confused by a packet |
| * addressed to a unicast address on a different ill. |
| */ |
| if (IS_UNDER_IPMP(ill)) { |
| orig_ill = ill; |
| ill = ipmp_ill_hold_ipmp_ill(orig_ill); |
| if (ill == NULL) { |
| ill = orig_ill; |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| ip_drop_input("ipIfStatsInDiscards - IPMP ill", |
| mp, ill); |
| freemsg(mp); |
| return; |
| } |
| ASSERT(ill != orig_ill); |
| orig_ill = ira->ira_ill; |
| ira->ira_ill = ill; |
| mib = ill->ill_icmp6_mib; |
| } |
| if (!pullupmsg(mp, -1)) { |
| ip1dbg(("ndp_input: pullupmsg failed\n")); |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| ip_drop_input("ipIfStatsInDiscards - pullupmsg", mp, ill); |
| goto done; |
| } |
| ip6h = (ip6_t *)mp->b_rptr; |
| if (ip6h->ip6_hops != IPV6_MAX_HOPS) { |
| ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); |
| ip_drop_input("ipv6IfIcmpBadHoplimit", mp, ill); |
| BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); |
| goto done; |
| } |
| /* |
| * NDP does not accept any extension headers between the |
| * IP header and the ICMP header since e.g. a routing |
| * header could be dangerous. |
| * This assumes that any AH or ESP headers are removed |
| * by ip prior to passing the packet to ndp_input. |
| */ |
| if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { |
| ip1dbg(("ndp_input: Wrong next header 0x%x\n", |
| ip6h->ip6_nxt)); |
| ip_drop_input("Wrong next header", mp, ill); |
| BUMP_MIB(mib, ipv6IfIcmpInErrors); |
| goto done; |
| } |
| icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); |
| ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || |
| icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); |
| if (icmp_nd->icmp6_code != 0) { |
| ip1dbg(("ndp_input: icmp6 code != 0 \n")); |
| ip_drop_input("code non-zero", mp, ill); |
| BUMP_MIB(mib, ipv6IfIcmpInErrors); |
| goto done; |
| } |
| len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; |
| /* |
| * Make sure packet length is large enough for either |
| * a NS or a NA icmp packet. |
| */ |
| if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { |
| ip1dbg(("ndp_input: packet too short\n")); |
| ip_drop_input("packet too short", mp, ill); |
| BUMP_MIB(mib, ipv6IfIcmpInErrors); |
| goto done; |
| } |
| if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { |
| ndp_input_solicit(mp, ira); |
| } else { |
| ndp_input_advert(mp, ira); |
| } |
| done: |
| freemsg(mp); |
| if (orig_ill != NULL) { |
| ill_refrele(ill); |
| ira->ira_ill = orig_ill; |
| } |
| } |
| |
| /* |
| * ndp_xmit is called to form and transmit a ND solicitation or |
| * advertisement ICMP packet. |
| * |
| * If the source address is unspecified and this isn't a probe (used for |
| * duplicate address detection), an appropriate source address and link layer |
| * address will be chosen here. The link layer address option is included if |
| * the source is specified (i.e., all non-probe packets), and omitted (per the |
| * specification) otherwise. |
| * |
| * It returns B_FALSE only if it does a successful put() to the |
| * corresponding ill's ill_wq otherwise returns B_TRUE. |
| */ |
| static boolean_t |
| ndp_xmit(ill_t *ill, uint32_t operation, uint8_t *hw_addr, uint_t hw_addr_len, |
| const in6_addr_t *sender, const in6_addr_t *target, int flag) |
| { |
| uint32_t len; |
| icmp6_t *icmp6; |
| mblk_t *mp; |
| ip6_t *ip6h; |
| nd_opt_hdr_t *opt; |
| uint_t plen; |
| zoneid_t zoneid = GLOBAL_ZONEID; |
| ill_t *hwaddr_ill = ill; |
| ip_xmit_attr_t ixas; |
| ip_stack_t *ipst = ill->ill_ipst; |
| boolean_t need_refrele = B_FALSE; |
| boolean_t probe = B_FALSE; |
| |
| if (IS_UNDER_IPMP(ill)) { |
| probe = ipif_lookup_testaddr_v6(ill, sender, NULL); |
| /* |
| * We send non-probe packets on the upper IPMP interface. |
| * ip_output_simple() will use cast_ill for sending any |
| * multicast packets. Note that we can't follow the same |
| * logic for probe packets because all interfaces in the ipmp |
| * group may have failed, so that we really want to only try |
| * to send the ND packet on the ill corresponding to the src |
| * address. |
| */ |
| if (!probe) { |
| ill = ipmp_ill_hold_ipmp_ill(ill); |
| if (ill != NULL) |
| need_refrele = B_TRUE; |
| else |
| ill = hwaddr_ill; |
| } |
| } |
| |
| /* |
| * If we have a unspecified source(sender) address, select a |
| * proper source address for the solicitation here itself so |
| * that we can initialize the h/w address correctly. |
| * |
| * If the sender is specified then we use this address in order |
| * to lookup the zoneid before calling ip_output_v6(). This is to |
| * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly |
| * by IP (we cannot guarantee that the global zone has an interface |
| * route to the destination). |
| * |
| * Note that the NA never comes here with the unspecified source |
| * address. |
| */ |
| |
| /* |
| * Probes will have unspec src at this point. |
| */ |
| if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) { |
| zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ipst); |
| /* |
| * It's possible for ipif_lookup_addr_zoneid_v6() to return |
| * ALL_ZONES if it cannot find a matching ipif for the address |
| * we are trying to use. In this case we err on the side of |
| * trying to send the packet by defaulting to the GLOBAL_ZONEID. |
| */ |
| if (zoneid == ALL_ZONES) |
| zoneid = GLOBAL_ZONEID; |
| } |
| |
| plen = (sizeof (nd_opt_hdr_t) + hw_addr_len + 7) / 8; |
| len = IPV6_HDR_LEN + sizeof (nd_neighbor_advert_t) + plen * 8; |
| mp = allocb(len, BPRI_LO); |
| if (mp == NULL) { |
| if (need_refrele) |
| ill_refrele(ill); |
| return (B_TRUE); |
| } |
| |
| bzero((char *)mp->b_rptr, len); |
| mp->b_wptr = mp->b_rptr + len; |
| |
| bzero(&ixas, sizeof (ixas)); |
| ixas.ixa_flags = IXAF_SET_ULP_CKSUM | IXAF_NO_HW_CKSUM; |
| |
| ixas.ixa_ifindex = ill->ill_phyint->phyint_ifindex; |
| ixas.ixa_ipst = ipst; |
| ixas.ixa_cred = kcred; |
| ixas.ixa_cpid = NOPID; |
| ixas.ixa_tsl = NULL; |
| ixas.ixa_zoneid = zoneid; |
| |
| ip6h = (ip6_t *)mp->b_rptr; |
| ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; |
| ip6h->ip6_plen = htons(len - IPV6_HDR_LEN); |
| ip6h->ip6_nxt = IPPROTO_ICMPV6; |
| ip6h->ip6_hops = IPV6_MAX_HOPS; |
| ixas.ixa_multicast_ttl = ip6h->ip6_hops; |
| ip6h->ip6_dst = *target; |
| icmp6 = (icmp6_t *)&ip6h[1]; |
| |
| if (hw_addr_len != 0) { |
| opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + |
| sizeof (nd_neighbor_advert_t)); |
| } else { |
| opt = NULL; |
| } |
| if (operation == ND_NEIGHBOR_SOLICIT) { |
| nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; |
| |
| if (opt != NULL && !(flag & NDP_PROBE)) { |
| /* |
| * Note that we don't send out SLLA for ND probes |
| * per RFC 4862, even though we do send out the src |
| * haddr for IPv4 DAD probes, even though both IPv4 |
| * and IPv6 go out with the unspecified/INADDR_ANY |
| * src IP addr. |
| */ |
| opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; |
| } |
| ip6h->ip6_src = *sender; |
| ns->nd_ns_target = *target; |
| if (!(flag & NDP_UNICAST)) { |
| /* Form multicast address of the target */ |
| ip6h->ip6_dst = ipv6_solicited_node_mcast; |
| ip6h->ip6_dst.s6_addr32[3] |= |
| ns->nd_ns_target.s6_addr32[3]; |
| } |
| } else { |
| nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; |
| |
| ASSERT(!(flag & NDP_PROBE)); |
| if (opt != NULL) |
| opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; |
| ip6h->ip6_src = *sender; |
| na->nd_na_target = *sender; |
| if (flag & NDP_ISROUTER) |
| na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; |
| if (flag & NDP_SOLICITED) |
| na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; |
| if (flag & NDP_ORIDE) |
| na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; |
| } |
| |
| if (!(flag & NDP_PROBE)) { |
| if (hw_addr != NULL && opt != NULL) { |
| /* Fill in link layer address and option len */ |
| opt->nd_opt_len = (uint8_t)plen; |
| bcopy(hw_addr, &opt[1], hw_addr_len); |
| } |
| } |
| if (opt != NULL && opt->nd_opt_type == 0) { |
| /* If there's no link layer address option, then strip it. */ |
| len -= plen * 8; |
| mp->b_wptr = mp->b_rptr + len; |
| ip6h->ip6_plen = htons(len - IPV6_HDR_LEN); |
| } |
| |
| icmp6->icmp6_type = (uint8_t)operation; |
| icmp6->icmp6_code = 0; |
| /* |
| * Prepare for checksum by putting icmp length in the icmp |
| * checksum field. The checksum is calculated in ip_output.c. |
| */ |
| icmp6->icmp6_cksum = ip6h->ip6_plen; |
| |
| (void) ip_output_simple(mp, &ixas); |
| ixa_cleanup(&ixas); |
| if (need_refrele) |
| ill_refrele(ill); |
| return (B_FALSE); |
| } |
| |
| /* |
| * Used to set ND_UNREACHBLE before ncec_delete sets it NCE_F_CONDEMNED. |
| * The datapath uses this as an indication that there |
| * is a problem (as opposed to a NCE that was just |
| * reclaimed due to lack of memory. |
| * Note that static ARP entries never become unreachable. |
| */ |
| void |
| nce_make_unreachable(ncec_t *ncec) |
| { |
| mutex_enter(&ncec->ncec_lock); |
| ncec->ncec_state = ND_UNREACHABLE; |
| mutex_exit(&ncec->ncec_lock); |
| } |
| |
| /* |
| * NCE retransmit timer. Common to IPv4 and IPv6. |
| * This timer goes off when: |
| * a. It is time to retransmit a resolution for resolver. |
| * b. It is time to send reachability probes. |
| */ |
| void |
| nce_timer(void *arg) |
| { |
| ncec_t *ncec = arg; |
| ill_t *ill = ncec->ncec_ill, *src_ill; |
| char addrbuf[INET6_ADDRSTRLEN]; |
| boolean_t dropped = B_FALSE; |
| ip_stack_t *ipst = ncec->ncec_ipst; |
| boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION); |
| in_addr_t sender4 = INADDR_ANY; |
| in6_addr_t sender6 = ipv6_all_zeros; |
| |
| /* |
| * The timer has to be cancelled by ncec_delete before doing the final |
| * refrele. So the NCE is guaranteed to exist when the timer runs |
| * until it clears the timeout_id. Before clearing the timeout_id |
| * bump up the refcnt so that we can continue to use the ncec |
| */ |
| ASSERT(ncec != NULL); |
| mutex_enter(&ncec->ncec_lock); |
| ncec_refhold_locked(ncec); |
| ncec->ncec_timeout_id = 0; |
| mutex_exit(&ncec->ncec_lock); |
| |
| src_ill = nce_resolve_src(ncec, &sender6); |
| /* if we could not find a sender address, return */ |
| if (src_ill == NULL) { |
| if (!isv6) { |
| IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, sender4); |
| ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET, |
| &sender4, addrbuf, sizeof (addrbuf)))); |
| } else { |
| ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET6, |
| &ncec->ncec_addr, addrbuf, sizeof (addrbuf)))); |
| } |
| nce_restart_timer(ncec, ill->ill_reachable_retrans_time); |
| ncec_refrele(ncec); |
| return; |
| } |
| if (!isv6) |
| IN6_V4MAPPED_TO_IPADDR(&sender6, sender4); |
| |
| mutex_enter(&ncec->ncec_lock); |
| /* |
| * Check the reachability state. |
| */ |
| switch (ncec->ncec_state) { |
| case ND_DELAY: |
| ASSERT(ncec->ncec_lladdr != NULL); |
| ncec->ncec_state = ND_PROBE; |
| ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT; |
| if (isv6) { |
| mutex_exit(&ncec->ncec_lock); |
| dropped = ndp_xmit(src_ill, ND_NEIGHBOR_SOLICIT, |
| src_ill->ill_phys_addr, |
| src_ill->ill_phys_addr_length, |
| &sender6, &ncec->ncec_addr, |
| NDP_UNICAST); |
| } else { |
| dropped = (arp_request(ncec, sender4, src_ill) == 0); |
| mutex_exit(&ncec->ncec_lock); |
| } |
| if (!dropped) { |
| mutex_enter(&ncec->ncec_lock); |
| ncec->ncec_pcnt--; |
| mutex_exit(&ncec->ncec_lock); |
| } |
| if (ip_debug > 3) { |
| /* ip2dbg */ |
| pr_addr_dbg("nce_timer: state for %s changed " |
| "to PROBE\n", AF_INET6, &ncec->ncec_addr); |
| } |
| nce_restart_timer(ncec, ill->ill_reachable_retrans_time); |
| break; |
| case ND_PROBE: |
| /* must be retransmit timer */ |
| ASSERT(ncec->ncec_pcnt >= -1); |
| if (ncec->ncec_pcnt > 0) { |
| /* |
| * As per RFC2461, the ncec gets deleted after |
| * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. |
| * Note that the first unicast solicitation is sent |
| * during the DELAY state. |
| */ |
| ip2dbg(("nce_timer: pcount=%x dst %s\n", |
| ncec->ncec_pcnt, |
| inet_ntop((isv6? AF_INET6 : AF_INET), |
| &ncec->ncec_addr, addrbuf, sizeof (addrbuf)))); |
| if (NCE_PUBLISH(ncec)) { |
| mutex_exit(&ncec->ncec_lock); |
| /* |
| * send out a probe; note that src_ill |
| * is ignored by nce_dad() for all |
| * DAD message types other than IPv6 |
| * unicast probes |
| */ |
| nce_dad(ncec, src_ill, B_TRUE); |
| } else { |
| ASSERT(src_ill != NULL); |
| if (isv6) { |
| mutex_exit(&ncec->ncec_lock); |
| dropped = ndp_xmit(src_ill, |
| ND_NEIGHBOR_SOLICIT, |
| src_ill->ill_phys_addr, |
| src_ill->ill_phys_addr_length, |
| &sender6, &ncec->ncec_addr, |
| NDP_UNICAST); |
| } else { |
| /* |
| * since the nce is REACHABLE, |
| * the ARP request will be sent out |
| * as a link-layer unicast. |
| */ |
| dropped = (arp_request(ncec, sender4, |
| src_ill) == 0); |
| mutex_exit(&ncec->ncec_lock); |
| } |
| if (!dropped) { |
| mutex_enter(&ncec->ncec_lock); |
| ncec->ncec_pcnt--; |
| mutex_exit(&ncec->ncec_lock); |
| } |
| nce_restart_timer(ncec, |
| ill->ill_reachable_retrans_time); |
| } |
| } else if (ncec->ncec_pcnt < 0) { |
| /* No hope, delete the ncec */ |
| /* Tell datapath it went bad */ |
| ncec->ncec_state = ND_UNREACHABLE; |
| mutex_exit(&ncec->ncec_lock); |
| if (ip_debug > 2) { |
| /* ip1dbg */ |
| pr_addr_dbg("nce_timer: Delete NCE for" |
| " dst %s\n", (isv6? AF_INET6: AF_INET), |
| &ncec->ncec_addr); |
| } |
| /* if static ARP can't delete. */ |
| if ((ncec->ncec_flags & NCE_F_STATIC) == 0) |
| ncec_delete(ncec); |
| |
| } else if (!NCE_PUBLISH(ncec)) { |
| /* |
| * Probe count is 0 for a dynamic entry (one that we |
| * ourselves are not publishing). We should never get |
| * here if NONUD was requested, hence the ASSERT below. |
| */ |
| ASSERT((ncec->ncec_flags & NCE_F_NONUD) == 0); |
| ip2dbg(("nce_timer: pcount=%x dst %s\n", |
| ncec->ncec_pcnt, inet_ntop(AF_INET6, |
| &ncec->ncec_addr, addrbuf, sizeof (addrbuf)))); |
| ncec->ncec_pcnt--; |
| mutex_exit(&ncec->ncec_lock); |
| /* Wait one interval before killing */ |
| nce_restart_timer(ncec, |
| ill->ill_reachable_retrans_time); |
| } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { |
| ipif_t *ipif; |
| ipaddr_t ncec_addr; |
| |
| /* |
| * We're done probing, and we can now declare this |
| * address to be usable. Let IP know that it's ok to |
| * use. |
| */ |
| ncec->ncec_state = ND_REACHABLE; |
| ncec->ncec_flags &= ~NCE_F_UNVERIFIED; |
| mutex_exit(&ncec->ncec_lock); |
| if (isv6) { |
| ipif = ipif_lookup_addr_exact_v6( |
| &ncec->ncec_addr, ill, ipst); |
| } else { |
| IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, |
| ncec_addr); |
| ipif = ipif_lookup_addr_exact(ncec_addr, ill, |
| ipst); |
| } |
| if (ipif != NULL) { |
| if (ipif->ipif_was_dup) { |
| char ibuf[LIFNAMSIZ]; |
| char sbuf[INET6_ADDRSTRLEN]; |
| |
| ipif->ipif_was_dup = B_FALSE; |
| (void) inet_ntop(AF_INET6, |
| &ipif->ipif_v6lcl_addr, |
| sbuf, sizeof (sbuf)); |
| ipif_get_name(ipif, ibuf, |
| sizeof (ibuf)); |
| cmn_err(CE_NOTE, "recovered address " |
| "%s on %s", sbuf, ibuf); |
| } |
| if ((ipif->ipif_flags & IPIF_UP) && |
| !ipif->ipif_addr_ready) |
| ipif_up_notify(ipif); |
| ipif->ipif_addr_ready = 1; |
| ipif_refrele(ipif); |
| } |
| if (!isv6 && arp_no_defense) |
| break; |
| /* Begin defending our new address */ |
| if (ncec->ncec_unsolicit_count > 0) { |
| ncec->ncec_unsolicit_count--; |
| if (isv6) { |
| dropped = ndp_announce(ncec); |
| } else { |
| dropped = arp_announce(ncec); |
| } |
| |
| if (dropped) |
| ncec->ncec_unsolicit_count++; |
| else |
| ncec->ncec_last_time_defended = |
| ddi_get_lbolt(); |
| } |
| if (ncec->ncec_unsolicit_count > 0) { |
| nce_restart_timer(ncec, |
| ANNOUNCE_INTERVAL(isv6)); |
| } else if (DEFENSE_INTERVAL(isv6) != 0) { |
| nce_restart_timer(ncec, DEFENSE_INTERVAL(isv6)); |
| } |
| } else { |
| /* |
| * This is an address we're probing to be our own, but |
| * the ill is down. Wait until it comes back before |
| * doing anything, but switch to reachable state so |
| * that the restart will work. |
| */ |
| ncec->ncec_state = ND_REACHABLE; |
| mutex_exit(&ncec->ncec_lock); |
| } |
| break; |
| case ND_INCOMPLETE: { |
| mblk_t *mp, *nextmp; |
| mblk_t **prevmpp; |
| |
| /* |
| * Per case (2) in the nce_queue_mp() comments, scan ncec_qd_mp |
| * for any IPMP probe packets, and toss them. IPMP probe |
| * packets will always be at the head of ncec_qd_mp, so that |
| * we can stop at the first queued ND packet that is |
| * not a probe packet. |
| */ |
| prevmpp = &ncec->ncec_qd_mp; |
| for (mp = ncec->ncec_qd_mp; mp != NULL; mp = nextmp) { |
| nextmp = mp->b_next; |
| |
| if (IS_UNDER_IPMP(ill) && ncec->ncec_nprobes > 0) { |
| inet_freemsg(mp); |
| ncec->ncec_nprobes--; |
| *prevmpp = nextmp; |
| } else { |
| prevmpp = &mp->b_next; |
| } |
| } |
| |
| /* |
| * Must be resolver's retransmit timer. |
| */ |
| mutex_exit(&ncec->ncec_lock); |
| ip_ndp_resolve(ncec); |
| break; |
| } |
| case ND_REACHABLE: |
| if (((ncec->ncec_flags & NCE_F_UNSOL_ADV) && |
| ncec->ncec_unsolicit_count != 0) || |
| (NCE_PUBLISH(ncec) && DEFENSE_INTERVAL(isv6) != 0)) { |
| if (ncec->ncec_unsolicit_count > 0) { |
| ncec->ncec_unsolicit_count--; |
| mutex_exit(&ncec->ncec_lock); |
| /* |
| * When we get to zero announcements left, |
| * switch to address defense |
| */ |
| } else { |
| boolean_t rate_limit; |
| |
| mutex_exit(&ncec->ncec_lock); |
| rate_limit = ill_defend_rate_limit(ill, ncec); |
| if (rate_limit) { |
| nce_restart_timer(ncec, |
| DEFENSE_INTERVAL(isv6)); |
| break; |
| } |
| } |
| if (isv6) { |
| dropped = ndp_announce(ncec); |
| } else { |
| dropped = arp_announce(ncec); |
| } |
| mutex_enter(&ncec->ncec_lock); |
| if (dropped) { |
| ncec->ncec_unsolicit_count++; |
| } else { |
| ncec->ncec_last_time_defended = |
| ddi_get_lbolt(); |
| } |
| mutex_exit(&ncec->ncec_lock); |
| if (ncec->ncec_unsolicit_count != 0) { |
| nce_restart_timer(ncec, |
| ANNOUNCE_INTERVAL(isv6)); |
| } else { |
| nce_restart_timer(ncec, DEFENSE_INTERVAL(isv6)); |
| } |
| } else { |
| mutex_exit(&ncec->ncec_lock); |
| } |
| break; |
| default: |
| mutex_exit(&ncec->ncec_lock); |
| break; |
| } |
| done: |
| ncec_refrele(ncec); |
| ill_refrele(src_ill); |
| } |
| |
| /* |
| * Set a link layer address from the ll_addr passed in. |
| * Copy SAP from ill. |
| */ |
| static void |
| nce_set_ll(ncec_t *ncec, uchar_t *ll_addr) |
| { |
| ill_t *ill = ncec->ncec_ill; |
| |
| ASSERT(ll_addr != NULL); |
| if (ill->ill_phys_addr_length > 0) { |
| /* |
| * The bcopy() below used to be called for the physical address |
| * length rather than the link layer address length. For |
| * ethernet and many other media, the phys_addr and lla are |
| * identical. |
| * |
| * The phys_addr and lla may not be the same for devices that |
| * support DL_IPV6_LINK_LAYER_ADDR, though there are currently |
| * no known instances of these. |
| * |
| * For PPP or other interfaces with a zero length |
| * physical address, don't do anything here. |
| * The bcopy() with a zero phys_addr length was previously |
| * a no-op for interfaces with a zero-length physical address. |
| * Using the lla for them would change the way they operate. |
| * Doing nothing in such cases preserves expected behavior. |
| */ |
| bcopy(ll_addr, ncec->ncec_lladdr, ill->ill_nd_lla_len); |
| } |
| } |
| |
| boolean_t |
| nce_cmp_ll_addr(const ncec_t *ncec, const uchar_t *ll_addr, |
| uint32_t ll_addr_len) |
| { |
| ASSERT(ncec->ncec_lladdr != NULL); |
| if (ll_addr == NULL) |
| return (B_FALSE); |
| if (bcmp(ll_addr, ncec->ncec_lladdr, ll_addr_len) != 0) |
| return (B_TRUE); |
| return (B_FALSE); |
| } |
| |
| /* |
| * Updates the link layer address or the reachability state of |
| * a cache entry. Reset probe counter if needed. |
| */ |
| void |
| nce_update(ncec_t *ncec, uint16_t new_state, uchar_t *new_ll_addr) |
| { |
| ill_t *ill = ncec->ncec_ill; |
| boolean_t need_stop_timer = B_FALSE; |
| boolean_t need_fastpath_update = B_FALSE; |
| nce_t *nce = NULL; |
| timeout_id_t tid; |
| |
| ASSERT(MUTEX_HELD(&ncec->ncec_lock)); |
| /* |
| * If this interface does not do NUD, there is no point |
| * in allowing an update to the cache entry. Although |
| * we will respond to NS. |
| * The only time we accept an update for a resolver when |
| * NUD is turned off is when it has just been created. |
| * Non-Resolvers will always be created as REACHABLE. |
| */ |
| if (new_state != ND_UNCHANGED) { |
| if ((ncec->ncec_flags & NCE_F_NONUD) && |
| (ncec->ncec_state != ND_INCOMPLETE)) |
| return; |
| ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); |
| ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); |
| need_stop_timer = B_TRUE; |
| if (new_state == ND_REACHABLE) |
| ncec->ncec_last = TICK_TO_MSEC(ddi_get_lbolt64()); |
| else { |
| /* We force NUD in this case */ |
| ncec->ncec_last = 0; |
| } |
| ncec->ncec_state = new_state; |
| ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT; |
| ASSERT(ncec->ncec_lladdr != NULL || new_state == ND_INITIAL || |
| new_state == ND_INCOMPLETE); |
| } |
| if (need_stop_timer || (ncec->ncec_flags & NCE_F_STATIC)) { |
| tid = ncec->ncec_timeout_id; |
| ncec->ncec_timeout_id = 0; |
| } |
| /* |
| * Re-trigger fastpath probe and |
| * overwrite the DL_UNITDATA_REQ data, noting we'll lose |
| * whatever packets that happens to be transmitting at the time. |
| */ |
| if (new_ll_addr != NULL) { |
| bcopy(new_ll_addr, ncec->ncec_lladdr, |
| ill->ill_phys_addr_length); |
| need_fastpath_update = B_TRUE; |
| } |
| mutex_exit(&ncec->ncec_lock); |
| if (need_stop_timer || (ncec->ncec_flags & NCE_F_STATIC)) { |
| if (tid != 0) |
| (void) untimeout(tid); |
| } |
| if (need_fastpath_update) { |
| /* |
| * Delete any existing existing dlur_mp and fp_mp information. |
| * For IPMP interfaces, all underlying ill's must be checked |
| * and purged. |
| */ |
| nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL); |
| /* |
| * add the new dlur_mp and fp_mp |
| */ |
| nce = nce_fastpath(ncec, B_TRUE, NULL); |
| if (nce != NULL) |
| nce_refrele(nce); |
| } |
| mutex_enter(&ncec->ncec_lock); |
| } |
| |
| static void |
| nce_queue_mp_common(ncec_t *ncec, mblk_t *mp, boolean_t head_insert) |
| { |
| uint_t count = 0; |
| mblk_t **mpp, *tmp; |
| |
| ASSERT(MUTEX_HELD(&ncec->ncec_lock)); |
| |
| for (mpp = &ncec->ncec_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) { |
| if (++count > ncec->ncec_ill->ill_max_buf) { |
| tmp = ncec->ncec_qd_mp->b_next; |
| ncec->ncec_qd_mp->b_next = NULL; |
| /* |
| * if we never create data addrs on the under_ill |
| * does this matter? |
| */ |
| BUMP_MIB(ncec->ncec_ill->ill_ip_mib, |
| ipIfStatsOutDiscards); |
| ip_drop_output("ipIfStatsOutDiscards", ncec->ncec_qd_mp, |
| ncec->ncec_ill); |
| freemsg(ncec->ncec_qd_mp); |
| ncec->ncec_qd_mp = tmp; |
| } |
| } |
| |
| if (head_insert) { |
| ncec->ncec_nprobes++; |
| mp->b_next = ncec->ncec_qd_mp; |
| ncec->ncec_qd_mp = mp; |
| } else { |
| *mpp = mp; |
| } |
| } |
| |
| /* |
| * nce_queue_mp will queue the packet into the ncec_qd_mp. The packet will be |
| * queued at the head or tail of the queue based on the input argument |
| * 'head_insert'. The caller should specify this argument as B_TRUE if this |
| * packet is an IPMP probe packet, in which case the following happens: |
| * |
| * 1. Insert it at the head of the ncec_qd_mp list. Consider the normal |
| * (non-ipmp_probe) load-spreading case where the source address of the ND |
| * packet is not tied to ncec_ill. If the ill bound to the source address |
| * cannot receive, the response to the ND packet will not be received. |
| * However, if ND packets for ncec_ill's probes are queued behind that ND |
| * packet, those probes will also fail to be sent, and thus in.mpathd will |
| * erroneously conclude that ncec_ill has also failed. |
| * |
| * 2. Drop the ipmp_probe packet in nce_timer() if the ND did not succeed on |
| * the first at
|