| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| */ |
| |
| #include <sys/types.h> |
| #include <sys/stream.h> |
| #include <sys/stropts.h> |
| #include <sys/strsun.h> |
| #include <sys/sysmacros.h> |
| #include <sys/errno.h> |
| #include <sys/dlpi.h> |
| #include <sys/socket.h> |
| #include <sys/ddi.h> |
| #include <sys/sunddi.h> |
| #include <sys/cmn_err.h> |
| #include <sys/debug.h> |
| #include <sys/vtrace.h> |
| #include <sys/kmem.h> |
| #include <sys/zone.h> |
| #include <sys/ethernet.h> |
| #include <sys/sdt.h> |
| |
| #include <net/if.h> |
| #include <net/if_types.h> |
| #include <net/if_dl.h> |
| #include <net/route.h> |
| #include <netinet/in.h> |
| #include <netinet/ip6.h> |
| #include <netinet/icmp6.h> |
| |
| #include <inet/common.h> |
| #include <inet/mi.h> |
| #include <inet/mib2.h> |
| #include <inet/nd.h> |
| #include <inet/ip.h> |
| #include <inet/ip_impl.h> |
| #include <inet/ipclassifier.h> |
| #include <inet/ip_if.h> |
| #include <inet/ip_ire.h> |
| #include <inet/ip_rts.h> |
| #include <inet/ip6.h> |
| #include <inet/ip_ndp.h> |
| #include <inet/ipsec_impl.h> |
| #include <inet/ipsec_info.h> |
| #include <inet/sctp_ip.h> |
| |
| /* |
| * Function names with nce_ prefix are static while function |
| * names with ndp_ prefix are used by rest of the IP. |
| * |
| * Lock ordering: |
| * |
| * ndp_g_lock -> ill_lock -> nce_lock |
| * |
| * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and |
| * nce_next. Nce_lock protects the contents of the NCE (particularly |
| * nce_refcnt). |
| */ |
| |
| static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, |
| uint32_t ll_addr_len); |
| static void nce_ire_delete(nce_t *nce); |
| static void nce_ire_delete1(ire_t *ire, char *nce_arg); |
| static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); |
| static nce_t *nce_lookup_addr(ill_t *, boolean_t, const in6_addr_t *, |
| nce_t *); |
| static nce_t *nce_lookup_mapping(ill_t *, const in6_addr_t *); |
| static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, |
| uchar_t *addr); |
| static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); |
| static void nce_queue_mp(nce_t *nce, mblk_t *mp); |
| static mblk_t *nce_udreq_alloc(ill_t *ill); |
| static void nce_update(nce_t *nce, uint16_t new_state, |
| uchar_t *new_ll_addr); |
| static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); |
| static boolean_t nce_xmit(ill_t *ill, uint8_t type, |
| boolean_t use_lla_addr, const in6_addr_t *sender, |
| const in6_addr_t *target, int flag); |
| static boolean_t nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, |
| const in6_addr_t *target, uint_t flags); |
| static boolean_t nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, |
| const in6_addr_t *src, uint_t flags); |
| static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, |
| nce_t **, nce_t *); |
| static ipif_t *ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill); |
| |
| #ifdef DEBUG |
| static void nce_trace_cleanup(const nce_t *); |
| #endif |
| |
| #define NCE_HASH_PTR_V4(ipst, addr) \ |
| (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) |
| |
| #define NCE_HASH_PTR_V6(ipst, addr) \ |
| (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ |
| NCE_TABLE_SIZE)])) |
| |
| /* Non-tunable probe interval, based on link capabilities */ |
| #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) |
| |
| /* |
| * NDP Cache Entry creation routine. |
| * Mapped entries will never do NUD . |
| * This routine must always be called with ndp6->ndp_g_lock held. |
| * Prior to return, nce_refcnt is incremented. |
| */ |
| int |
| ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, |
| const in6_addr_t *mask, const in6_addr_t *extract_mask, |
| uint32_t hw_extract_start, uint16_t flags, uint16_t state, |
| nce_t **newnce) |
| { |
| static nce_t nce_nil; |
| nce_t *nce; |
| mblk_t *mp; |
| mblk_t *template; |
| nce_t **ncep; |
| int err; |
| boolean_t dropped = B_FALSE; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); |
| ASSERT(ill != NULL && ill->ill_isv6); |
| if (IN6_IS_ADDR_UNSPECIFIED(addr)) { |
| ip0dbg(("ndp_add_v6: no addr\n")); |
| return (EINVAL); |
| } |
| if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { |
| ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); |
| return (EINVAL); |
| } |
| if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && |
| (flags & NCE_F_MAPPING)) { |
| ip0dbg(("ndp_add_v6: extract mask zero for mapping")); |
| return (EINVAL); |
| } |
| /* |
| * Allocate the mblk to hold the nce. |
| * |
| * XXX This can come out of a separate cache - nce_cache. |
| * We don't need the mp anymore as there are no more |
| * "qwriter"s |
| */ |
| mp = allocb(sizeof (nce_t), BPRI_MED); |
| if (mp == NULL) |
| return (ENOMEM); |
| |
| nce = (nce_t *)mp->b_rptr; |
| mp->b_wptr = (uchar_t *)&nce[1]; |
| *nce = nce_nil; |
| |
| /* |
| * This one holds link layer address |
| */ |
| if (ill->ill_net_type == IRE_IF_RESOLVER) { |
| template = nce_udreq_alloc(ill); |
| } else { |
| if (ill->ill_resolver_mp == NULL) { |
| freeb(mp); |
| return (EINVAL); |
| } |
| ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); |
| template = copyb(ill->ill_resolver_mp); |
| } |
| if (template == NULL) { |
| freeb(mp); |
| return (ENOMEM); |
| } |
| nce->nce_ill = ill; |
| nce->nce_ipversion = IPV6_VERSION; |
| nce->nce_flags = flags; |
| nce->nce_state = state; |
| nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; |
| nce->nce_rcnt = ill->ill_xmit_count; |
| nce->nce_addr = *addr; |
| nce->nce_mask = *mask; |
| nce->nce_extract_mask = *extract_mask; |
| nce->nce_ll_extract_start = hw_extract_start; |
| nce->nce_fp_mp = NULL; |
| nce->nce_res_mp = template; |
| if (state == ND_REACHABLE) |
| nce->nce_last = TICK_TO_MSEC(lbolt64); |
| else |
| nce->nce_last = 0; |
| nce->nce_qd_mp = NULL; |
| nce->nce_mp = mp; |
| if (hw_addr != NULL) |
| nce_set_ll(nce, hw_addr); |
| /* This one is for nce getting created */ |
| nce->nce_refcnt = 1; |
| mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); |
| if (nce->nce_flags & NCE_F_MAPPING) { |
| ASSERT(IN6_IS_ADDR_MULTICAST(addr)); |
| ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); |
| ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); |
| ncep = &ipst->ips_ndp6->nce_mask_entries; |
| } else { |
| ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); |
| } |
| |
| nce->nce_trace_disable = B_FALSE; |
| |
| /* |
| * Atomically ensure that the ill is not CONDEMNED, before |
| * adding the NCE. |
| */ |
| mutex_enter(&ill->ill_lock); |
| if (ill->ill_state_flags & ILL_CONDEMNED) { |
| mutex_exit(&ill->ill_lock); |
| freeb(mp); |
| freeb(template); |
| return (EINVAL); |
| } |
| if ((nce->nce_next = *ncep) != NULL) |
| nce->nce_next->nce_ptpn = &nce->nce_next; |
| *ncep = nce; |
| nce->nce_ptpn = ncep; |
| *newnce = nce; |
| /* This one is for nce being used by an active thread */ |
| NCE_REFHOLD(*newnce); |
| |
| /* Bump up the number of nce's referencing this ill */ |
| DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, |
| (char *), "nce", (void *), nce); |
| ill->ill_nce_cnt++; |
| mutex_exit(&ill->ill_lock); |
| |
| err = 0; |
| if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { |
| mutex_enter(&nce->nce_lock); |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; |
| mutex_exit(&nce->nce_lock); |
| dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); |
| if (dropped) { |
| mutex_enter(&nce->nce_lock); |
| nce->nce_pcnt++; |
| mutex_exit(&nce->nce_lock); |
| } |
| NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); |
| mutex_enter(&ipst->ips_ndp6->ndp_g_lock); |
| err = EINPROGRESS; |
| } else if (flags & NCE_F_UNSOL_ADV) { |
| /* |
| * We account for the transmit below by assigning one |
| * less than the ndd variable. Subsequent decrements |
| * are done in ndp_timer. |
| */ |
| mutex_enter(&nce->nce_lock); |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; |
| mutex_exit(&nce->nce_lock); |
| dropped = nce_xmit_advert(nce, B_TRUE, &ipv6_all_hosts_mcast, |
| 0); |
| mutex_enter(&nce->nce_lock); |
| if (dropped) |
| nce->nce_unsolicit_count++; |
| if (nce->nce_unsolicit_count != 0) { |
| ASSERT(nce->nce_timeout_id == 0); |
| nce->nce_timeout_id = timeout(ndp_timer, nce, |
| MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); |
| } |
| mutex_exit(&nce->nce_lock); |
| mutex_enter(&ipst->ips_ndp6->ndp_g_lock); |
| } |
| |
| /* |
| * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then |
| * we call nce_fastpath as soon as the nce is resolved in ndp_process. |
| * We call nce_fastpath from nce_update if the link layer address of |
| * the peer changes from nce_update |
| */ |
| if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) |
| nce_fastpath(nce); |
| return (err); |
| } |
| |
| int |
| ndp_lookup_then_add_v6(ill_t *ill, boolean_t match_illgrp, uchar_t *hw_addr, |
| const in6_addr_t *addr, const in6_addr_t *mask, |
| const in6_addr_t *extract_mask, uint32_t hw_extract_start, uint16_t flags, |
| uint16_t state, nce_t **newnce) |
| { |
| int err = 0; |
| nce_t *nce; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| ASSERT(ill->ill_isv6); |
| mutex_enter(&ipst->ips_ndp6->ndp_g_lock); |
| |
| /* Get head of v6 hash table */ |
| nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); |
| nce = nce_lookup_addr(ill, match_illgrp, addr, nce); |
| if (nce == NULL) { |
| err = ndp_add_v6(ill, |
| hw_addr, |
| addr, |
| mask, |
| extract_mask, |
| hw_extract_start, |
| flags, |
| state, |
| newnce); |
| } else { |
| *newnce = nce; |
| err = EEXIST; |
| } |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| return (err); |
| } |
| |
| /* |
| * Remove all the CONDEMNED nces from the appropriate hash table. |
| * We create a private list of NCEs, these may have ires pointing |
| * to them, so the list will be passed through to clean up dependent |
| * ires and only then we can do NCE_REFRELE which can make NCE inactive. |
| */ |
| static void |
| nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) |
| { |
| nce_t *nce1; |
| nce_t **ptpn; |
| |
| ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); |
| ASSERT(ndp->ndp_g_walker == 0); |
| for (; nce; nce = nce1) { |
| nce1 = nce->nce_next; |
| mutex_enter(&nce->nce_lock); |
| if (nce->nce_flags & NCE_F_CONDEMNED) { |
| ptpn = nce->nce_ptpn; |
| nce1 = nce->nce_next; |
| if (nce1 != NULL) |
| nce1->nce_ptpn = ptpn; |
| *ptpn = nce1; |
| nce->nce_ptpn = NULL; |
| nce->nce_next = NULL; |
| nce->nce_next = *free_nce_list; |
| *free_nce_list = nce; |
| } |
| mutex_exit(&nce->nce_lock); |
| } |
| } |
| |
| /* |
| * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() |
| * will return this NCE. Also no new IREs will be created that |
| * point to this NCE (See ire_add_v6). Also no new timeouts will |
| * be started (See NDP_RESTART_TIMER). |
| * 2. Cancel any currently running timeouts. |
| * 3. If there is an ndp walker, return. The walker will do the cleanup. |
| * This ensures that walkers see a consistent list of NCEs while walking. |
| * 4. Otherwise remove the NCE from the list of NCEs |
| * 5. Delete all IREs pointing to this NCE. |
| */ |
| void |
| ndp_delete(nce_t *nce) |
| { |
| nce_t **ptpn; |
| nce_t *nce1; |
| int ipversion = nce->nce_ipversion; |
| ndp_g_t *ndp; |
| ip_stack_t *ipst = nce->nce_ill->ill_ipst; |
| |
| if (ipversion == IPV4_VERSION) |
| ndp = ipst->ips_ndp4; |
| else |
| ndp = ipst->ips_ndp6; |
| |
| /* Serialize deletes */ |
| mutex_enter(&nce->nce_lock); |
| if (nce->nce_flags & NCE_F_CONDEMNED) { |
| /* Some other thread is doing the delete */ |
| mutex_exit(&nce->nce_lock); |
| return; |
| } |
| /* |
| * Caller has a refhold. Also 1 ref for being in the list. Thus |
| * refcnt has to be >= 2 |
| */ |
| ASSERT(nce->nce_refcnt >= 2); |
| nce->nce_flags |= NCE_F_CONDEMNED; |
| mutex_exit(&nce->nce_lock); |
| |
| nce_fastpath_list_delete(nce); |
| |
| /* |
| * Cancel any running timer. Timeout can't be restarted |
| * since CONDEMNED is set. Can't hold nce_lock across untimeout. |
| * Passing invalid timeout id is fine. |
| */ |
| if (nce->nce_timeout_id != 0) { |
| (void) untimeout(nce->nce_timeout_id); |
| nce->nce_timeout_id = 0; |
| } |
| |
| mutex_enter(&ndp->ndp_g_lock); |
| if (nce->nce_ptpn == NULL) { |
| /* |
| * The last ndp walker has already removed this nce from |
| * the list after we marked the nce CONDEMNED and before |
| * we grabbed the global lock. |
| */ |
| mutex_exit(&ndp->ndp_g_lock); |
| return; |
| } |
| if (ndp->ndp_g_walker > 0) { |
| /* |
| * Can't unlink. The walker will clean up |
| */ |
| ndp->ndp_g_walker_cleanup = B_TRUE; |
| mutex_exit(&ndp->ndp_g_lock); |
| return; |
| } |
| |
| /* |
| * Now remove the nce from the list. NDP_RESTART_TIMER won't restart |
| * the timer since it is marked CONDEMNED. |
| */ |
| ptpn = nce->nce_ptpn; |
| nce1 = nce->nce_next; |
| if (nce1 != NULL) |
| nce1->nce_ptpn = ptpn; |
| *ptpn = nce1; |
| nce->nce_ptpn = NULL; |
| nce->nce_next = NULL; |
| mutex_exit(&ndp->ndp_g_lock); |
| |
| nce_ire_delete(nce); |
| } |
| |
| void |
| ndp_inactive(nce_t *nce) |
| { |
| mblk_t **mpp; |
| ill_t *ill; |
| |
| ASSERT(nce->nce_refcnt == 0); |
| ASSERT(MUTEX_HELD(&nce->nce_lock)); |
| ASSERT(nce->nce_fastpath == NULL); |
| |
| /* Free all nce allocated messages */ |
| mpp = &nce->nce_first_mp_to_free; |
| do { |
| while (*mpp != NULL) { |
| mblk_t *mp; |
| |
| mp = *mpp; |
| *mpp = mp->b_next; |
| |
| inet_freemsg(mp); |
| } |
| } while (mpp++ != &nce->nce_last_mp_to_free); |
| |
| #ifdef DEBUG |
| nce_trace_cleanup(nce); |
| #endif |
| |
| ill = nce->nce_ill; |
| mutex_enter(&ill->ill_lock); |
| DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, |
| (char *), "nce", (void *), nce); |
| ill->ill_nce_cnt--; |
| /* |
| * If the number of nce's associated with this ill have dropped |
| * to zero, check whether we need to restart any operation that |
| * is waiting for this to happen. |
| */ |
| if (ILL_DOWN_OK(ill)) { |
| /* ipif_ill_refrele_tail drops the ill_lock */ |
| ipif_ill_refrele_tail(ill); |
| } else { |
| mutex_exit(&ill->ill_lock); |
| } |
| mutex_destroy(&nce->nce_lock); |
| if (nce->nce_mp != NULL) |
| inet_freemsg(nce->nce_mp); |
| } |
| |
| /* |
| * ndp_walk routine. Delete the nce if it is associated with the ill |
| * that is going away. Always called as a writer. |
| */ |
| void |
| ndp_delete_per_ill(nce_t *nce, uchar_t *arg) |
| { |
| if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { |
| ndp_delete(nce); |
| } |
| } |
| |
| /* |
| * Walk a list of to be inactive NCEs and blow away all the ires. |
| */ |
| static void |
| nce_ire_delete_list(nce_t *nce) |
| { |
| nce_t *nce_next; |
| |
| ASSERT(nce != NULL); |
| while (nce != NULL) { |
| nce_next = nce->nce_next; |
| nce->nce_next = NULL; |
| |
| /* |
| * It is possible for the last ndp walker (this thread) |
| * to come here after ndp_delete has marked the nce CONDEMNED |
| * and before it has removed the nce from the fastpath list |
| * or called untimeout. So we need to do it here. It is safe |
| * for both ndp_delete and this thread to do it twice or |
| * even simultaneously since each of the threads has a |
| * reference on the nce. |
| */ |
| nce_fastpath_list_delete(nce); |
| /* |
| * Cancel any running timer. Timeout can't be restarted |
| * since CONDEMNED is set. Can't hold nce_lock across untimeout. |
| * Passing invalid timeout id is fine. |
| */ |
| if (nce->nce_timeout_id != 0) { |
| (void) untimeout(nce->nce_timeout_id); |
| nce->nce_timeout_id = 0; |
| } |
| /* |
| * We might hit this func thus in the v4 case: |
| * ipif_down->ipif_ndp_down->ndp_walk |
| */ |
| |
| if (nce->nce_ipversion == IPV4_VERSION) { |
| ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, |
| IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); |
| } else { |
| ASSERT(nce->nce_ipversion == IPV6_VERSION); |
| ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, |
| IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); |
| } |
| NCE_REFRELE_NOTR(nce); |
| nce = nce_next; |
| } |
| } |
| |
| /* |
| * Delete an ire when the nce goes away. |
| */ |
| /* ARGSUSED */ |
| static void |
| nce_ire_delete(nce_t *nce) |
| { |
| if (nce->nce_ipversion == IPV6_VERSION) { |
| ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, |
| nce_ire_delete1, (char *)nce, nce->nce_ill); |
| NCE_REFRELE_NOTR(nce); |
| } else { |
| ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, |
| nce_ire_delete1, (char *)nce, nce->nce_ill); |
| NCE_REFRELE_NOTR(nce); |
| } |
| } |
| |
| /* |
| * ire_walk routine used to delete every IRE that shares this nce |
| */ |
| static void |
| nce_ire_delete1(ire_t *ire, char *nce_arg) |
| { |
| nce_t *nce = (nce_t *)nce_arg; |
| |
| ASSERT(ire->ire_type == IRE_CACHE); |
| |
| if (ire->ire_nce == nce) { |
| ASSERT(ire->ire_ipversion == nce->nce_ipversion); |
| ire_delete(ire); |
| } |
| } |
| |
| /* |
| * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. |
| */ |
| boolean_t |
| ndp_restart_dad(nce_t *nce) |
| { |
| boolean_t started; |
| boolean_t dropped; |
| |
| if (nce == NULL) |
| return (B_FALSE); |
| mutex_enter(&nce->nce_lock); |
| if (nce->nce_state == ND_PROBE) { |
| mutex_exit(&nce->nce_lock); |
| started = B_TRUE; |
| } else if (nce->nce_state == ND_REACHABLE) { |
| nce->nce_state = ND_PROBE; |
| nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; |
| mutex_exit(&nce->nce_lock); |
| dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); |
| if (dropped) { |
| mutex_enter(&nce->nce_lock); |
| nce->nce_pcnt++; |
| mutex_exit(&nce->nce_lock); |
| } |
| NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); |
| started = B_TRUE; |
| } else { |
| mutex_exit(&nce->nce_lock); |
| started = B_FALSE; |
| } |
| return (started); |
| } |
| |
| /* |
| * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. |
| * If one is found, the refcnt on the nce will be incremented. |
| */ |
| nce_t * |
| ndp_lookup_v6(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, |
| boolean_t caller_holds_lock) |
| { |
| nce_t *nce; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| ASSERT(ill->ill_isv6); |
| if (!caller_holds_lock) |
| mutex_enter(&ipst->ips_ndp6->ndp_g_lock); |
| |
| /* Get head of v6 hash table */ |
| nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); |
| nce = nce_lookup_addr(ill, match_illgrp, addr, nce); |
| if (nce == NULL) |
| nce = nce_lookup_mapping(ill, addr); |
| if (!caller_holds_lock) |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| return (nce); |
| } |
| /* |
| * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. |
| * If one is found, the refcnt on the nce will be incremented. |
| * Since multicast mappings are handled in arp, there are no nce_mcast_entries |
| * so we skip the nce_lookup_mapping call. |
| * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL |
| */ |
| nce_t * |
| ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) |
| { |
| nce_t *nce; |
| in6_addr_t addr6; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| if (!caller_holds_lock) |
| mutex_enter(&ipst->ips_ndp4->ndp_g_lock); |
| |
| /* Get head of v4 hash table */ |
| nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); |
| IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); |
| /* |
| * NOTE: IPv4 never matches across the illgrp since the NCE's we're |
| * looking up have fastpath headers that are inherently per-ill. |
| */ |
| nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); |
| if (!caller_holds_lock) |
| mutex_exit(&ipst->ips_ndp4->ndp_g_lock); |
| return (nce); |
| } |
| |
| /* |
| * Cache entry lookup. Try to find an nce matching the parameters passed. |
| * Look only for exact entries (no mappings). If an nce is found, increment |
| * the hold count on that nce. The caller passes in the start of the |
| * appropriate hash table, and must be holding the appropriate global |
| * lock (ndp_g_lock). |
| */ |
| static nce_t * |
| nce_lookup_addr(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, |
| nce_t *nce) |
| { |
| ndp_g_t *ndp; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| if (ill->ill_isv6) |
| ndp = ipst->ips_ndp6; |
| else |
| ndp = ipst->ips_ndp4; |
| |
| ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); |
| if (IN6_IS_ADDR_UNSPECIFIED(addr)) |
| return (NULL); |
| for (; nce != NULL; nce = nce->nce_next) { |
| if (nce->nce_ill == ill || |
| match_illgrp && IS_IN_SAME_ILLGRP(ill, nce->nce_ill)) { |
| if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && |
| IN6_ARE_ADDR_EQUAL(&nce->nce_mask, |
| &ipv6_all_ones)) { |
| mutex_enter(&nce->nce_lock); |
| if (!(nce->nce_flags & NCE_F_CONDEMNED)) { |
| NCE_REFHOLD_LOCKED(nce); |
| mutex_exit(&nce->nce_lock); |
| break; |
| } |
| mutex_exit(&nce->nce_lock); |
| } |
| } |
| } |
| return (nce); |
| } |
| |
| /* |
| * Cache entry lookup. Try to find an nce matching the parameters passed. |
| * Look only for mappings. |
| */ |
| static nce_t * |
| nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) |
| { |
| nce_t *nce; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| ASSERT(ill != NULL && ill->ill_isv6); |
| ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); |
| if (!IN6_IS_ADDR_MULTICAST(addr)) |
| return (NULL); |
| nce = ipst->ips_ndp6->nce_mask_entries; |
| for (; nce != NULL; nce = nce->nce_next) |
| if (nce->nce_ill == ill && |
| (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { |
| mutex_enter(&nce->nce_lock); |
| if (!(nce->nce_flags & NCE_F_CONDEMNED)) { |
| NCE_REFHOLD_LOCKED(nce); |
| mutex_exit(&nce->nce_lock); |
| break; |
| } |
| mutex_exit(&nce->nce_lock); |
| } |
| return (nce); |
| } |
| |
| /* |
| * Process passed in parameters either from an incoming packet or via |
| * user ioctl. |
| */ |
| static void |
| nce_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) |
| { |
| ill_t *ill = nce->nce_ill; |
| uint32_t hw_addr_len = ill->ill_nd_lla_len; |
| mblk_t *mp; |
| boolean_t ll_updated = B_FALSE; |
| boolean_t ll_changed; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| ASSERT(nce->nce_ipversion == IPV6_VERSION); |
| /* |
| * No updates of link layer address or the neighbor state is |
| * allowed, when the cache is in NONUD state. This still |
| * allows for responding to reachability solicitation. |
| */ |
| mutex_enter(&nce->nce_lock); |
| if (nce->nce_state == ND_INCOMPLETE) { |
| if (hw_addr == NULL) { |
| mutex_exit(&nce->nce_lock); |
| return; |
| } |
| nce_set_ll(nce, hw_addr); |
| /* |
| * Update nce state and send the queued packets |
| * back to ip this time ire will be added. |
| */ |
| if (flag & ND_NA_FLAG_SOLICITED) { |
| nce_update(nce, ND_REACHABLE, NULL); |
| } else { |
| nce_update(nce, ND_STALE, NULL); |
| } |
| mutex_exit(&nce->nce_lock); |
| nce_fastpath(nce); |
| mutex_enter(&nce->nce_lock); |
| mp = nce->nce_qd_mp; |
| nce->nce_qd_mp = NULL; |
| mutex_exit(&nce->nce_lock); |
| while (mp != NULL) { |
| mblk_t *nxt_mp, *data_mp; |
| |
| nxt_mp = mp->b_next; |
| mp->b_next = NULL; |
| |
| if (mp->b_datap->db_type == M_CTL) |
| data_mp = mp->b_cont; |
| else |
| data_mp = mp; |
| if (data_mp->b_prev != NULL) { |
| ill_t *inbound_ill; |
| queue_t *fwdq = NULL; |
| uint_t ifindex; |
| |
| ifindex = (uint_t)(uintptr_t)data_mp->b_prev; |
| inbound_ill = ill_lookup_on_ifindex(ifindex, |
| B_TRUE, NULL, NULL, NULL, NULL, ipst); |
| if (inbound_ill == NULL) { |
| data_mp->b_prev = NULL; |
| freemsg(mp); |
| return; |
| } else { |
| fwdq = inbound_ill->ill_rq; |
| } |
| data_mp->b_prev = NULL; |
| /* |
| * Send a forwarded packet back into ip_rput_v6 |
| * just as in ire_send_v6(). |
| * Extract the queue from b_prev (set in |
| * ip_rput_data_v6). |
| */ |
| if (fwdq != NULL) { |
| /* |
| * Forwarded packets hop count will |
| * get decremented in ip_rput_data_v6 |
| */ |
| if (data_mp != mp) |
| freeb(mp); |
| put(fwdq, data_mp); |
| } else { |
| /* |
| * Send locally originated packets back |
| * into ip_wput_v6. |
| */ |
| put(ill->ill_wq, mp); |
| } |
| ill_refrele(inbound_ill); |
| } else { |
| put(ill->ill_wq, mp); |
| } |
| mp = nxt_mp; |
| } |
| return; |
| } |
| ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); |
| if (!is_adv) { |
| /* If this is a SOLICITATION request only */ |
| if (ll_changed) |
| nce_update(nce, ND_STALE, hw_addr); |
| mutex_exit(&nce->nce_lock); |
| return; |
| } |
| if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { |
| /* If in any other state than REACHABLE, ignore */ |
| if (nce->nce_state == ND_REACHABLE) { |
| nce_update(nce, ND_STALE, NULL); |
| } |
| mutex_exit(&nce->nce_lock); |
| return; |
| } else { |
| if (ll_changed) { |
| nce_update(nce, ND_UNCHANGED, hw_addr); |
| ll_updated = B_TRUE; |
| } |
| if (flag & ND_NA_FLAG_SOLICITED) { |
| nce_update(nce, ND_REACHABLE, NULL); |
| } else { |
| if (ll_updated) { |
| nce_update(nce, ND_STALE, NULL); |
| } |
| } |
| mutex_exit(&nce->nce_lock); |
| if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & |
| NCE_F_ISROUTER)) { |
| ire_t *ire; |
| |
| /* |
| * Router turned to host. We need to remove the |
| * entry as well as any default route that may be |
| * using this as a next hop. This is required by |
| * section 7.2.5 of RFC 2461. |
| */ |
| ire = ire_ftable_lookup_v6(&ipv6_all_zeros, |
| &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, |
| nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, |
| MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | |
| MATCH_IRE_DEFAULT, ipst); |
| if (ire != NULL) { |
| ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); |
| ire_delete(ire); |
| ire_refrele(ire); |
| } |
| ndp_delete(nce); |
| } |
| } |
| } |
| |
| /* |
| * Walker state structure used by ndp_process() / ndp_process_entry(). |
| */ |
| typedef struct ndp_process_data { |
| ill_t *np_ill; /* ill/illgrp to match against */ |
| const in6_addr_t *np_addr; /* IPv6 address to match */ |
| uchar_t *np_hw_addr; /* passed to nce_process() */ |
| uint32_t np_flag; /* passed to nce_process() */ |
| boolean_t np_is_adv; /* passed to nce_process() */ |
| } ndp_process_data_t; |
| |
| /* |
| * Walker callback used by ndp_process() for IPMP groups: calls nce_process() |
| * for each NCE with a matching address that's in the same IPMP group. |
| */ |
| static void |
| ndp_process_entry(nce_t *nce, void *arg) |
| { |
| ndp_process_data_t *npp = arg; |
| |
| if (IS_IN_SAME_ILLGRP(nce->nce_ill, npp->np_ill) && |
| IN6_ARE_ADDR_EQUAL(&nce->nce_addr, npp->np_addr) && |
| IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { |
| nce_process(nce, npp->np_hw_addr, npp->np_flag, npp->np_is_adv); |
| } |
| } |
| |
| /* |
| * Wrapper around nce_process() that handles IPMP. In particular, for IPMP, |
| * NCEs are per-underlying-ill (because of nce_fp_mp) and thus we may have |
| * more than one NCE for a given IPv6 address to tend to. In that case, we |
| * need to walk all NCEs and callback nce_process() for each one. Since this |
| * is expensive, in the non-IPMP case we just directly call nce_process(). |
| * Ultimately, nce_fp_mp needs to be moved out of the nce_t so that all IP |
| * interfaces in an IPMP group share the same NCEs -- at which point this |
| * function can be removed entirely. |
| */ |
| void |
| ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) |
| { |
| ill_t *ill = nce->nce_ill; |
| struct ndp_g_s *ndp = ill->ill_ipst->ips_ndp6; |
| ndp_process_data_t np; |
| |
| if (ill->ill_grp == NULL) { |
| nce_process(nce, hw_addr, flag, is_adv); |
| return; |
| } |
| |
| /* IPMP case: walk all NCEs */ |
| np.np_ill = ill; |
| np.np_addr = &nce->nce_addr; |
| np.np_flag = flag; |
| np.np_is_adv = is_adv; |
| np.np_hw_addr = hw_addr; |
| |
| ndp_walk_common(ndp, NULL, (pfi_t)ndp_process_entry, &np, ALL_ZONES); |
| } |
| |
| /* |
| * Pass arg1 to the pfi supplied, along with each nce in existence. |
| * ndp_walk() places a REFHOLD on the nce and drops the lock when |
| * walking the hash list. |
| */ |
| void |
| ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, |
| boolean_t trace) |
| { |
| nce_t *nce; |
| nce_t *nce1; |
| nce_t **ncep; |
| nce_t *free_nce_list = NULL; |
| |
| mutex_enter(&ndp->ndp_g_lock); |
| /* Prevent ndp_delete from unlink and free of NCE */ |
| ndp->ndp_g_walker++; |
| mutex_exit(&ndp->ndp_g_lock); |
| for (ncep = ndp->nce_hash_tbl; |
| ncep < A_END(ndp->nce_hash_tbl); ncep++) { |
| for (nce = *ncep; nce != NULL; nce = nce1) { |
| nce1 = nce->nce_next; |
| if (ill == NULL || nce->nce_ill == ill) { |
| if (trace) { |
| NCE_REFHOLD(nce); |
| (*pfi)(nce, arg1); |
| NCE_REFRELE(nce); |
| } else { |
| NCE_REFHOLD_NOTR(nce); |
| (*pfi)(nce, arg1); |
| NCE_REFRELE_NOTR(nce); |
| } |
| } |
| } |
| } |
| for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { |
| nce1 = nce->nce_next; |
| if (ill == NULL || nce->nce_ill == ill) { |
| if (trace) { |
| NCE_REFHOLD(nce); |
| (*pfi)(nce, arg1); |
| NCE_REFRELE(nce); |
| } else { |
| NCE_REFHOLD_NOTR(nce); |
| (*pfi)(nce, arg1); |
| NCE_REFRELE_NOTR(nce); |
| } |
| } |
| } |
| mutex_enter(&ndp->ndp_g_lock); |
| ndp->ndp_g_walker--; |
| /* |
| * While NCE's are removed from global list they are placed |
| * in a private list, to be passed to nce_ire_delete_list(). |
| * The reason is, there may be ires pointing to this nce |
| * which needs to cleaned up. |
| */ |
| if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { |
| /* Time to delete condemned entries */ |
| for (ncep = ndp->nce_hash_tbl; |
| ncep < A_END(ndp->nce_hash_tbl); ncep++) { |
| nce = *ncep; |
| if (nce != NULL) { |
| nce_remove(ndp, nce, &free_nce_list); |
| } |
| } |
| nce = ndp->nce_mask_entries; |
| if (nce != NULL) { |
| nce_remove(ndp, nce, &free_nce_list); |
| } |
| ndp->ndp_g_walker_cleanup = B_FALSE; |
| } |
| |
| mutex_exit(&ndp->ndp_g_lock); |
| |
| if (free_nce_list != NULL) { |
| nce_ire_delete_list(free_nce_list); |
| } |
| } |
| |
| /* |
| * Walk everything. |
| * Note that ill can be NULL hence can't derive the ipst from it. |
| */ |
| void |
| ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) |
| { |
| ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); |
| ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); |
| } |
| |
| /* |
| * Process resolve requests. Handles both mapped entries |
| * as well as cases that needs to be send out on the wire. |
| * Lookup a NCE for a given IRE. Regardless of whether one exists |
| * or one is created, we defer making ire point to nce until the |
| * ire is actually added at which point the nce_refcnt on the nce is |
| * incremented. This is done primarily to have symmetry between ire_add() |
| * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. |
| */ |
| int |
| ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) |
| { |
| nce_t *nce, *hw_nce = NULL; |
| int err; |
| ill_t *ipmp_ill; |
| uint16_t nce_flags; |
| uint32_t ms; |
| mblk_t *mp_nce = NULL; |
| ip_stack_t *ipst = ill->ill_ipst; |
| uchar_t *hwaddr = NULL; |
| |
| ASSERT(ill->ill_isv6); |
| |
| if (IN6_IS_ADDR_MULTICAST(dst)) |
| return (nce_set_multicast(ill, dst)); |
| |
| nce_flags = (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0; |
| |
| /* |
| * If `ill' is under IPMP, then first check to see if there's an NCE |
| * for `dst' on the IPMP meta-interface (e.g., because an application |
| * explicitly did an SIOCLIFSETND to tie a hardware address to `dst'). |
| * If so, we use that hardware address when creating the NCE below. |
| * Note that we don't yet have a mechanism to remove these NCEs if the |
| * NCE for `dst' on the IPMP meta-interface is subsequently removed -- |
| * but rather than build such a beast, we should fix NCEs so that they |
| * can be properly shared across an IPMP group. |
| */ |
| if (IS_UNDER_IPMP(ill)) { |
| if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { |
| hw_nce = ndp_lookup_v6(ipmp_ill, B_FALSE, dst, B_FALSE); |
| if (hw_nce != NULL && hw_nce->nce_res_mp != NULL) { |
| hwaddr = hw_nce->nce_res_mp->b_rptr + |
| NCE_LL_ADDR_OFFSET(ipmp_ill); |
| nce_flags |= hw_nce->nce_flags; |
| } |
| ill_refrele(ipmp_ill); |
| } |
| } |
| |
| err = ndp_lookup_then_add_v6(ill, |
| B_FALSE, /* NCE fastpath is per ill; don't match across group */ |
| hwaddr, |
| dst, |
| &ipv6_all_ones, |
| &ipv6_all_zeros, |
| 0, |
| nce_flags, |
| hwaddr != NULL ? ND_REACHABLE : ND_INCOMPLETE, |
| &nce); |
| |
| if (hw_nce != NULL) |
| NCE_REFRELE(hw_nce); |
| |
| switch (err) { |
| case 0: |
| /* |
| * New cache entry was created. Make sure that the state |
| * is not ND_INCOMPLETE. It can be in some other state |
| * even before we send out the solicitation as we could |
| * get un-solicited advertisements. |
| * |
| * If this is an XRESOLV interface, simply return 0, |
| * since we don't want to solicit just yet. |
| */ |
| if (ill->ill_flags & ILLF_XRESOLV) { |
| NCE_REFRELE(nce); |
| return (0); |
| } |
| |
| mutex_enter(&nce->nce_lock); |
| if (nce->nce_state != ND_INCOMPLETE) { |
| mutex_exit(&nce->nce_lock); |
| NCE_REFRELE(nce); |
| return (0); |
| } |
| mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); |
| if (mp_nce == NULL) { |
| /* The caller will free mp */ |
| mutex_exit(&nce->nce_lock); |
| ndp_delete(nce); |
| NCE_REFRELE(nce); |
| return (ENOMEM); |
| } |
| if ((ms = nce_solicit(nce, mp_nce)) == 0) { |
| /* The caller will free mp */ |
| if (mp_nce != mp) |
| freeb(mp_nce); |
| mutex_exit(&nce->nce_lock); |
| ndp_delete(nce); |
| NCE_REFRELE(nce); |
| return (EBUSY); |
| } |
| mutex_exit(&nce->nce_lock); |
| NDP_RESTART_TIMER(nce, (clock_t)ms); |
| NCE_REFRELE(nce); |
| return (EINPROGRESS); |
| case EEXIST: |
| /* Resolution in progress just queue the packet */ |
| mutex_enter(&nce->nce_lock); |
| if (nce->nce_state == ND_INCOMPLETE) { |
| mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); |
| if (mp_nce == NULL) { |
| err = ENOMEM; |
| } else { |
| nce_queue_mp(nce, mp_nce); |
| err = EINPROGRESS; |
| } |
| } else { |
| /* |
| * Any other state implies we have |
| * a nce but IRE needs to be added ... |
| * ire_add_v6() will take care of the |
| * the case when the nce becomes CONDEMNED |
| * before the ire is added to the table. |
| */ |
| err = 0; |
| } |
| mutex_exit(&nce->nce_lock); |
| NCE_REFRELE(nce); |
| break; |
| default: |
| ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); |
| break; |
| } |
| return (err); |
| } |
| |
| /* |
| * When there is no resolver, the link layer template is passed in |
| * the IRE. |
| * Lookup a NCE for a given IRE. Regardless of whether one exists |
| * or one is created, we defer making ire point to nce until the |
| * ire is actually added at which point the nce_refcnt on the nce is |
| * incremented. This is done primarily to have symmetry between ire_add() |
| * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. |
| */ |
| int |
| ndp_noresolver(ill_t *ill, const in6_addr_t *dst) |
| { |
| nce_t *nce; |
| int err = 0; |
| |
| ASSERT(ill != NULL); |
| ASSERT(ill->ill_isv6); |
| if (IN6_IS_ADDR_MULTICAST(dst)) { |
| err = nce_set_multicast(ill, dst); |
| return (err); |
| } |
| |
| err = ndp_lookup_then_add_v6(ill, |
| B_FALSE, /* NCE fastpath is per ill; don't match across group */ |
| NULL, /* hardware address */ |
| dst, |
| &ipv6_all_ones, |
| &ipv6_all_zeros, |
| 0, |
| (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, |
| ND_REACHABLE, |
| &nce); |
| |
| switch (err) { |
| case 0: |
| /* |
| * Cache entry with a proper resolver cookie was |
| * created. |
| */ |
| NCE_REFRELE(nce); |
| break; |
| case EEXIST: |
| err = 0; |
| NCE_REFRELE(nce); |
| break; |
| default: |
| ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); |
| break; |
| } |
| return (err); |
| } |
| |
| /* |
| * For each interface an entry is added for the unspecified multicast group. |
| * Here that mapping is used to form the multicast cache entry for a particular |
| * multicast destination. |
| */ |
| static int |
| nce_set_multicast(ill_t *ill, const in6_addr_t *dst) |
| { |
| nce_t *mnce; /* Multicast mapping entry */ |
| nce_t *nce; |
| uchar_t *hw_addr = NULL; |
| int err = 0; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| ASSERT(ill != NULL); |
| ASSERT(ill->ill_isv6); |
| ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); |
| |
| mutex_enter(&ipst->ips_ndp6->ndp_g_lock); |
| nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); |
| nce = nce_lookup_addr(ill, B_FALSE, dst, nce); |
| if (nce != NULL) { |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| NCE_REFRELE(nce); |
| return (0); |
| } |
| /* No entry, now lookup for a mapping this should never fail */ |
| mnce = nce_lookup_mapping(ill, dst); |
| if (mnce == NULL) { |
| /* Something broken for the interface. */ |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| return (ESRCH); |
| } |
| ASSERT(mnce->nce_flags & NCE_F_MAPPING); |
| if (ill->ill_net_type == IRE_IF_RESOLVER) { |
| /* |
| * For IRE_IF_RESOLVER a hardware mapping can be |
| * generated, for IRE_IF_NORESOLVER, resolution cookie |
| * in the ill is copied in ndp_add_v6(). |
| */ |
| hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); |
| if (hw_addr == NULL) { |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| NCE_REFRELE(mnce); |
| return (ENOMEM); |
| } |
| nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); |
| } |
| NCE_REFRELE(mnce); |
| /* |
| * IRE_IF_NORESOLVER type simply copies the resolution |
| * cookie passed in. So no hw_addr is needed. |
| */ |
| err = ndp_add_v6(ill, |
| hw_addr, |
| dst, |
| &ipv6_all_ones, |
| &ipv6_all_zeros, |
| 0, |
| NCE_F_NONUD, |
| ND_REACHABLE, |
| &nce); |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| if (hw_addr != NULL) |
| kmem_free(hw_addr, ill->ill_nd_lla_len); |
| if (err != 0) { |
| ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); |
| return (err); |
| } |
| NCE_REFRELE(nce); |
| return (0); |
| } |
| |
| /* |
| * Return the link layer address, and any flags of a nce. |
| */ |
| int |
| ndp_query(ill_t *ill, struct lif_nd_req *lnr) |
| { |
| nce_t *nce; |
| in6_addr_t *addr; |
| sin6_t *sin6; |
| dl_unitdata_req_t *dl; |
| |
| ASSERT(ill != NULL && ill->ill_isv6); |
| sin6 = (sin6_t *)&lnr->lnr_addr; |
| addr = &sin6->sin6_addr; |
| |
| /* |
| * NOTE: if the ill is an IPMP interface, then match against the whole |
| * illgrp. This e.g. allows in.ndpd to retrieve the link layer |
| * addresses for the data addresses on an IPMP interface even though |
| * ipif_ndp_up() created them with an nce_ill of ipif_bound_ill. |
| */ |
| nce = ndp_lookup_v6(ill, IS_IPMP(ill), addr, B_FALSE); |
| if (nce == NULL) |
| return (ESRCH); |
| /* If in INCOMPLETE state, no link layer address is available yet */ |
| if (nce->nce_state == ND_INCOMPLETE) |
| goto done; |
| dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; |
| if (ill->ill_flags & ILLF_XRESOLV) |
| lnr->lnr_hdw_len = dl->dl_dest_addr_length; |
| else |
| lnr->lnr_hdw_len = ill->ill_nd_lla_len; |
| ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= |
| sizeof (lnr->lnr_hdw_addr)); |
| bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), |
| (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); |
| if (nce->nce_flags & NCE_F_ISROUTER) |
| lnr->lnr_flags = NDF_ISROUTER_ON; |
| if (nce->nce_flags & NCE_F_ANYCAST) |
| lnr->lnr_flags |= NDF_ANYCAST_ON; |
| done: |
| NCE_REFRELE(nce); |
| return (0); |
| } |
| |
| /* |
| * Send Enable/Disable multicast reqs to driver. |
| */ |
| int |
| ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, |
| uint32_t hw_addr_offset, mblk_t *mp) |
| { |
| nce_t *nce; |
| uchar_t *hw_addr; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| ASSERT(ill != NULL && ill->ill_isv6); |
| ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); |
| hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); |
| if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { |
| freemsg(mp); |
| return (EINVAL); |
| } |
| mutex_enter(&ipst->ips_ndp6->ndp_g_lock); |
| nce = nce_lookup_mapping(ill, addr); |
| if (nce == NULL) { |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| freemsg(mp); |
| return (ESRCH); |
| } |
| mutex_exit(&ipst->ips_ndp6->ndp_g_lock); |
| /* |
| * Update dl_addr_length and dl_addr_offset for primitives that |
| * have physical addresses as opposed to full saps |
| */ |
| switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { |
| case DL_ENABMULTI_REQ: |
| /* Track the state if this is the first enabmulti */ |
| if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) |
| ill->ill_dlpi_multicast_state = IDS_INPROGRESS; |
| ip1dbg(("ndp_mcastreq: ENABMULTI\n")); |
| break; |
| case DL_DISABMULTI_REQ: |
| ip1dbg(("ndp_mcastreq: DISABMULTI\n")); |
| break; |
| default: |
| NCE_REFRELE(nce); |
| ip1dbg(("ndp_mcastreq: default\n")); |
| return (EINVAL); |
| } |
| nce_make_mapping(nce, hw_addr, (uchar_t *)addr); |
| NCE_REFRELE(nce); |
| ill_dlpi_send(ill, mp); |
| return (0); |
| } |
| |
| /* |
| * Send a neighbor solicitation. |
| * Returns number of milliseconds after which we should either rexmit or abort. |
| * Return of zero means we should abort. |
| * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. |
| * |
| * NOTE: This routine drops nce_lock (and later reacquires it) when sending |
| * the packet. |
| * NOTE: This routine does not consume mp. |
| */ |
| uint32_t |
| nce_solicit(nce_t *nce, mblk_t *mp) |
| { |
| ip6_t *ip6h; |
| in6_addr_t sender; |
| boolean_t dropped; |
| |
| ASSERT(MUTEX_HELD(&nce->nce_lock)); |
| |
| if (nce->nce_rcnt == 0) |
| return (0); |
| |
| if (mp == NULL) { |
| ASSERT(nce->nce_qd_mp != NULL); |
| mp = nce->nce_qd_mp; |
| } else { |
| nce_queue_mp(nce, mp); |
| } |
| |
| /* Handle ip_newroute_v6 giving us IPSEC packets */ |
| if (mp->b_datap->db_type == M_CTL) |
| mp = mp->b_cont; |
| |
| ip6h = (ip6_t *)mp->b_rptr; |
| if (ip6h->ip6_nxt == IPPROTO_RAW) { |
| /* |
| * This message should have been pulled up already in |
| * ip_wput_v6. We can't do pullups here because the message |
| * could be from the nce_qd_mp which could have b_next/b_prev |
| * non-NULL. |
| */ |
| ASSERT(MBLKL(mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN); |
| ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); |
| } |
| |
| /* |
| * Need to copy the sender address into a local since `mp' can |
| * go away once we drop nce_lock. |
| */ |
| sender = ip6h->ip6_src; |
| nce->nce_rcnt--; |
| mutex_exit(&nce->nce_lock); |
| dropped = nce_xmit_solicit(nce, B_TRUE, &sender, 0); |
| mutex_enter(&nce->nce_lock); |
| if (dropped) |
| nce->nce_rcnt++; |
| return (nce->nce_ill->ill_reachable_retrans_time); |
| } |
| |
| /* |
| * Attempt to recover an address on an interface that's been marked as a |
| * duplicate. Because NCEs are destroyed when the interface goes down, there's |
| * no easy way to just probe the address and have the right thing happen if |
| * it's no longer in use. Instead, we just bring it up normally and allow the |
| * regular interface start-up logic to probe for a remaining duplicate and take |
| * us back down if necessary. |
| * Neither DHCP nor temporary addresses arrive here; they're excluded by |
| * ip_ndp_excl. |
| */ |
| /* ARGSUSED */ |
| static void |
| ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) |
| { |
| ill_t *ill = rq->q_ptr; |
| ipif_t *ipif; |
| in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; |
| |
| for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { |
| /* |
| * We do not support recovery of proxy ARP'd interfaces, |
| * because the system lacks a complete proxy ARP mechanism. |
| */ |
| if ((ipif->ipif_flags & IPIF_POINTOPOINT) || |
| !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { |
| continue; |
| } |
| |
| /* |
| * If we have already recovered or if the interface is going |
| * away, then ignore. |
| */ |
| mutex_enter(&ill->ill_lock); |
| if (!(ipif->ipif_flags & IPIF_DUPLICATE) || |
| (ipif->ipif_state_flags & IPIF_CONDEMNED)) { |
| mutex_exit(&ill->ill_lock); |
| continue; |
| } |
| |
| ipif->ipif_flags &= ~IPIF_DUPLICATE; |
| ill->ill_ipif_dup_count--; |
| mutex_exit(&ill->ill_lock); |
| ipif->ipif_was_dup = B_TRUE; |
| |
| VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS); |
| (void) ipif_up_done_v6(ipif); |
| } |
| freeb(mp); |
| } |
| |
| /* |
| * Attempt to recover an IPv6 interface that's been shut down as a duplicate. |
| * As long as someone else holds the address, the interface will stay down. |
| * When that conflict goes away, the interface is brought back up. This is |
| * done so that accidental shutdowns of addresses aren't made permanent. Your |
| * server will recover from a failure. |
| * |
| * For DHCP and temporary addresses, recovery is not done in the kernel. |
| * Instead, it's handled by user space processes (dhcpagent and in.ndpd). |
| * |
| * This function is entered on a timer expiry; the ID is in ipif_recovery_id. |
| */ |
| static void |
| ipif6_dup_recovery(void *arg) |
| { |
| ipif_t *ipif = arg; |
| |
| ipif->ipif_recovery_id = 0; |
| if (!(ipif->ipif_flags & IPIF_DUPLICATE)) |
| return; |
| |
| /* |
| * No lock, because this is just an optimization. |
| */ |
| if (ipif->ipif_state_flags & IPIF_CONDEMNED) |
| return; |
| |
| /* If the link is down, we'll retry this later */ |
| if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) |
| return; |
| |
| ndp_do_recovery(ipif); |
| } |
| |
| /* |
| * Perform interface recovery by forcing the duplicate interfaces up and |
| * allowing the system to determine which ones should stay up. |
| * |
| * Called both by recovery timer expiry and link-up notification. |
| */ |
| void |
| ndp_do_recovery(ipif_t *ipif) |
| { |
| ill_t *ill = ipif->ipif_ill; |
| mblk_t *mp; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); |
| if (mp == NULL) { |
| mutex_enter(&ill->ill_lock); |
| if (ipif->ipif_recovery_id == 0 && |
| !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { |
| ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, |
| ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); |
| } |
| mutex_exit(&ill->ill_lock); |
| } else { |
| /* |
| * A recovery timer may still be running if we got here from |
| * ill_restart_dad(); cancel that timer. |
| */ |
| if (ipif->ipif_recovery_id != 0) |
| (void) untimeout(ipif->ipif_recovery_id); |
| ipif->ipif_recovery_id = 0; |
| |
| bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, |
| sizeof (ipif->ipif_v6lcl_addr)); |
| ill_refhold(ill); |
| qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, |
| B_FALSE); |
| } |
| } |
| |
| /* |
| * Find the MAC and IP addresses in an NA/NS message. |
| */ |
| static void |
| ip_ndp_find_addresses(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, in6_addr_t *targp, |
| uchar_t **haddr, uint_t *haddrlenp) |
| { |
| ip6_t *ip6h = (ip6_t *)mp->b_rptr; |
| icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); |
| nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; |
| nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; |
| uchar_t *addr; |
| int alen = 0; |
| |
| if (dl_mp == NULL) { |
| nd_opt_hdr_t *opt; |
| int len; |
| |
| /* |
| * If it's from the fast-path, then it can't be a probe |
| * message, and thus must include a linkaddr option. |
| * Extract that here. |
| */ |
| switch (icmp6->icmp6_type) { |
| case ND_NEIGHBOR_SOLICIT: |
| len = mp->b_wptr - (uchar_t *)ns; |
| if ((len -= sizeof (*ns)) > 0) { |
| opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), |
| len, ND_OPT_SOURCE_LINKADDR); |
| } |
| break; |
| case ND_NEIGHBOR_ADVERT: |
| len = mp->b_wptr - (uchar_t *)na; |
| if ((len -= sizeof (*na)) > 0) { |
| opt = ndp_get_option((nd_opt_hdr_t *)(na + 1), |
| len, ND_OPT_TARGET_LINKADDR); |
| } |
| break; |
| } |
| |
| if (opt != NULL && opt->nd_opt_len * 8 - sizeof (*opt) >= |
| ill->ill_nd_lla_len) { |
| addr = (uchar_t *)(opt + 1); |
| alen = ill->ill_nd_lla_len; |
| } |
| |
| /* |
| * We cheat a bit here for the sake of printing usable log |
| * messages in the rare case where the reply we got was unicast |
| * without a source linkaddr option, and the interface is in |
| * fastpath mode. (Sigh.) |
| */ |
| if (alen == 0 && ill->ill_type == IFT_ETHER && |
| MBLKHEAD(mp) >= sizeof (struct ether_header)) { |
| struct ether_header *pether; |
| |
| pether = (struct ether_header *)((char *)ip6h - |
| sizeof (*pether)); |
| addr = pether->ether_shost.ether_addr_octet; |
| alen = ETHERADDRL; |
| } |
| } else { |
| dl_unitdata_ind_t *dlu; |
| |
| dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; |
| alen = dlu->dl_src_addr_length; |
| if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && |
| dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { |
| addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; |
| if (ill->ill_sap_length < 0) { |
| alen += ill->ill_sap_length; |
| } else { |
| addr += ill->ill_sap_length; |
| alen -= ill->ill_sap_length; |
| } |
| } |
| } |
| |
| if (alen > 0) { |
| *haddr = addr; |
| *haddrlenp = alen; |
| } else { |
| *haddr = NULL; |
| *haddrlenp = 0; |
| } |
| |
| /* nd_ns_target and nd_na_target are at the same offset, so we cheat */ |
| *targp = ns->nd_ns_target; |
| } |
| |
| /* |
| * This is for exclusive changes due to NDP duplicate address detection |
| * failure. |
| */ |
| /* ARGSUSED */ |
| static void |
| ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) |
| { |
| ill_t *ill = rq->q_ptr; |
| ipif_t *ipif; |
| mblk_t *dl_mp = NULL; |
| uchar_t *haddr; |
| uint_t haddrlen; |
| ip_stack_t *ipst = ill->ill_ipst; |
| in6_addr_t targ; |
| |
| if (DB_TYPE(mp) != M_DATA) { |
| dl_mp = mp; |
| mp = mp->b_cont; |
| } |
| |
| ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); |
| if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) { |
| /* |
| * Ignore conflicts generated by misbehaving switches that |
| * just reflect our own messages back to us. For IPMP, we may |
| * see reflections across any ill in the illgrp. |
| */ |
| if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || |
| IS_UNDER_IPMP(ill) && |
| ipmp_illgrp_find_ill(ill->ill_grp, haddr, haddrlen) != NULL) |
| goto ignore_conflict; |
| } |
| |
| /* |
| * Look up the appropriate ipif. |
| */ |
| ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, NULL, NULL, NULL, |
| NULL, ipst); |
| if (ipif == NULL) |
| goto ignore_conflict; |
| |
| /* Reload the ill to match the ipif */ |
| ill = ipif->ipif_ill; |
| |
| /* If it's already duplicate or ineligible, then don't do anything. */ |
| if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { |
| ipif_refrele(ipif); |
| goto ignore_conflict; |
| } |
| |
| /* |
| * If this is a failure during duplicate recovery, then don't |
| * complain. It may take a long time to recover. |
| */ |
| if (!ipif->ipif_was_dup) { |
| char ibuf[LIFNAMSIZ]; |
| char hbuf[MAC_STR_LEN]; |
| char sbuf[INET6_ADDRSTRLEN]; |
| |
| ipif_get_name(ipif, ibuf, sizeof (ibuf)); |
| cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" |
| " disabled", ibuf, |
| inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), |
| mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf))); |
| } |
| mutex_enter(&ill->ill_lock); |
| ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); |
| ipif->ipif_flags |= IPIF_DUPLICATE; |
| ill->ill_ipif_dup_count++; |
| mutex_exit(&ill->ill_lock); |
| (void) ipif_down(ipif, NULL, NULL); |
| ipif_down_tail(ipif); |
| mutex_enter(&ill->ill_lock); |
| if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && |
| ill->ill_net_type == IRE_IF_RESOLVER && |
| !(ipif->ipif_state_flags & IPIF_CONDEMNED) && |
| ipst->ips_ip_dup_recovery > 0) { |
| ASSERT(ipif->ipif_recovery_id == 0); |
| ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, |
| ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); |
| } |
| mutex_exit(&ill->ill_lock); |
| ipif_refrele(ipif); |
| ignore_conflict: |
| if (dl_mp != NULL) |
| freeb(dl_mp); |
| freemsg(mp); |
| } |
| |
| /* |
| * Handle failure by tearing down the ipifs with the specified address. Note |
| * that tearing down the ipif also means deleting the nce through ipif_down, so |
| * it's not possible to do recovery by just restarting the nce timer. Instead, |
| * we start a timer on the ipif. |
| */ |
| static void |
| ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) |
| { |
| if ((mp = copymsg(mp)) != NULL) { |
| if (dl_mp == NULL) |
| dl_mp = mp; |
| else if ((dl_mp = copyb(dl_mp)) != NULL) |
| dl_mp->b_cont = mp; |
| if (dl_mp == NULL) { |
| freemsg(mp); |
| } else { |
| ill_refhold(ill); |
| qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP, |
| B_FALSE); |
| } |
| } |
| } |
| |
| /* |
| * Handle a discovered conflict: some other system is advertising that it owns |
| * one of our IP addresses. We need to defend ourselves, or just shut down the |
| * interface. |
| */ |
| static void |
| ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) |
| { |
| ipif_t *ipif; |
| uint32_t now; |
| uint_t maxdefense; |
| uint_t defs; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, |
| NULL, NULL, ipst); |
| if (ipif == NULL) |
| return; |
| |
| /* |
| * First, figure out if this address is disposable. |
| */ |
| if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) |
| maxdefense = ipst->ips_ip_max_temp_defend; |
| else |
| maxdefense = ipst->ips_ip_max_defend; |
| |
| /* |
| * Now figure out how many times we've defended ourselves. Ignore |
| * defenses that happened long in the past. |
| */ |
| now = gethrestime_sec(); |
| mutex_enter(&nce->nce_lock); |
| if ((defs = nce->nce_defense_count) > 0 && |
| now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { |
| nce->nce_defense_count = defs = 0; |
| } |
| nce->nce_defense_count++; |
| nce->nce_defense_time = now; |
| mutex_exit(&nce->nce_lock); |
| ipif_refrele(ipif); |
| |
| /* |
| * If we've defended ourselves too many times already, then give up and |
| * tear down the interface(s) using this address. Otherwise, defend by |
| * sending out an unsolicited Neighbor Advertisement. |
| */ |
| if (defs >= maxdefense) { |
| ip_ndp_failure(ill, mp, dl_mp); |
| } else { |
| char hbuf[MAC_STR_LEN]; |
| char sbuf[INET6_ADDRSTRLEN]; |
| uchar_t *haddr; |
| uint_t haddrlen; |
| in6_addr_t targ; |
| |
| ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); |
| cmn_err(CE_WARN, "node %s is using our IP address %s on %s", |
| mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)), |
| inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), |
| ill->ill_name); |
| |
| (void) nce_xmit_advert(nce, B_FALSE, &ipv6_all_hosts_mcast, 0); |
| } |
| } |
| |
| static void |
| ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) |
| { |
| nd_neighbor_solicit_t *ns; |
| uint32_t hlen = ill->ill_nd_lla_len; |
| uchar_t *haddr = NULL; |
| icmp6_t *icmp_nd; |
| ip6_t *ip6h; |
| nce_t *our_nce = NULL; |
| in6_addr_t target; |
| in6_addr_t src; |
| int len; |
| int flag = 0; |
| nd_opt_hdr_t *opt = NULL; |
| boolean_t bad_solicit = B_FALSE; |
| mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; |
| |
| ip6h = (ip6_t *)mp->b_rptr; |
| icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); |
| len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; |
| src = ip6h->ip6_src; |
| ns = (nd_neighbor_solicit_t *)icmp_nd; |
| target = ns->nd_ns_target; |
| if (IN6_IS_ADDR_MULTICAST(&target)) { |
| if (ip_debug > 2) { |
| /* ip1dbg */ |
| pr_addr_dbg("ndp_input_solicit: Target is" |
| " multicast! %s\n", AF_INET6, &target); |
| } |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| if (len > sizeof (nd_neighbor_solicit_t)) { |
| /* Options present */ |
| opt = (nd_opt_hdr_t *)&ns[1]; |
| len -= sizeof (nd_neighbor_solicit_t); |
| if (!ndp_verify_optlen(opt, len)) { |
| ip1dbg(("ndp_input_solicit: Bad opt len\n")); |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| |
| } |
| if (IN6_IS_ADDR_UNSPECIFIED(&src)) { |
| /* Check to see if this is a valid DAD solicitation */ |
| if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { |
| if (ip_debug > 2) { |
| /* ip1dbg */ |
| pr_addr_dbg("ndp_input_solicit: IPv6 " |
| "Destination is not solicited node " |
| "multicast %s\n", AF_INET6, |
| &ip6h->ip6_dst); |
| } |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| } |
| |
| /* |
| * NOTE: with IPMP, it's possible the nominated multicast ill (which |
| * received this packet if it's multicast) is not the ill tied to |
| * e.g. the IPMP ill's data link-local. So we match across the illgrp |
| * to ensure we find the associated NCE. |
| */ |
| our_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE); |
| /* |
| * If this is a valid Solicitation, a permanent |
| * entry should exist in the cache |
| */ |
| if (our_nce == NULL || |
| !(our_nce->nce_flags & NCE_F_PERMANENT)) { |
| ip1dbg(("ndp_input_solicit: Wrong target in NS?!" |
| "ifname=%s ", ill->ill_name)); |
| if (ip_debug > 2) { |
| /* ip1dbg */ |
| pr_addr_dbg(" dst %s\n", AF_INET6, &target); |
| } |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| |
| /* At this point we should have a verified NS per spec */ |
| if (opt != NULL) { |
| opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); |
| if (opt != NULL) { |
| haddr = (uchar_t *)&opt[1]; |
| if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || |
| hlen == 0) { |
| ip1dbg(("ndp_input_solicit: bad SLLA\n")); |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| } |
| } |
| |
| /* If sending directly to peer, set the unicast flag */ |
| if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) |
| flag |= NDP_UNICAST; |
| |
| /* |
| * Create/update the entry for the soliciting node. |
| * or respond to outstanding queries, don't if |
| * the source is unspecified address. |
| */ |
| if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { |
| int err; |
| nce_t *nnce; |
| |
| ASSERT(ill->ill_isv6); |
| /* |
| * Regular solicitations *must* include the Source Link-Layer |
| * Address option. Ignore messages that do not. |
| */ |
| if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { |
| ip1dbg(("ndp_input_solicit: source link-layer address " |
| "option missing with a specified source.\n")); |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| |
| /* |
| * This is a regular solicitation. If we're still in the |
| * process of verifying the address, then don't respond at all |
| * and don't keep track of the sender. |
| */ |
| if (our_nce->nce_state == ND_PROBE) |
| goto done; |
| |
| /* |
| * If the solicitation doesn't have sender hardware address |
| * (legal for unicast solicitation), then process without |
| * installing the return NCE. Either we already know it, or |
| * we'll be forced to look it up when (and if) we reply to the |
| * packet. |
| */ |
| if (haddr == NULL) |
| goto no_source; |
| |
| err = ndp_lookup_then_add_v6(ill, |
| B_FALSE, |
| haddr, |
| &src, /* Soliciting nodes address */ |
| &ipv6_all_ones, |
| &ipv6_all_zeros, |
| 0, |
| 0, |
| ND_STALE, |
| &nnce); |
| switch (err) { |
| case 0: |
| /* done with this entry */ |
| NCE_REFRELE(nnce); |
| break; |
| case EEXIST: |
| /* |
| * B_FALSE indicates this is not an an advertisement. |
| */ |
| ndp_process(nnce, haddr, 0, B_FALSE); |
| NCE_REFRELE(nnce); |
| break; |
| default: |
| ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", |
| err)); |
| goto done; |
| } |
| no_source: |
| flag |= NDP_SOLICITED; |
| } else { |
| /* |
| * No source link layer address option should be present in a |
| * valid DAD request. |
| */ |
| if (haddr != NULL) { |
| ip1dbg(("ndp_input_solicit: source link-layer address " |
| "option present with an unspecified source.\n")); |
| bad_solicit = B_TRUE; |
| goto done; |
| } |
| if (our_nce->nce_state == ND_PROBE) { |
| /* |
| * Internally looped-back probes won't have DLPI |
| * attached to them. External ones (which are sent by |
| * multicast) always will. Just ignore our own |
| * transmissions. |
| */ |
| if (dl_mp != NULL) { |
| /* |
| * If someone else is probing our address, then |
| * we've crossed wires. Declare failure. |
| */ |
| ip_ndp_failure(ill, mp, dl_mp); |
| } |
| goto done; |
| } |
| /* |
| * This is a DAD probe. Multicast the advertisement to the |
| * all-nodes address. |
| */ |
| src = ipv6_all_hosts_mcast; |
| } |
| /* Response to a solicitation */ |
| (void) nce_xmit_advert(our_nce, B_TRUE, &src, flag); |
| done: |
| if (bad_solicit) |
| BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); |
| if (our_nce != NULL) |
| NCE_REFRELE(our_nce); |
| } |
| |
| void |
| ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) |
| { |
| nd_neighbor_advert_t *na; |
| uint32_t hlen = ill->ill_nd_lla_len; |
| uchar_t *haddr = NULL; |
| icmp6_t *icmp_nd; |
| ip6_t *ip6h; |
| nce_t *dst_nce = NULL; |
| in6_addr_t target; |
| nd_opt_hdr_t *opt = NULL; |
| int len; |
| ip_stack_t *ipst = ill->ill_ipst; |
| mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; |
| |
| ip6h = (ip6_t *)mp->b_rptr; |
| icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); |
| len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; |
| na = (nd_neighbor_advert_t *)icmp_nd; |
| if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && |
| (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { |
| ip1dbg(("ndp_input_advert: Target is multicast but the " |
| "solicited flag is not zero\n")); |
| BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); |
| return; |
| } |
| target = na->nd_na_target; |
| if (IN6_IS_ADDR_MULTICAST(&target)) { |
| ip1dbg(("ndp_input_advert: Target is multicast!\n")); |
| BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); |
| return; |
| } |
| if (len > sizeof (nd_neighbor_advert_t)) { |
| opt = (nd_opt_hdr_t *)&na[1]; |
| if (!ndp_verify_optlen(opt, |
| len - sizeof (nd_neighbor_advert_t))) { |
| ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); |
| BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); |
| return; |
| } |
| /* At this point we have a verified NA per spec */ |
| len -= sizeof (nd_neighbor_advert_t); |
| opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); |
| if (opt != NULL) { |
| haddr = (uchar_t *)&opt[1]; |
| if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || |
| hlen == 0) { |
| ip1dbg(("ndp_input_advert: bad SLLA\n")); |
| BUMP_MIB(mib, |
| ipv6IfIcmpInBadNeighborAdvertisements); |
| return; |
| } |
| } |
| } |
| |
| /* |
| * NOTE: we match across the illgrp since we need to do DAD for all of |
| * our local addresses, and those are spread across all the active |
| * ills in the group. |
| */ |
| if ((dst_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE)) == NULL) |
| return; |
| |
| if (dst_nce->nce_flags & NCE_F_PERMANENT) { |
| /* |
| * Someone just advertised one of our local addresses. First, |
| * check it it was us -- if so, we can safely ignore it. |
| */ |
| if (haddr != NULL) { |
| if (!nce_cmp_ll_addr(dst_nce, haddr, hlen)) |
| goto out; /* from us -- no conflict */ |
| |
| /* |
| * If we're in an IPMP group, check if this is an echo |
| * from another ill in the group. Use the double- |
| * checked locking pattern to avoid grabbing |
| * ill_g_lock in the non-IPMP case. |
| */ |
| if (IS_UNDER_IPMP(ill)) { |
| rw_enter(&ipst->ips_ill_g_lock, RW_READER); |
| if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill( |
| ill->ill_grp, haddr, hlen) != NULL) { |
| rw_exit(&ipst->ips_ill_g_lock); |
| goto out; |
| } |
| rw_exit(&ipst->ips_ill_g_lock); |
| } |
| } |
| |
| /* |
| * This appears to be a real conflict. If we're trying to |
| * configure this NCE (ND_PROBE), then shut it down. |
| * Otherwise, handle the discovered conflict. |
| * |
| * Note that dl_mp might be NULL if we're getting a unicast |
| * reply. This isn't typically done (multicast is the norm in |
| * response to a probe), but we can handle the dl_mp == NULL |
| * case as well. |
| */ |
| if (dst_nce->nce_state == ND_PROBE) |
| ip_ndp_failure(ill, mp, dl_mp); |
| else |
| ip_ndp_conflict(ill, mp, dl_mp, dst_nce); |
| } else { |
| if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER) |
| dst_nce->nce_flags |= NCE_F_ISROUTER; |
| |
| /* B_TRUE indicates this an advertisement */ |
| ndp_process(dst_nce, haddr, na->nd_na_flags_reserved, B_TRUE); |
| } |
| out: |
| NCE_REFRELE(dst_nce); |
| } |
| |
| /* |
| * Process NDP neighbor solicitation/advertisement messages. |
| * The checksum has already checked o.k before reaching here. |
| */ |
| void |
| ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) |
| { |
| icmp6_t *icmp_nd; |
| ip6_t *ip6h; |
| int len; |
| mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; |
| |
| |
| if (!pullupmsg(mp, -1)) { |
| ip1dbg(("ndp_input: pullupmsg failed\n")); |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| goto done; |
| } |
| ip6h = (ip6_t *)mp->b_rptr; |
| if (ip6h->ip6_hops != IPV6_MAX_HOPS) { |
| ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); |
| BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); |
| goto done; |
| } |
| /* |
| * NDP does not accept any extension headers between the |
| * IP header and the ICMP header since e.g. a routing |
| * header could be dangerous. |
| * This assumes that any AH or ESP headers are removed |
| * by ip prior to passing the packet to ndp_input. |
| */ |
| if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { |
| ip1dbg(("ndp_input: Wrong next header 0x%x\n", |
| ip6h->ip6_nxt)); |
| BUMP_MIB(mib, ipv6IfIcmpInErrors); |
| goto done; |
| } |
| icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); |
| ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || |
| icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); |
| if (icmp_nd->icmp6_code != 0) { |
| ip1dbg(("ndp_input: icmp6 code != 0 \n")); |
| BUMP_MIB(mib, ipv6IfIcmpInErrors); |
| goto done; |
| } |
| len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; |
| /* |
| * Make sure packet length is large enough for either |
| * a NS or a NA icmp packet. |
| */ |
| if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { |
| ip1dbg(("ndp_input: packet too short\n")); |
| BUMP_MIB(mib, ipv6IfIcmpInErrors); |
| goto done; |
| } |
| if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { |
| ndp_input_solicit(ill, mp, dl_mp); |
| } else { |
| ndp_input_advert(ill, mp, dl_mp); |
| } |
| done: |
| freemsg(mp); |
| } |
| |
| /* |
| * Utility routine to send an advertisement. Assumes that the NCE cannot |
| * go away (e.g., because it's refheld). |
| */ |
| static boolean_t |
| nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *target, |
| uint_t flags) |
| { |
| ASSERT((flags & NDP_PROBE) == 0); |
| |
| if (nce->nce_flags & NCE_F_ISROUTER) |
| flags |= NDP_ISROUTER; |
| if (!(nce->nce_flags & NCE_F_ANYCAST)) |
| flags |= NDP_ORIDE; |
| |
| return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_ADVERT, use_nd_lla, |
| &nce->nce_addr, target, flags)); |
| } |
| |
| /* |
| * Utility routine to send a solicitation. Assumes that the NCE cannot |
| * go away (e.g., because it's refheld). |
| */ |
| static boolean_t |
| nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *sender, |
| uint_t flags) |
| { |
| if (flags & NDP_PROBE) |
| sender = &ipv6_all_zeros; |
| |
| return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, use_nd_lla, |
| sender, &nce->nce_addr, flags)); |
| } |
| |
| /* |
| * nce_xmit is called to form and transmit a ND solicitation or |
| * advertisement ICMP packet. |
| * |
| * If the source address is unspecified and this isn't a probe (used for |
| * duplicate address detection), an appropriate source address and link layer |
| * address will be chosen here. The link layer address option is included if |
| * the source is specified (i.e., all non-probe packets), and omitted (per the |
| * specification) otherwise. |
| * |
| * It returns B_FALSE only if it does a successful put() to the |
| * corresponding ill's ill_wq otherwise returns B_TRUE. |
| */ |
| static boolean_t |
| nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla, |
| const in6_addr_t *sender, const in6_addr_t *target, int flag) |
| { |
| ill_t *hwaddr_ill; |
| uint32_t len; |
| icmp6_t *icmp6; |
| mblk_t *mp; |
| ip6_t *ip6h; |
| nd_opt_hdr_t *opt; |
| uint_t plen, maxplen; |
| ip6i_t *ip6i; |
| ipif_t *src_ipif = NULL; |
| uint8_t *hw_addr; |
| zoneid_t zoneid = GLOBAL_ZONEID; |
| char buf[INET6_ADDRSTRLEN]; |
| |
| ASSERT(!IS_IPMP(ill)); |
| |
| /* |
| * Check that the sender is actually a usable address on `ill', and if |
| * so, track that as the src_ipif. If not, for solicitations, set the |
| * sender to :: so that a new one will be picked below; for adverts, |
| * drop the packet since we expect nce_xmit_advert() to always provide |
| * a valid sender. |
| */ |
| if (!IN6_IS_ADDR_UNSPECIFIED(sender)) { |
| if ((src_ipif = ip_ndp_lookup_addr_v6(sender, ill)) == NULL || |
| !src_ipif->ipif_addr_ready) { |
| if (src_ipif != NULL) { |
| ipif_refrele(src_ipif); |
| src_ipif = NULL; |
| } |
| if (type == ND_NEIGHBOR_ADVERT) { |
| ip1dbg(("nce_xmit: No source ipif for src %s\n", |
| inet_ntop(AF_INET6, sender, buf, |
| sizeof (buf)))); |
| return (B_TRUE); |
| } |
| sender = &ipv6_all_zeros; |
| } |
| } |
| |
| /* |
| * If we still have an unspecified source (sender) address and this |
| * isn't a probe, select a source address from `ill'. |
| */ |
| if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { |
| ASSERT(type != ND_NEIGHBOR_ADVERT); |
| /* |
| * Pick a source address for this solicitation, but restrict |
| * the selection to addresses assigned to the output |
| * interface. We do this because the destination will create |
| * a neighbor cache entry for the source address of this |
| * packet, so the source address needs to be a valid neighbor. |
| */ |
| src_ipif = ipif_select_source_v6(ill, target, B_TRUE, |
| IPV6_PREFER_SRC_DEFAULT, ALL_ZONES); |
| if (src_ipif == NULL) { |
| ip1dbg(("nce_xmit: No source ipif for dst %s\n", |
| inet_ntop(AF_INET6, target, buf, sizeof (buf)))); |
| return (B_TRUE); |
| } |
| sender = &src_ipif->ipif_v6src_addr; |
| } |
| |
| /* |
| * We're either sending a probe or we have a source address. |
| */ |
| ASSERT((flag & NDP_PROBE) || src_ipif != NULL); |
| |
| maxplen = roundup(sizeof (nd_opt_hdr_t) + ND_MAX_HDW_LEN, 8); |
| len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + |
| maxplen; |
| mp = allocb(len, BPRI_LO); |
| if (mp == NULL) { |
| if (src_ipif != NULL) |
| ipif_refrele(src_ipif); |
| return (B_TRUE); |
| } |
| bzero((char *)mp->b_rptr, len); |
| mp->b_wptr = mp->b_rptr + len; |
| |
| ip6i = (ip6i_t *)mp->b_rptr; |
| ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; |
| ip6i->ip6i_nxt = IPPROTO_RAW; |
| ip6i->ip6i_flags = IP6I_HOPLIMIT; |
| if (flag & NDP_PROBE) |
| ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; |
| |
| ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); |
| ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; |
| ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); |
| ip6h->ip6_nxt = IPPROTO_ICMPV6; |
| ip6h->ip6_hops = IPV6_MAX_HOPS; |
| ip6h->ip6_src = *sender; |
| ip6h->ip6_dst = *target; |
| icmp6 = (icmp6_t *)&ip6h[1]; |
| |
| opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + |
| sizeof (nd_neighbor_advert_t)); |
| |
| if (type == ND_NEIGHBOR_SOLICIT) { |
| nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; |
| |
| if (!(flag & NDP_PROBE)) |
| opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; |
| ns->nd_ns_target = *target; |
| if (!(flag & NDP_UNICAST)) { |
| /* Form multicast address of the target */ |
| ip6h->ip6_dst = ipv6_solicited_node_mcast; |
| ip6h->ip6_dst.s6_addr32[3] |= |
| ns->nd_ns_target.s6_addr32[3]; |
| } |
| } else { |
| nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; |
| |
| ASSERT(!(flag & NDP_PROBE)); |
| opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; |
| na->nd_na_target = *sender; |
| if (flag & NDP_ISROUTER) |
| na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; |
| if (flag & NDP_SOLICITED) |
| na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; |
| if (flag & NDP_ORIDE) |
| na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; |
| } |
| |
| hw_addr = NULL; |
| if (!(flag & NDP_PROBE)) { |
| /* |
| * Use our source address to find the hardware address to put |
| * in the packet, so that the hardware address and IP address |
| * will match up -- even if that hardware address doesn't |
| * match the ill we actually transmit the packet through. |
| */ |
| if (IS_IPMP(src_ipif->ipif_ill)) { |
| hwaddr_ill = ipmp_ipif_hold_bound_ill(src_ipif); |
| if (hwaddr_ill == NULL) { |
| ip1dbg(("nce_xmit: no bound ill!\n")); |
| ipif_refrele(src_ipif); |
| freemsg(mp); |
| return (B_TRUE); |
| } |
| } else { |
| hwaddr_ill = src_ipif->ipif_ill; |
| ill_refhold(hwaddr_ill); /* for symmetry */ |
| } |
| |
| plen = roundup(sizeof (nd_opt_hdr_t) + |
| hwaddr_ill->ill_nd_lla_len, 8); |
| |
| hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : |
| hwaddr_ill->ill_phys_addr; |
| if (hw_addr != NULL) { |
| /* Fill in link layer address and option len */ |
| opt->nd_opt_len = (uint8_t)(plen / 8); |
| bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); |
| } |
| |
| ill_refrele(hwaddr_ill); |
| } |
| |
| if (hw_addr == NULL) |
| plen = 0; |
| |
| /* Fix up the length of the packet now that plen is known */ |
| len -= (maxplen - plen); |
| mp->b_wptr = mp->b_rptr + len; |
| ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); |
| |
| icmp6->icmp6_type = type; |
| icmp6->icmp6_code = 0; |
| /* |
| * Prepare for checksum by putting icmp length in the icmp |
| * checksum field. The checksum is calculated in ip_wput_v6. |
| */ |
| icmp6->icmp6_cksum = ip6h->ip6_plen; |
| |
| /* |
| * Before we toss the src_ipif, look up the zoneid to pass to |
| * ip_output_v6(). This is to ensure unicast ND_NEIGHBOR_ADVERT |
| * packets to be routed correctly by IP (we cannot guarantee that the |
| * global zone has an interface route to the destination). |
| */ |
| if (src_ipif != NULL) { |
| if ((zoneid = src_ipif->ipif_zoneid) == ALL_ZONES) |
| zoneid = GLOBAL_ZONEID; |
| ipif_refrele(src_ipif); |
| } |
| |
| ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT); |
| return (B_FALSE); |
| } |
| |
| /* |
| * Make a link layer address (does not include the SAP) from an nce. |
| * To form the link layer address, use the last four bytes of ipv6 |
| * address passed in and the fixed offset stored in nce. |
| */ |
| static void |
| nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) |
| { |
| uchar_t *mask, *to; |
| ill_t *ill = nce->nce_ill; |
| int len; |
| |
| if (ill->ill_net_type == IRE_IF_NORESOLVER) |
| return; |
| ASSERT(nce->nce_res_mp != NULL); |
| ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); |
| ASSERT(nce->nce_flags & NCE_F_MAPPING); |
| ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); |
| ASSERT(addr != NULL); |
| bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), |
| addrpos, ill->ill_nd_lla_len); |
| len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, |
| IPV6_ADDR_LEN); |
| mask = (uchar_t *)&nce->nce_extract_mask; |
| mask += (IPV6_ADDR_LEN - len); |
| addr += (IPV6_ADDR_LEN - len); |
| to = addrpos + nce->nce_ll_extract_start; |
| while (len-- > 0) |
| *to++ |= *mask++ & *addr++; |
| } |
| |
| mblk_t * |
| nce_udreq_alloc(ill_t *ill) |
| { |
| mblk_t *template_mp = NULL; |
| dl_unitdata_req_t *dlur; |
| int sap_length; |
| |
| ASSERT(ill->ill_isv6); |
| |
| sap_length = ill->ill_sap_length; |
| template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + |
| ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); |
| if (template_mp == NULL) |
| return (NULL); |
| |
| dlur = (dl_unitdata_req_t *)template_mp->b_rptr; |
| dlur->dl_priority.dl_min = 0; |
| dlur->dl_priority.dl_max = 0; |
| dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; |
| dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); |
| |
| /* Copy in the SAP value. */ |
| NCE_LL_SAP_COPY(ill, template_mp); |
| |
| return (template_mp); |
| } |
| |
| /* |
| * NDP retransmit timer. |
| * This timer goes off when: |
| * a. It is time to retransmit NS for resolver. |
| * b. It is time to send reachability probes. |
| */ |
| void |
| ndp_timer(void *arg) |
| { |
| nce_t *nce = arg; |
| ill_t *ill = nce->nce_ill; |
| uint32_t ms; |
| char addrbuf[INET6_ADDRSTRLEN]; |
| boolean_t dropped = B_FALSE; |
| ip_stack_t *ipst = ill->ill_ipst; |
| |
| /* |
| * The timer has to be cancelled by ndp_delete before doing the final |
| * refrele. So the NCE is guaranteed to exist when the timer runs |
| * until it clears the timeout_id. Before clearing the timeout_id |
| * bump up the refcnt so that we can continue to use the nce |
| */ |
| ASSERT(nce != NULL); |
| |
| mutex_enter(&nce->nce_lock); |
| NCE_REFHOLD_LOCKED(nce); |
| nce->nce_timeout_id = 0; |
| |
| /* |
| * Check the reachability state first. |
| */ |
| switch (nce->nce_state) { |
| case ND_DELAY: |
| nce->nce_state = ND_PROBE; |
| mutex_exit(&nce->nce_lock); |
| (void) nce_xmit_solicit(nce, B_FALSE, &ipv6_all_zeros, |
| NDP_UNICAST); |
| if (ip_debug > 3) { |
| /* ip2dbg */ |
| pr_addr_dbg("ndp_timer: state for %s changed " |
| "to PROBE\n", AF_INET6, &nce->nce_addr); |
| } |
| NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); |
| NCE_REFRELE(nce); |
| return; |
| case ND_PROBE: |
| /* must be retransmit timer */ |
| nce->nce_pcnt--; |
| ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && |
| nce->nce_pcnt >= -1); |
| if (nce->nce_pcnt > 0) { |
| /* |
| * As per RFC2461, the nce gets deleted after |
| * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. |
| * Note that the first unicast solicitation is sent |
| * during the DELAY state. |
| */ |
| ip2dbg(("ndp_timer: pcount=%x dst %s\n", |
| nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, |
| addrbuf, sizeof (addrbuf)))); |
| mutex_exit(&nce->nce_lock); |
| dropped = nce_xmit_solicit(nce, B_FALSE, |
| &ipv6_all_zeros, |
| (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : |
| NDP_UNICAST); |
| if (dropped) { |
| mutex_enter(&nce->nce_lock); |
| nce->nce_pcnt++; |
| mutex_exit(&nce->nce_lock); |
| } |
| NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); |
| } else if (nce->nce_pcnt < 0) { |
| /* No hope, delete the nce */ |
| nce->nce_state = ND_UNREACHABLE; |
| mutex_exit(&nce->nce_lock); |
| if (ip_debug > 2) { |
| /* ip1dbg */ |
| pr_addr_dbg("ndp_timer: Delete IRE for" |
| " dst %s\n", AF_INET6, &nce->nce_addr); |
| } |
| ndp_delete(nce); |
| } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { |
| /* Wait RetransTimer, before deleting the entry */ |
| ip2dbg(("ndp_timer: pcount=%x dst %s\n", |
| nce->nce_pcnt, inet_ntop(AF_INET6, |
| &nce->nce_addr, addrbuf, sizeof (addrbuf)))); |
| mutex_exit(&nce->nce_lock); |
| /* Wait one interval before killing */ |
| NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); |
| } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { |
| ipif_t *ipif; |
| |
| /* |
| * We're done probing, and we can now declare this |
| * address to be usable. Let IP know that it's ok to |
| * use. |
| */ |
| nce->nce_state = ND_REACHABLE; |
| mutex_exit(&nce->nce_lock); |
| ipif = ip_ndp_lookup_addr_v6(&nce->nce_addr, |
| nce->nce_ill); |
| if (ipif != NULL) { |
| if (ipif->ipif_was_dup) { |
| char ibuf[LIFNAMSIZ + 10]; |
| char sbuf[INET6_ADDRSTRLEN]; |
| |
| ipif->ipif_was_dup = B_FALSE; |
| (void) inet_ntop(AF_INET6, |
| &ipif->ipif_v6lcl_addr, |
| sbuf, sizeof (sbuf)); |
| ipif_get_name(ipif, ibuf, |
| sizeof (ibuf)); |
| cmn_err(CE_NOTE, "recovered address " |
| "%s on %s", sbuf, ibuf); |
| } |
| if ((ipif->ipif_flags & IPIF_UP) && |
| !ipif->ipif_addr_ready) |
| ipif_up_notify(ipif); |
| ipif->ipif_addr_ready = 1; |
| ipif_refrele(ipif); |
| } |
| /* Begin defending our new address */ |
| nce->nce_unsolicit_count = 0; |
| dropped = nce_xmit_advert(nce, B_FALSE, |
| &ipv6_all_hosts_mcast, 0); |
| if (dropped) { |
| nce->nce_unsolicit_count = 1; |
| NDP_RESTART_TIMER(nce, |
| ipst->ips_ip_ndp_unsolicit_interval); |
| } else if (ipst->ips_ip_ndp_defense_interval != 0) { |
| NDP_RESTART_TIMER(nce, |
| ipst->ips_ip_ndp_defense_interval); |
| } |
| } else { |
| /* |
| * This is an address we're probing to be our own, but |
| * the ill is down. Wait until it comes back before |
| * doing anything, but switch to reachable state so |
| * that the restart will work. |
| */ |
| nce->nce_state = ND_REACHABLE; |
| mutex_exit(&nce->nce_lock); |
| } |
| NCE_REFRELE(nce); |
| return; |
| case ND_INCOMPLETE: { |
| ip6_t *ip6h; |
| ip6i_t *ip6i; |
| mblk_t *mp, *datamp, *nextmp, **prevmpp; |
| |
| /* |
| * Per case (2) in the nce_queue_mp() comments, scan nce_qd_mp |
| * for any IPMP probe packets, and toss 'em. IPMP probe |
| * packets will always be at the head of nce_qd_mp and always |
| * have an ip6i_t header, so we can stop at the first queued |
| * ND packet without an ip6i_t. |
| */ |
| prevmpp = &nce->nce_qd_mp; |
| for (mp = nce->nce_qd_mp; mp != NULL; mp = nextmp) { |
| nextmp = mp->b_next; |
| datamp = (DB_TYPE(mp) == M_CTL) ? mp->b_cont : mp; |
| ip6h = (ip6_t *)datamp->b_rptr; |
| if (ip6h->ip6_nxt != IPPROTO_RAW) |
| break; |
| |
| ip6i = (ip6i_t *)ip6h; |
| if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) { |
| inet_freemsg(mp); |
| *prevmpp = nextmp; |
| } else { |
| prevmpp = &mp->b_next; |
| } |
| } |
| |
| /* |
| * Must be resolver's retransmit timer. |
| */ |
| if (nce->nce_qd_mp != NULL) { |
| if ((ms = nce_solicit(nce, NULL)) == 0) { |
| if (nce->nce_state != ND_REACHABLE) { |
| mutex_exit(&nce->nce_lock); |
| nce_resolv_failed(nce); |
| ndp_delete(nce); |
| } else { |
| mutex_exit(&nce->nce_lock); |
| } |
| } else { |
| mutex_exit(&nce->nce_lock); |
| NDP_RESTART_TIMER(nce, (clock_t)ms); |
| } |
| NCE_REFRELE(nce); |
| return; |
| } |
| mutex_exit(&nce->nce_lock); |
| NCE_REFRELE(nce); |
| break; |
| } |
| case ND_REACHABLE: |
| if (((nce->nce_flags & NCE_F_UNSOL_ADV) && |
| nce->nce_unsolicit_count != 0) || |
| ((nce->nce_flags & NCE_F_PERMANENT) && |
| ipst->ips_ip_ndp_defense_interval != 0)) { |
| if (nce->nce_unsolicit_count > 0) |
| nce->nce_unsolicit_count--; |
| mutex_exit(&nce->nce_lock); |
| dropped = nce_xmit_advert(nce, B_FALSE, |
| &ipv6_all_hosts_mcast, 0); |
| if (dropped) { |
|