blob: 2cee123d4a2a602191ac2b876aea57ce511acd4e [file] [log] [blame]
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
jpk45916cd2006-03-24 12:29:20 -08005 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07007 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
meem1f197382010-04-03 14:24:23 -040022 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070023 */
24
John Levon15c07ad2019-01-14 19:42:58 +000025/*
Dan McDonald42c5ef02019-02-22 14:42:52 -050026 * Copyright (c) 2019, Joyent, Inc.
John Levon15c07ad2019-01-14 19:42:58 +000027 */
28
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070029#include <sys/types.h>
30#include <sys/stream.h>
31#include <sys/stropts.h>
carlsonj69bb4bb2006-08-14 14:10:48 -070032#include <sys/strsun.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070033#include <sys/sysmacros.h>
34#include <sys/errno.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070035#include <sys/dlpi.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070036#include <sys/socket.h>
37#include <sys/ddi.h>
carlsonj69bb4bb2006-08-14 14:10:48 -070038#include <sys/sunddi.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070039#include <sys/cmn_err.h>
40#include <sys/debug.h>
41#include <sys/vtrace.h>
42#include <sys/kmem.h>
43#include <sys/zone.h>
carlsonj69bb4bb2006-08-14 14:10:48 -070044#include <sys/ethernet.h>
45#include <sys/sdt.h>
Erik Nordmarkbd670b32009-11-11 11:49:49 -080046#include <sys/mac.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070047
48#include <net/if.h>
carlsonj69bb4bb2006-08-14 14:10:48 -070049#include <net/if_types.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070050#include <net/if_dl.h>
51#include <net/route.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070052#include <netinet/in.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070053#include <netinet/ip6.h>
54#include <netinet/icmp6.h>
55
56#include <inet/common.h>
57#include <inet/mi.h>
58#include <inet/mib2.h>
59#include <inet/nd.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070060#include <inet/ip.h>
nordmark5597b602006-09-14 18:05:27 -070061#include <inet/ip_impl.h>
dh155122f4b3ec62007-01-19 16:59:38 -080062#include <inet/ipclassifier.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070063#include <inet/ip_if.h>
64#include <inet/ip_ire.h>
65#include <inet/ip_rts.h>
66#include <inet/ip6.h>
67#include <inet/ip_ndp.h>
carlsonj69bb4bb2006-08-14 14:10:48 -070068#include <inet/sctp_ip.h>
Erik Nordmarkbd670b32009-11-11 11:49:49 -080069#include <inet/ip_arp.h>
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -040070#include <inet/ip2mac_impl.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070071
/*
 * Select the IPv6 (ips_ip_ndp_unsolicit_interval) or IPv4
 * (ips_ip_arp_publish_interval) tunable.  The expansion refers to a variable
 * named `ipst', which must be in scope wherever the macro is used.
 */
#define	ANNOUNCE_INTERVAL(isv6) \
	((isv6) ? ipst->ips_ip_ndp_unsolicit_interval : \
	ipst->ips_ip_arp_publish_interval)

/*
 * Select the IPv6 (ips_ndp_defend_interval) or IPv4
 * (ips_arp_defend_interval) tunable.  Like ANNOUNCE_INTERVAL, this relies on
 * a variable named `ipst' being in scope.
 */
#define	DEFENSE_INTERVAL(isv6) \
	((isv6) ? ipst->ips_ndp_defend_interval : \
	ipst->ips_arp_defend_interval)

/* Non-tunable probe interval, based on link capabilities */
#define	ILL_PROBE_INTERVAL(ill)	((ill)->ill_note_link ? 150 : 1500)

/*
 * The IPv4 Link Local address space is special; we do extra duplicate checking
 * there, as the entire assignment mechanism rests on random numbers.
 *
 * `ptr' is parenthesized before the cast so that an expression argument
 * (e.g. "p + 1") is converted as a whole rather than having the cast bind
 * to its first operand only.
 */
#define	IS_IPV4_LL_SPACE(ptr)	(((uchar_t *)(ptr))[0] == 169 && \
	((uchar_t *)(ptr))[1] == 254)
89
90/*
91 * NCE_EXTERNAL_FLAGS_MASK defines the set of ncec_flags that may be passed
92 * in to the ncec*add* functions.
93 *
94 * NCE_F_AUTHORITY means that we ignore any incoming adverts for that
95 * mapping (though DAD is performed for the mapping). NCE_F_PUBLISH means
96 * that we will respond to requests for the protocol address.
97 */
98#define NCE_EXTERNAL_FLAGS_MASK \
99 (NCE_F_MYADDR | NCE_F_ISROUTER | NCE_F_NONUD | \
100 NCE_F_ANYCAST | NCE_F_UNSOL_ADV | NCE_F_BCAST | NCE_F_MCAST | \
101 NCE_F_AUTHORITY | NCE_F_PUBLISH | NCE_F_STATIC)
102
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700103/*
carlsonj69bb4bb2006-08-14 14:10:48 -0700104 * Lock ordering:
105 *
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800106 * ndp_g_lock -> ill_lock -> ncec_lock
carlsonj69bb4bb2006-08-14 14:10:48 -0700107 *
108 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800109 * ncec_next. ncec_lock protects the contents of the NCE (particularly
110 * ncec_refcnt).
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700111 */
112
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800113static void nce_cleanup_list(ncec_t *ncec);
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300114static void nce_set_ll(ncec_t *ncec, uchar_t *ll_addr);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800115static ncec_t *ncec_lookup_illgrp(ill_t *, const in6_addr_t *,
116 ncec_t *);
117static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *);
118static int nce_set_multicast_v6(ill_t *ill, const in6_addr_t *addr,
119 uint16_t ncec_flags, nce_t **newnce);
120static int nce_set_multicast_v4(ill_t *ill, const in_addr_t *dst,
121 uint16_t ncec_flags, nce_t **newnce);
122static boolean_t ndp_xmit(ill_t *ill, uint32_t operation,
123 uint8_t *hwaddr, uint_t hwaddr_len, const in6_addr_t *sender,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700124 const in6_addr_t *target, int flag);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800125static void ncec_refhold_locked(ncec_t *);
126static boolean_t ill_defend_rate_limit(ill_t *, ncec_t *);
127static void nce_queue_mp_common(ncec_t *, mblk_t *, boolean_t);
128static int nce_add_common(ill_t *, uchar_t *, uint_t, const in6_addr_t *,
129 uint16_t, uint16_t, nce_t **);
Dan McDonald42c5ef02019-02-22 14:42:52 -0500130static nce_t *nce_add_impl(ill_t *, ncec_t *, nce_t *, mblk_t *, list_t *);
131static nce_t *nce_add(ill_t *, ncec_t *, list_t *);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800132static void nce_inactive(nce_t *);
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300133extern nce_t *nce_lookup(ill_t *, const in6_addr_t *);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800134static nce_t *nce_ill_lookup_then_add(ill_t *, ncec_t *);
135static int nce_add_v6(ill_t *, uchar_t *, uint_t, const in6_addr_t *,
136 uint16_t, uint16_t, nce_t **);
137static int nce_add_v4(ill_t *, uchar_t *, uint_t, const in_addr_t *,
138 uint16_t, uint16_t, nce_t **);
139static int nce_add_v6_postprocess(nce_t *);
140static int nce_add_v4_postprocess(nce_t *);
141static ill_t *nce_resolve_src(ncec_t *, in6_addr_t *);
142static clock_t nce_fuzz_interval(clock_t, boolean_t);
143static void nce_resolv_ipmp_ok(ncec_t *);
144static void nce_walk_common(ill_t *, pfi_t, void *);
145static void nce_start_timer(ncec_t *, uint_t);
146static nce_t *nce_fastpath_create(ill_t *, ncec_t *);
147static void nce_fastpath_trigger(nce_t *);
148static nce_t *nce_fastpath(ncec_t *, boolean_t, nce_t *);
sangeetac793af92006-08-11 05:59:29 -0700149
carlsonj6a8288c2007-09-11 04:26:06 -0700150#ifdef DEBUG
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800151static void ncec_trace_cleanup(const ncec_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700152#endif
153
dh155122f4b3ec62007-01-19 16:59:38 -0800154#define NCE_HASH_PTR_V4(ipst, addr) \
155 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)]))
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700156
dh155122f4b3ec62007-01-19 16:59:38 -0800157#define NCE_HASH_PTR_V6(ipst, addr) \
158 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \
159 NCE_TABLE_SIZE)]))
sangeetac793af92006-08-11 05:59:29 -0700160
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800161extern kmem_cache_t *ncec_cache;
162extern kmem_cache_t *nce_cache;
163
164/*
165 * Send out a IPv6 (unicast) or IPv4 (broadcast) DAD probe
166 * If src_ill is not null, the ncec_addr is bound to src_ill. The
167 * src_ill is ignored by nce_dad for IPv4 Neighbor Cache entries where
168 * the probe is sent on the ncec_ill (in the non-IPMP case) or the
169 * IPMP cast_ill (in the IPMP case).
170 *
Sowmini Varadhan44b099c2010-02-17 22:59:58 -0500171 * Note that the probe interval is based on the src_ill for IPv6, and
172 * the ncec_xmit_interval for IPv4.
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800173 */
174static void
175nce_dad(ncec_t *ncec, ill_t *src_ill, boolean_t send_probe)
176{
177 boolean_t dropped;
178 uint32_t probe_interval;
179
180 ASSERT(!(ncec->ncec_flags & NCE_F_MCAST));
181 ASSERT(!(ncec->ncec_flags & NCE_F_BCAST));
182 if (ncec->ncec_ipversion == IPV6_VERSION) {
183 dropped = ndp_xmit(src_ill, ND_NEIGHBOR_SOLICIT,
184 ncec->ncec_lladdr, ncec->ncec_lladdr_length,
185 &ipv6_all_zeros, &ncec->ncec_addr, NDP_PROBE);
Sowmini Varadhan44b099c2010-02-17 22:59:58 -0500186 probe_interval = ILL_PROBE_INTERVAL(src_ill);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800187 } else {
188 /* IPv4 DAD delay the initial probe. */
189 if (send_probe)
190 dropped = arp_probe(ncec);
191 else
192 dropped = B_TRUE;
193 probe_interval = nce_fuzz_interval(ncec->ncec_xmit_interval,
194 !send_probe);
195 }
196 if (!dropped) {
197 mutex_enter(&ncec->ncec_lock);
198 ncec->ncec_pcnt--;
199 mutex_exit(&ncec->ncec_lock);
200 }
201 nce_restart_timer(ncec, probe_interval);
202}
203
204/*
205 * Compute default flags to use for an advertisement of this ncec's address.
206 */
207static int
208nce_advert_flags(const ncec_t *ncec)
209{
210 int flag = 0;
211
212 if (ncec->ncec_flags & NCE_F_ISROUTER)
213 flag |= NDP_ISROUTER;
214 if (!(ncec->ncec_flags & NCE_F_ANYCAST))
215 flag |= NDP_ORIDE;
216
217 return (flag);
218}
carlsonj69bb4bb2006-08-14 14:10:48 -0700219
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700220/*
221 * NDP Cache Entry creation routine.
dh155122f4b3ec62007-01-19 16:59:38 -0800222 * This routine must always be called with ndp6->ndp_g_lock held.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700223 */
sowmini54da8752007-07-24 07:26:05 -0700224int
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800225nce_add_v6(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
226 const in6_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700227{
carlsonj69bb4bb2006-08-14 14:10:48 -0700228 int err;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800229 nce_t *nce;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700230
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800231 ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp6->ndp_g_lock));
sangeetac793af92006-08-11 05:59:29 -0700232 ASSERT(ill != NULL && ill->ill_isv6);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700233
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800234 err = nce_add_common(ill, hw_addr, hw_addr_len, addr, flags, state,
235 &nce);
236 if (err != 0)
237 return (err);
238 ASSERT(newnce != NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700239 *newnce = nce;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800240 return (err);
241}
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700242
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800243/*
244 * Post-processing routine to be executed after nce_add_v6(). This function
245 * triggers fastpath (if appropriate) and DAD on the newly added nce entry
246 * and must be called without any locks held.
247 */
248int
249nce_add_v6_postprocess(nce_t *nce)
250{
251 ncec_t *ncec = nce->nce_common;
252 boolean_t dropped = B_FALSE;
253 uchar_t *hw_addr = ncec->ncec_lladdr;
254 uint_t hw_addr_len = ncec->ncec_lladdr_length;
255 ill_t *ill = ncec->ncec_ill;
256 int err = 0;
257 uint16_t flags = ncec->ncec_flags;
258 ip_stack_t *ipst = ill->ill_ipst;
259 boolean_t trigger_fastpath = B_TRUE;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700260
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800261 /*
262 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
263 * we call nce_fastpath as soon as the ncec is resolved in nce_process.
264 * We call nce_fastpath from nce_update if the link layer address of
265 * the peer changes from nce_update
266 */
267 if (NCE_PUBLISH(ncec) || !NCE_ISREACHABLE(ncec) ||
268 (hw_addr == NULL && ill->ill_net_type != IRE_IF_NORESOLVER))
269 trigger_fastpath = B_FALSE;
270
271 if (trigger_fastpath)
272 nce_fastpath_trigger(nce);
273 if (NCE_PUBLISH(ncec) && ncec->ncec_state == ND_PROBE) {
274 ill_t *hwaddr_ill;
275 /*
276 * Unicast entry that needs DAD.
277 */
278 if (IS_IPMP(ill)) {
279 hwaddr_ill = ipmp_illgrp_find_ill(ill->ill_grp,
280 hw_addr, hw_addr_len);
281 } else {
282 hwaddr_ill = ill;
carlsonj69bb4bb2006-08-14 14:10:48 -0700283 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800284 nce_dad(ncec, hwaddr_ill, B_TRUE);
carlsonj69bb4bb2006-08-14 14:10:48 -0700285 err = EINPROGRESS;
286 } else if (flags & NCE_F_UNSOL_ADV) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700287 /*
288 * We account for the transmit below by assigning one
289 * less than the ndd variable. Subsequent decrements
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800290 * are done in nce_timer.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700291 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800292 mutex_enter(&ncec->ncec_lock);
293 ncec->ncec_unsolicit_count =
294 ipst->ips_ip_ndp_unsolicit_count - 1;
295 mutex_exit(&ncec->ncec_lock);
296 dropped = ndp_xmit(ill,
297 ND_NEIGHBOR_ADVERT,
298 hw_addr,
299 hw_addr_len,
300 &ncec->ncec_addr, /* Source and target of the adv */
301 &ipv6_all_hosts_mcast, /* Destination of the packet */
302 nce_advert_flags(ncec));
303 mutex_enter(&ncec->ncec_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700304 if (dropped)
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800305 ncec->ncec_unsolicit_count++;
306 else
307 ncec->ncec_last_time_defended = ddi_get_lbolt();
308 if (ncec->ncec_unsolicit_count != 0) {
309 nce_start_timer(ncec,
310 ipst->ips_ip_ndp_unsolicit_interval);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700311 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800312 mutex_exit(&ncec->ncec_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700313 }
carlsonj69bb4bb2006-08-14 14:10:48 -0700314 return (err);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700315}
316
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800317/*
318 * Atomically lookup and add (if needed) Neighbor Cache information for
319 * an address.
320 *
321 * IPMP notes: the ncec for non-local (i.e., !NCE_MYADDR(ncec) addresses
322 * are always added pointing at the ipmp_ill. Thus, when the ill passed
323 * to nce_add_v6 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t
324 * entries will be created, both pointing at the same ncec_t. The nce_t
325 * entries will have their nce_ill set to the ipmp_ill and the under_ill
326 * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill.
327 * Local addresses are always created on the ill passed to nce_add_v6.
328 */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700329int
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800330nce_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
331 const in6_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700332{
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800333 int err = 0;
dh155122f4b3ec62007-01-19 16:59:38 -0800334 ip_stack_t *ipst = ill->ill_ipst;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800335 nce_t *nce, *upper_nce = NULL;
336 ill_t *in_ill = ill;
337 boolean_t need_ill_refrele = B_FALSE;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700338
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800339 if (flags & NCE_F_MCAST) {
340 /*
341 * hw_addr will be figured out in nce_set_multicast_v6;
342 * caller has to select the cast_ill
343 */
344 ASSERT(hw_addr == NULL);
345 ASSERT(!IS_IPMP(ill));
346 err = nce_set_multicast_v6(ill, addr, flags, newnce);
347 return (err);
348 }
sowmini54da8752007-07-24 07:26:05 -0700349 ASSERT(ill->ill_isv6);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800350 if (IS_UNDER_IPMP(ill) && !(flags & NCE_F_MYADDR)) {
351 ill = ipmp_ill_hold_ipmp_ill(ill);
352 if (ill == NULL)
353 return (ENXIO);
354 need_ill_refrele = B_TRUE;
355 }
dh155122f4b3ec62007-01-19 16:59:38 -0800356
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800357 mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
358 nce = nce_lookup_addr(ill, addr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700359 if (nce == NULL) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800360 err = nce_add_v6(ill, hw_addr, hw_addr_len, addr, flags, state,
361 &nce);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700362 } else {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700363 err = EEXIST;
364 }
dh155122f4b3ec62007-01-19 16:59:38 -0800365 mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800366 if (err == 0)
367 err = nce_add_v6_postprocess(nce);
368 if (in_ill != ill && nce != NULL) {
Sowmini Varadhanedd5d092009-12-04 10:05:24 -0500369 nce_t *under_nce = NULL;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800370
371 /*
372 * in_ill was the under_ill. Try to create the under_nce.
373 * Hold the ill_g_lock to prevent changes to group membership
374 * until we are done.
375 */
376 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
Sowmini Varadhanf1c454b2010-01-11 10:29:23 -0500377 if (!IS_IN_SAME_ILLGRP(in_ill, ill)) {
378 DTRACE_PROBE2(ill__not__in__group, nce_t *, nce,
379 ill_t *, ill);
380 rw_exit(&ipst->ips_ill_g_lock);
381 err = ENXIO;
382 nce_refrele(nce);
383 nce = NULL;
384 goto bail;
385 }
386 under_nce = nce_fastpath_create(in_ill, nce->nce_common);
387 if (under_nce == NULL) {
388 rw_exit(&ipst->ips_ill_g_lock);
389 err = EINVAL;
390 nce_refrele(nce);
391 nce = NULL;
392 goto bail;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800393 }
394 rw_exit(&ipst->ips_ill_g_lock);
Sowmini Varadhanf1c454b2010-01-11 10:29:23 -0500395 upper_nce = nce;
396 nce = under_nce; /* will be returned to caller */
397 if (NCE_ISREACHABLE(nce->nce_common))
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800398 nce_fastpath_trigger(under_nce);
399 }
Sowmini Varadhanf1c454b2010-01-11 10:29:23 -0500400 /* nce_refrele is deferred until the lock is dropped */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800401 if (nce != NULL) {
402 if (newnce != NULL)
403 *newnce = nce;
404 else
405 nce_refrele(nce);
406 }
Sowmini Varadhanf1c454b2010-01-11 10:29:23 -0500407bail:
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800408 if (upper_nce != NULL)
409 nce_refrele(upper_nce);
410 if (need_ill_refrele)
411 ill_refrele(ill);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700412 return (err);
413}
414
415/*
416 * Remove all the CONDEMNED nces from the appropriate hash table.
417 * We create a private list of NCEs, these may have ires pointing
418 * to them, so the list will be passed through to clean up dependent
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800419 * ires and only then we can do ncec_refrele() which can make NCE inactive.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700420 */
421static void
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800422nce_remove(ndp_g_t *ndp, ncec_t *ncec, ncec_t **free_nce_list)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700423{
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800424 ncec_t *ncec1;
425 ncec_t **ptpn;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700426
sangeetac793af92006-08-11 05:59:29 -0700427 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
428 ASSERT(ndp->ndp_g_walker == 0);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800429 for (; ncec; ncec = ncec1) {
430 ncec1 = ncec->ncec_next;
431 mutex_enter(&ncec->ncec_lock);
432 if (NCE_ISCONDEMNED(ncec)) {
433 ptpn = ncec->ncec_ptpn;
434 ncec1 = ncec->ncec_next;
435 if (ncec1 != NULL)
436 ncec1->ncec_ptpn = ptpn;
437 *ptpn = ncec1;
438 ncec->ncec_ptpn = NULL;
439 ncec->ncec_next = NULL;
440 ncec->ncec_next = *free_nce_list;
441 *free_nce_list = ncec;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700442 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800443 mutex_exit(&ncec->ncec_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700444 }
445}
446
447/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800448 * 1. Mark the entry CONDEMNED. This ensures that no new nce_lookup()
449 * will return this NCE. Also no new timeouts will
450 * be started (See nce_restart_timer).
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700451 * 2. Cancel any currently running timeouts.
452 * 3. If there is an ndp walker, return. The walker will do the cleanup.
453 * This ensures that walkers see a consistent list of NCEs while walking.
454 * 4. Otherwise remove the NCE from the list of NCEs
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700455 */
456void
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800457ncec_delete(ncec_t *ncec)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700458{
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800459 ncec_t **ptpn;
460 ncec_t *ncec1;
461 int ipversion = ncec->ncec_ipversion;
dh155122f4b3ec62007-01-19 16:59:38 -0800462 ndp_g_t *ndp;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800463 ip_stack_t *ipst = ncec->ncec_ipst;
dh155122f4b3ec62007-01-19 16:59:38 -0800464
465 if (ipversion == IPV4_VERSION)
466 ndp = ipst->ips_ndp4;
467 else
468 ndp = ipst->ips_ndp6;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700469
470 /* Serialize deletes */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800471 mutex_enter(&ncec->ncec_lock);
472 if (NCE_ISCONDEMNED(ncec)) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700473 /* Some other thread is doing the delete */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800474 mutex_exit(&ncec->ncec_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700475 return;
476 }
477 /*
478 * Caller has a refhold. Also 1 ref for being in the list. Thus
479 * refcnt has to be >= 2
480 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800481 ASSERT(ncec->ncec_refcnt >= 2);
482 ncec->ncec_flags |= NCE_F_CONDEMNED;
483 mutex_exit(&ncec->ncec_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700484
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800485 /* Count how many condemned ires for kmem_cache callback */
Josef 'Jeff' Sipek1a5e2582014-08-08 10:50:14 -0400486 atomic_inc_32(&ipst->ips_num_nce_condemned);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800487 nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700488
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -0400489 /* Complete any waiting callbacks */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800490 ncec_cb_dispatch(ncec);
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -0400491
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700492 /*
493 * Cancel any running timer. Timeout can't be restarted
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800494 * since CONDEMNED is set. Can't hold ncec_lock across untimeout.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700495 * Passing invalid timeout id is fine.
496 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800497 if (ncec->ncec_timeout_id != 0) {
498 (void) untimeout(ncec->ncec_timeout_id);
499 ncec->ncec_timeout_id = 0;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700500 }
501
sangeetac793af92006-08-11 05:59:29 -0700502 mutex_enter(&ndp->ndp_g_lock);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800503 if (ncec->ncec_ptpn == NULL) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700504 /*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800505 * The last ndp walker has already removed this ncec from
506 * the list after we marked the ncec CONDEMNED and before
sangeetac793af92006-08-11 05:59:29 -0700507 * we grabbed the global lock.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700508 */
sangeetac793af92006-08-11 05:59:29 -0700509 mutex_exit(&ndp->ndp_g_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700510 return;
511 }
sangeetac793af92006-08-11 05:59:29 -0700512 if (ndp->ndp_g_walker > 0) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700513 /*
514 * Can't unlink. The walker will clean up
515 */
sangeetac793af92006-08-11 05:59:29 -0700516 ndp->ndp_g_walker_cleanup = B_TRUE;
517 mutex_exit(&ndp->ndp_g_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700518 return;
519 }
520
521 /*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800522 * Now remove the ncec from the list. nce_restart_timer won't restart
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700523 * the timer since it is marked CONDEMNED.
524 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800525 ptpn = ncec->ncec_ptpn;
526 ncec1 = ncec->ncec_next;
527 if (ncec1 != NULL)
528 ncec1->ncec_ptpn = ptpn;
529 *ptpn = ncec1;
530 ncec->ncec_ptpn = NULL;
531 ncec->ncec_next = NULL;
sangeetac793af92006-08-11 05:59:29 -0700532 mutex_exit(&ndp->ndp_g_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700533
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800534 /* Removed from ncec_ptpn/ncec_next list */
535 ncec_refrele_notr(ncec);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700536}
537
538void
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800539ncec_inactive(ncec_t *ncec)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700540{
541 mblk_t **mpp;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800542 ill_t *ill = ncec->ncec_ill;
543 ip_stack_t *ipst = ncec->ncec_ipst;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700544
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800545 ASSERT(ncec->ncec_refcnt == 0);
546 ASSERT(MUTEX_HELD(&ncec->ncec_lock));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700547
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800548 /* Count how many condemned nces for kmem_cache callback */
549 if (NCE_ISCONDEMNED(ncec))
550 atomic_add_32(&ipst->ips_num_nce_condemned, -1);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700551
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800552 /* Free all allocated messages */
553 mpp = &ncec->ncec_qd_mp;
554 while (*mpp != NULL) {
555 mblk_t *mp;
dr146992381a2a92006-10-20 16:37:58 -0700556
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800557 mp = *mpp;
558 *mpp = mp->b_next;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700559
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800560 inet_freemsg(mp);
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -0400561 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800562 /*
563 * must have been cleaned up in ncec_delete
564 */
565 ASSERT(list_is_empty(&ncec->ncec_cb));
566 list_destroy(&ncec->ncec_cb);
567 /*
568 * free the ncec_lladdr if one was allocated in nce_add_common()
569 */
570 if (ncec->ncec_lladdr_length > 0)
571 kmem_free(ncec->ncec_lladdr, ncec->ncec_lladdr_length);
572
carlsonj6a8288c2007-09-11 04:26:06 -0700573#ifdef DEBUG
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800574 ncec_trace_cleanup(ncec);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700575#endif
576
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700577 mutex_enter(&ill->ill_lock);
sowmini968d2fd2008-03-21 06:08:04 -0700578 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800579 (char *), "ncec", (void *), ncec);
580 ill->ill_ncec_cnt--;
581 ncec->ncec_ill = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700582 /*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800583 * If the number of ncec's associated with this ill have dropped
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700584 * to zero, check whether we need to restart any operation that
585 * is waiting for this to happen.
586 */
sowmini968d2fd2008-03-21 06:08:04 -0700587 if (ILL_DOWN_OK(ill)) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700588 /* ipif_ill_refrele_tail drops the ill_lock */
589 ipif_ill_refrele_tail(ill);
590 } else {
591 mutex_exit(&ill->ill_lock);
592 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800593
594 mutex_destroy(&ncec->ncec_lock);
595 kmem_cache_free(ncec_cache, ncec);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700596}
597
598/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800599 * ncec_walk routine. Delete the ncec if it is associated with the ill
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700600 * that is going away. Always called as a writer.
601 */
602void
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300603ncec_delete_per_ill(ncec_t *ncec, void *arg)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700604{
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300605 if ((ncec != NULL) && ncec->ncec_ill == arg) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800606 ncec_delete(ncec);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700607 }
608}
609
610/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800611 * Neighbor Cache cleanup logic for a list of ncec_t entries.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700612 */
613static void
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800614nce_cleanup_list(ncec_t *ncec)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700615{
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800616 ncec_t *ncec_next;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700617
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800618 ASSERT(ncec != NULL);
619 while (ncec != NULL) {
620 ncec_next = ncec->ncec_next;
621 ncec->ncec_next = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700622
623 /*
624 * It is possible for the last ndp walker (this thread)
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800625 * to come here after ncec_delete has marked the ncec CONDEMNED
626 * and before it has removed the ncec from the fastpath list
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700627 * or called untimeout. So we need to do it here. It is safe
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800628 * for both ncec_delete and this thread to do it twice or
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700629 * even simultaneously since each of the threads has a
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800630 * reference on the ncec.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700631 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800632 nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700633 /*
634 * Cancel any running timer. Timeout can't be restarted
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800635 * since CONDEMNED is set. The ncec_lock can't be
636 * held across untimeout though passing invalid timeout
637 * id is fine.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700638 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800639 if (ncec->ncec_timeout_id != 0) {
640 (void) untimeout(ncec->ncec_timeout_id);
641 ncec->ncec_timeout_id = 0;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700642 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800643 /* Removed from ncec_ptpn/ncec_next list */
644 ncec_refrele_notr(ncec);
645 ncec = ncec_next;
sangeetac793af92006-08-11 05:59:29 -0700646 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700647}
648
649/*
carlsonj69bb4bb2006-08-14 14:10:48 -0700650 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted.
651 */
652boolean_t
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800653nce_restart_dad(ncec_t *ncec)
carlsonj69bb4bb2006-08-14 14:10:48 -0700654{
655 boolean_t started;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800656 ill_t *ill, *hwaddr_ill;
carlsonj69bb4bb2006-08-14 14:10:48 -0700657
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800658 if (ncec == NULL)
carlsonj69bb4bb2006-08-14 14:10:48 -0700659 return (B_FALSE);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800660 ill = ncec->ncec_ill;
661 mutex_enter(&ncec->ncec_lock);
662 if (ncec->ncec_state == ND_PROBE) {
663 mutex_exit(&ncec->ncec_lock);
carlsonj69bb4bb2006-08-14 14:10:48 -0700664 started = B_TRUE;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800665 } else if (ncec->ncec_state == ND_REACHABLE) {
666 ASSERT(ncec->ncec_lladdr != NULL);
667 ncec->ncec_state = ND_PROBE;
668 ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT;
669 /*
670 * Slight cheat here: we don't use the initial probe delay
671 * for IPv4 in this obscure case.
672 */
673 mutex_exit(&ncec->ncec_lock);
674 if (IS_IPMP(ill)) {
675 hwaddr_ill = ipmp_illgrp_find_ill(ill->ill_grp,
676 ncec->ncec_lladdr, ncec->ncec_lladdr_length);
677 } else {
678 hwaddr_ill = ill;
carlsonj69bb4bb2006-08-14 14:10:48 -0700679 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800680 nce_dad(ncec, hwaddr_ill, B_TRUE);
carlsonj69bb4bb2006-08-14 14:10:48 -0700681 started = B_TRUE;
682 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800683 mutex_exit(&ncec->ncec_lock);
carlsonj69bb4bb2006-08-14 14:10:48 -0700684 started = B_FALSE;
685 }
686 return (started);
687}
688
689/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800690 * IPv6 Cache entry lookup. Try to find an ncec matching the parameters passed.
691 * If one is found, the refcnt on the ncec will be incremented.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700692 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800693ncec_t *
694ncec_lookup_illgrp_v6(ill_t *ill, const in6_addr_t *addr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700695{
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800696 ncec_t *ncec;
697 ip_stack_t *ipst = ill->ill_ipst;
dh155122f4b3ec62007-01-19 16:59:38 -0800698
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800699 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
700 mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
dh155122f4b3ec62007-01-19 16:59:38 -0800701
702 /* Get head of v6 hash table */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800703 ncec = *((ncec_t **)NCE_HASH_PTR_V6(ipst, *addr));
704 ncec = ncec_lookup_illgrp(ill, addr, ncec);
705 mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
706 rw_exit(&ipst->ips_ill_g_lock);
707 return (ncec);
sangeetac793af92006-08-11 05:59:29 -0700708}
709/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800710 * IPv4 Cache entry lookup. Try to find an ncec matching the parameters passed.
711 * If one is found, the refcnt on the ncec will be incremented.
sangeetac793af92006-08-11 05:59:29 -0700712 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800713ncec_t *
714ncec_lookup_illgrp_v4(ill_t *ill, const in_addr_t *addr)
sangeetac793af92006-08-11 05:59:29 -0700715{
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800716 ncec_t *ncec = NULL;
sangeetac793af92006-08-11 05:59:29 -0700717 in6_addr_t addr6;
dh155122f4b3ec62007-01-19 16:59:38 -0800718 ip_stack_t *ipst = ill->ill_ipst;
sangeetac793af92006-08-11 05:59:29 -0700719
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800720 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
721 mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
dh155122f4b3ec62007-01-19 16:59:38 -0800722
723 /* Get head of v4 hash table */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800724 ncec = *((ncec_t **)NCE_HASH_PTR_V4(ipst, *addr));
sangeetac793af92006-08-11 05:59:29 -0700725 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800726 ncec = ncec_lookup_illgrp(ill, &addr6, ncec);
727 mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
728 rw_exit(&ipst->ips_ill_g_lock);
729 return (ncec);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700730}
731
732/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800733 * Cache entry lookup. Try to find an ncec matching the parameters passed.
734 * If an ncec is found, increment the hold count on that ncec.
735 * The caller passes in the start of the appropriate hash table, and must
736 * be holding the appropriate global lock (ndp_g_lock). In addition, since
737 * this function matches ncec_t entries across the illgrp, the ips_ill_g_lock
738 * must be held as reader.
739 *
740 * This function always matches across the ipmp group.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700741 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800742ncec_t *
743ncec_lookup_illgrp(ill_t *ill, const in6_addr_t *addr, ncec_t *ncec)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700744{
dh155122f4b3ec62007-01-19 16:59:38 -0800745 ndp_g_t *ndp;
746 ip_stack_t *ipst = ill->ill_ipst;
747
748 if (ill->ill_isv6)
749 ndp = ipst->ips_ndp6;
750 else
751 ndp = ipst->ips_ndp4;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700752
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800753 ASSERT(ill != NULL);
sangeetac793af92006-08-11 05:59:29 -0700754 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700755 if (IN6_IS_ADDR_UNSPECIFIED(addr))
756 return (NULL);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800757 for (; ncec != NULL; ncec = ncec->ncec_next) {
758 if (ncec->ncec_ill == ill ||
759 IS_IN_SAME_ILLGRP(ill, ncec->ncec_ill)) {
760 if (IN6_ARE_ADDR_EQUAL(&ncec->ncec_addr, addr)) {
761 mutex_enter(&ncec->ncec_lock);
762 if (!NCE_ISCONDEMNED(ncec)) {
763 ncec_refhold_locked(ncec);
764 mutex_exit(&ncec->ncec_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700765 break;
766 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800767 mutex_exit(&ncec->ncec_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700768 }
769 }
770 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800771 return (ncec);
772}
773
774/*
775 * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t
776 * entries for ill only, i.e., when ill is part of an ipmp group,
777 * nce_lookup_v4 will never try to match across the group.
778 */
779nce_t *
780nce_lookup_v4(ill_t *ill, const in_addr_t *addr)
781{
782 nce_t *nce;
783 in6_addr_t addr6;
784 ip_stack_t *ipst = ill->ill_ipst;
785
786 mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
787 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
788 nce = nce_lookup_addr(ill, &addr6);
789 mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700790 return (nce);
791}
792
793/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800794 * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t
795 * entries for ill only, i.e., when ill is part of an ipmp group,
796 * nce_lookup_v6 will never try to match across the group.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700797 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800798nce_t *
799nce_lookup_v6(ill_t *ill, const in6_addr_t *addr6)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700800{
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800801 nce_t *nce;
802 ip_stack_t *ipst = ill->ill_ipst;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700803
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800804 mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
805 nce = nce_lookup_addr(ill, addr6);
806 mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700807 return (nce);
808}
809
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800810static nce_t *
811nce_lookup_addr(ill_t *ill, const in6_addr_t *addr)
812{
813 nce_t *nce;
814
815 ASSERT(ill != NULL);
816#ifdef DEBUG
817 if (ill->ill_isv6)
818 ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp6->ndp_g_lock));
819 else
820 ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp4->ndp_g_lock));
821#endif
822 mutex_enter(&ill->ill_lock);
823 nce = nce_lookup(ill, addr);
824 mutex_exit(&ill->ill_lock);
825 return (nce);
826}
827
828
829/*
830 * Router turned to host. We need to make sure that cached copies of the ncec
831 * are not used for forwarding packets if they were derived from the default
832 * route, and that the default route itself is removed, as required by
833 * section 7.2.5 of RFC 2461.
834 *
835 * Note that the ncec itself probably has valid link-layer information for the
836 * nexthop, so that there is no reason to delete the ncec, as long as the
837 * ISROUTER flag is turned off.
838 */
839static void
840ncec_router_to_host(ncec_t *ncec)
841{
842 ire_t *ire;
843 ip_stack_t *ipst = ncec->ncec_ipst;
844
845 mutex_enter(&ncec->ncec_lock);
846 ncec->ncec_flags &= ~NCE_F_ISROUTER;
847 mutex_exit(&ncec->ncec_lock);
848
849 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, &ipv6_all_zeros,
850 &ncec->ncec_addr, IRE_DEFAULT, ncec->ncec_ill, ALL_ZONES, NULL,
851 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW, 0, ipst, NULL);
852 if (ire != NULL) {
853 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst);
854 ire_delete(ire);
855 ire_refrele(ire);
856 }
857}
858
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700859/*
860 * Process passed in parameters either from an incoming packet or via
861 * user ioctl.
862 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800863void
864nce_process(ncec_t *ncec, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700865{
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800866 ill_t *ill = ncec->ncec_ill;
867 uint32_t hw_addr_len = ill->ill_phys_addr_length;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700868 boolean_t ll_updated = B_FALSE;
869 boolean_t ll_changed;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800870 nce_t *nce;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700871
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800872 ASSERT(ncec->ncec_ipversion == IPV6_VERSION);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700873 /*
874 * No updates of link layer address or the neighbor state is
875 * allowed, when the cache is in NONUD state. This still
876 * allows for responding to reachability solicitation.
877 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800878 mutex_enter(&ncec->ncec_lock);
879 if (ncec->ncec_state == ND_INCOMPLETE) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700880 if (hw_addr == NULL) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800881 mutex_exit(&ncec->ncec_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700882 return;
883 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800884 nce_set_ll(ncec, hw_addr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700885 /*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800886 * Update ncec state and send the queued packets
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700887 * back to ip this time ire will be added.
888 */
889 if (flag & ND_NA_FLAG_SOLICITED) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800890 nce_update(ncec, ND_REACHABLE, NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700891 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800892 nce_update(ncec, ND_STALE, NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700893 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800894 mutex_exit(&ncec->ncec_lock);
895 nce = nce_fastpath(ncec, B_TRUE, NULL);
896 nce_resolv_ok(ncec);
897 if (nce != NULL)
898 nce_refrele(nce);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700899 return;
900 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800901 ll_changed = nce_cmp_ll_addr(ncec, hw_addr, hw_addr_len);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700902 if (!is_adv) {
903 /* If this is a SOLICITATION request only */
904 if (ll_changed)
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800905 nce_update(ncec, ND_STALE, hw_addr);
906 mutex_exit(&ncec->ncec_lock);
907 ncec_cb_dispatch(ncec);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700908 return;
909 }
910 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) {
911 /* If in any other state than REACHABLE, ignore */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800912 if (ncec->ncec_state == ND_REACHABLE) {
913 nce_update(ncec, ND_STALE, NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700914 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800915 mutex_exit(&ncec->ncec_lock);
916 ncec_cb_dispatch(ncec);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700917 return;
918 } else {
919 if (ll_changed) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800920 nce_update(ncec, ND_UNCHANGED, hw_addr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700921 ll_updated = B_TRUE;
922 }
923 if (flag & ND_NA_FLAG_SOLICITED) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800924 nce_update(ncec, ND_REACHABLE, NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700925 } else {
926 if (ll_updated) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800927 nce_update(ncec, ND_STALE, NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700928 }
929 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800930 mutex_exit(&ncec->ncec_lock);
931 if (!(flag & ND_NA_FLAG_ROUTER) && (ncec->ncec_flags &
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700932 NCE_F_ISROUTER)) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800933 ncec_router_to_host(ncec);
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -0400934 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800935 ncec_cb_dispatch(ncec);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700936 }
937 }
938}
939
940/*
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300941 * Pass arg1 to the cbf supplied, along with each ncec in existence.
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800942 * ncec_walk() places a REFHOLD on the ncec and drops the lock when
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700943 * walking the hash list.
944 */
945void
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300946ncec_walk_common(ndp_g_t *ndp, ill_t *ill, ncec_walk_cb_t cbf,
947 void *arg1, boolean_t trace)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700948{
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800949 ncec_t *ncec;
950 ncec_t *ncec1;
951 ncec_t **ncep;
952 ncec_t *free_nce_list = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700953
sangeetac793af92006-08-11 05:59:29 -0700954 mutex_enter(&ndp->ndp_g_lock);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800955 /* Prevent ncec_delete from unlink and free of NCE */
sangeetac793af92006-08-11 05:59:29 -0700956 ndp->ndp_g_walker++;
957 mutex_exit(&ndp->ndp_g_lock);
958 for (ncep = ndp->nce_hash_tbl;
959 ncep < A_END(ndp->nce_hash_tbl); ncep++) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800960 for (ncec = *ncep; ncec != NULL; ncec = ncec1) {
961 ncec1 = ncec->ncec_next;
962 if (ill == NULL || ncec->ncec_ill == ill) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700963 if (trace) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800964 ncec_refhold(ncec);
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300965 (*cbf)(ncec, arg1);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800966 ncec_refrele(ncec);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700967 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800968 ncec_refhold_notr(ncec);
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300969 (*cbf)(ncec, arg1);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800970 ncec_refrele_notr(ncec);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700971 }
972 }
973 }
974 }
sangeetac793af92006-08-11 05:59:29 -0700975 mutex_enter(&ndp->ndp_g_lock);
976 ndp->ndp_g_walker--;
sangeetac793af92006-08-11 05:59:29 -0700977 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700978 /* Time to delete condemned entries */
sangeetac793af92006-08-11 05:59:29 -0700979 for (ncep = ndp->nce_hash_tbl;
980 ncep < A_END(ndp->nce_hash_tbl); ncep++) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800981 ncec = *ncep;
982 if (ncec != NULL) {
983 nce_remove(ndp, ncec, &free_nce_list);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700984 }
985 }
sangeetac793af92006-08-11 05:59:29 -0700986 ndp->ndp_g_walker_cleanup = B_FALSE;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700987 }
sowmini54da8752007-07-24 07:26:05 -0700988
sangeetac793af92006-08-11 05:59:29 -0700989 mutex_exit(&ndp->ndp_g_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700990
991 if (free_nce_list != NULL) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800992 nce_cleanup_list(free_nce_list);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700993 }
994}
995
dh155122f4b3ec62007-01-19 16:59:38 -0800996/*
997 * Walk everything.
998 * Note that ill can be NULL hence can't derive the ipst from it.
999 */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001000void
Toomas Soome8a06b3d2018-10-15 22:13:16 +03001001ncec_walk(ill_t *ill, ncec_walk_cb_t cbf, void *arg1, ip_stack_t *ipst)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001002{
Toomas Soome8a06b3d2018-10-15 22:13:16 +03001003 ncec_walk_common(ipst->ips_ndp4, ill, cbf, arg1, B_TRUE);
1004 ncec_walk_common(ipst->ips_ndp6, ill, cbf, arg1, B_TRUE);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001005}
1006
1007/*
Dan McDonald42c5ef02019-02-22 14:42:52 -05001008 * Cheesy globals (i.e. all netstacks) for both a limit on per-ill multicast
1009 * NCEs, and the number to reclaim if we hit the limit. Used by
1010 * nce_set_multicast_v[46]() to limit the linked-list length of ill_nce. Until
1011 * we solve the multicast-mappings-shouldn't-be-NCEs problem, use this.
1012 */
1013
1014/* Maximum number of multicast NCEs on an ill. */
1015uint_t ip_max_ill_mcast_nces = 16384;
1016/*
1017 * Number of NCEs to delete if we hit the maximum above. 0 means *don't* and
1018 * return an error. Non-zero means delete so many, and if the number is >=
1019 * the max above, that means delete them all.
1020 */
1021uint_t ip_ill_mcast_reclaim = 256;
1022
1023/*
1024 * Encapsulate multicast ill capping in a function, for easier DTrace
1025 * detections. Return a list of refheld NCEs to destroy-via-refrele. That
1026 * list can be NULL, but can only be non-NULL if we successfully reclaimed.
1027 *
1028 * NOTE: This function must be called while holding the ill_lock AND
1029 * JUST PRIOR to making the insertion into the ill_nce list.
1030 *
1031 * We can't release the ones we delete ourselves because the ill_lock is held
1032 * by the caller. They are, instead, passed back in a list_t for deletion
1033 * outside of the ill_lock hold. nce_graveyard_free() actually frees them.
1034 *
1035 * While this covers nce_t, ncec_t gets done even further down the road. See
1036 * nce_graveyard_free() for why.
1037 */
1038static boolean_t
1039nce_too_many_mcast(ill_t *ill, list_t *graveyard)
1040{
1041 uint_t reclaim_count, max_count, reclaimed = 0;
1042 boolean_t too_many;
1043 nce_t *nce, *deadman;
1044
1045 ASSERT(graveyard != NULL);
1046 ASSERT(list_is_empty(graveyard));
1047 ASSERT(MUTEX_HELD(&ill->ill_lock));
1048
1049 /*
1050 * NOTE: Some grinning weirdo may have lowered the global max beyond
1051 * what this ill currently has. The behavior in this case will be
1052 * trim-back just by the reclaim amount for any new ones.
1053 */
1054 max_count = ip_max_ill_mcast_nces;
1055 reclaim_count = min(ip_ill_mcast_reclaim, max_count);
1056
1057 /* All good? */
1058 if (ill->ill_mcast_nces < max_count)
1059 return (B_FALSE); /* Yes, all good. */
1060
1061 if (reclaim_count == 0)
1062 return (B_TRUE); /* Don't bother - we're stuck. */
1063
1064 /* We need to reclaim now. Exploit our held ill_lock. */
1065
1066 /*
1067 * Start at the tail and work backwards, new nces are head-inserted,
1068 * so we'll be reaping the oldest entries.
1069 */
1070 nce = list_tail(&ill->ill_nce);
1071 while (reclaimed < reclaim_count) {
1072 /* Skip ahead to a multicast NCE. */
1073 while (nce != NULL &&
1074 (nce->nce_common->ncec_flags & NCE_F_MCAST) == 0) {
1075 nce = list_prev(&ill->ill_nce, nce);
1076 }
1077 if (nce == NULL)
1078 break;
1079
1080 /*
1081 * NOTE: For now, we just delete the first one(s) we find.
1082 * This is not optimal, and may require some inspection of nce
1083 * & its ncec to be better.
1084 */
1085 deadman = nce;
1086 nce = list_prev(&ill->ill_nce, nce);
1087
1088 /* nce_delete() requires caller holds... */
1089 nce_refhold(deadman);
1090 nce_delete(deadman); /* Bumps down ill_mcast_nces. */
1091
1092 /* Link the dead ones singly, still refheld... */
1093 list_insert_tail(graveyard, deadman);
1094 reclaimed++;
1095 }
1096
1097 if (reclaimed != reclaim_count) {
1098 /* We didn't have enough to reach reclaim_count. Why?!? */
1099 DTRACE_PROBE3(ill__mcast__nce__reclaim__mismatch, ill_t *, ill,
1100 uint_t, reclaimed, uint_t, reclaim_count);
1101
1102 /* In case for some REALLY weird reason we found none! */
1103 too_many = (reclaimed == 0);
1104 } else {
1105 too_many = B_FALSE;
1106 }
1107
1108 return (too_many);
1109}
1110
1111static void
1112ncec_mcast_reap_one(ncec_t *ncec, void *arg)
1113{
1114 boolean_t reapit;
1115 ill_t *ill = (ill_t *)arg;
1116
1117 /* Obvious no-lock-needed checks... */
1118 if (ncec == NULL || ncec->ncec_ill != ill ||
1119 (ncec->ncec_flags & NCE_F_MCAST) == 0)
1120 return;
1121
1122 mutex_enter(&ncec->ncec_lock);
1123 /*
1124 * It's refheld by the walk infrastructure. It has one reference for
1125 * being in the ndp_g_hash, and if an nce_t exists, that's one more.
1126 * We want ones without an nce_t, so 2 is the magic number. If it's
1127 * LESS than 2, we have much bigger problems anyway.
1128 */
1129 ASSERT(ncec->ncec_refcnt >= 2);
1130 reapit = (ncec->ncec_refcnt == 2);
1131 mutex_exit(&ncec->ncec_lock);
1132
1133 if (reapit) {
1134 IP_STAT(ill->ill_ipst, ip_nce_mcast_reclaim_deleted);
1135 ncec_delete(ncec);
1136 }
1137}
1138
1139/*
1140 * Attempt to reap stray multicast ncec_t structures left in the wake of
1141 * nce_graveyard_free(). This is a taskq servicing routine, as it's well
1142 * outside any netstack-global locks being held - ndp_g_lock in this case. We
1143 * have a reference hold on the ill, which will prevent any unplumbing races.
1144 */
1145static void
1146ncec_mcast_reap(void *arg)
1147{
1148 ill_t *ill = (ill_t *)arg;
1149
1150 IP_STAT(ill->ill_ipst, ip_nce_mcast_reclaim_calls);
1151 ncec_walk(ill, ncec_mcast_reap_one, ill, ill->ill_ipst);
1152 mutex_enter(&ill->ill_lock);
1153 ill->ill_mcast_ncec_cleanup = B_FALSE;
1154 /*
1155 * Inline a _notr() version of ill_refrele. See nce_graveyard_free()
1156 * below for why.
1157 */
1158 ill->ill_refcnt--;
1159 if (ill->ill_refcnt == 0)
1160 ipif_ill_refrele_tail(ill); /* Drops ill_lock. */
1161 else
1162 mutex_exit(&ill->ill_lock);
1163}
1164
1165/*
1166 * Free a list (including handling an empty list or NULL list) of
1167 * reference-held NCEs that were reaped from a nce_too_many_mcast()
1168 * call. Separate because the caller must have dropped ndp_g_lock first.
1169 *
1170 * This also schedules a taskq task to unlink underlying NCECs from the
1171 * ndp_g_hash, which are protected by ndp_g_lock.
1172 */
1173static void
1174nce_graveyard_free(list_t *graveyard)
1175{
1176 nce_t *deadman, *current;
1177 ill_t *ill;
1178 boolean_t doit;
1179
1180 if (graveyard == NULL)
1181 return;
1182
1183 current = list_head(graveyard);
1184 if (current == NULL) {
1185 list_destroy(graveyard);
1186 return;
1187 }
1188
1189 ill = current->nce_ill;
1190 /*
1191 * Normally one should ill_refhold(ill) here. There's no _notr()
1192 * variant like there is for ire_t, dce_t, or even ncec_t, but this is
1193 * the ONLY case that'll break the mh_trace that IP debugging uses for
1194 * reference counts (i.e. they assume same thread releases as
1195 * holds). Instead, we inline ill_refhold() here. We must do the same
1196 * in the release done by the ncec_mcast_reap() above.
1197 */
1198 mutex_enter(&ill->ill_lock);
1199 ill->ill_refcnt++;
1200 mutex_exit(&ill->ill_lock);
1201
1202 while (current != NULL) {
1203 ASSERT3P(ill, ==, current->nce_ill);
1204 deadman = current;
1205 current = list_next(graveyard, deadman);
1206 list_remove(graveyard, deadman);
1207 ASSERT3U((deadman->nce_common->ncec_flags & NCE_F_MCAST), !=,
1208 0);
1209 nce_refrele(deadman);
1210 }
1211 list_destroy(graveyard);
1212
1213 mutex_enter(&ill->ill_lock);
1214 if (ill->ill_mcast_ncec_cleanup)
1215 doit = B_FALSE;
1216 else {
1217 ill->ill_mcast_ncec_cleanup = B_TRUE;
1218 doit = B_TRUE;
1219 }
1220 mutex_exit(&ill->ill_lock);
1221 if (!doit || taskq_dispatch(system_taskq, ncec_mcast_reap,
Toomas Soomefc8ae2e2019-03-20 17:29:38 +02001222 ill, TQ_NOSLEEP) == TASKQID_INVALID) {
Dan McDonald42c5ef02019-02-22 14:42:52 -05001223 mutex_enter(&ill->ill_lock);
1224 if (doit) {
1225 IP_STAT(ill->ill_ipst, ip_nce_mcast_reclaim_tqfail);
1226 ill->ill_mcast_ncec_cleanup = B_FALSE;
1227 }
1228 /* There's no _notr() for ill_refrele(), so inline it here. */
1229 ill->ill_refcnt--;
1230 if (ill->ill_refcnt == 0)
1231 ipif_ill_refrele_tail(ill); /* Drops ill_lock */
1232 else
1233 mutex_exit(&ill->ill_lock);
1234 }
1235}
1236
1237/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001238 * For each interface an entry is added for the unspecified multicast group.
1239 * Here that mapping is used to form the multicast cache entry for a particular
1240 * multicast destination.
1241 */
1242static int
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001243nce_set_multicast_v6(ill_t *ill, const in6_addr_t *dst,
1244 uint16_t flags, nce_t **newnce)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001245{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001246 uchar_t *hw_addr;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001247 int err = 0;
dh155122f4b3ec62007-01-19 16:59:38 -08001248 ip_stack_t *ipst = ill->ill_ipst;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001249 nce_t *nce;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001250
1251 ASSERT(ill != NULL);
sangeetac793af92006-08-11 05:59:29 -07001252 ASSERT(ill->ill_isv6);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001253 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst)));
1254
dh155122f4b3ec62007-01-19 16:59:38 -08001255 mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001256 nce = nce_lookup_addr(ill, dst);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001257 if (nce != NULL) {
dh155122f4b3ec62007-01-19 16:59:38 -08001258 mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001259 goto done;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001260 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001261 if (ill->ill_net_type == IRE_IF_RESOLVER) {
1262 /*
1263 * For IRE_IF_RESOLVER a hardware mapping can be
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001264 * generated.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001265 */
1266 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP);
1267 if (hw_addr == NULL) {
dh155122f4b3ec62007-01-19 16:59:38 -08001268 mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001269 return (ENOMEM);
1270 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001271 ip_mcast_mapping(ill, (uchar_t *)dst, hw_addr);
1272 } else {
Erik Nordmark0e0e37a2009-11-17 11:42:22 -08001273 /* No hw_addr is needed for IRE_IF_NORESOLVER. */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001274 hw_addr = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001275 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001276 ASSERT((flags & NCE_F_MCAST) != 0);
1277 ASSERT((flags & NCE_F_NONUD) != 0);
1278 /* nce_state will be computed by nce_add_common() */
1279 err = nce_add_v6(ill, hw_addr, ill->ill_phys_addr_length, dst, flags,
1280 ND_UNCHANGED, &nce);
dh155122f4b3ec62007-01-19 16:59:38 -08001281 mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001282 if (err == 0)
Dan McDonald42c5ef02019-02-22 14:42:52 -05001283 err = (nce != NULL) ? nce_add_v6_postprocess(nce) : ENOMEM;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001284 if (hw_addr != NULL)
1285 kmem_free(hw_addr, ill->ill_nd_lla_len);
1286 if (err != 0) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001287 ip1dbg(("nce_set_multicast_v6: create failed" "%d\n", err));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001288 return (err);
1289 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001290done:
1291 ASSERT(nce->nce_common->ncec_state == ND_REACHABLE);
1292 if (newnce != NULL)
1293 *newnce = nce;
1294 else
1295 nce_refrele(nce);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001296 return (0);
1297}
1298
1299/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001300 * Return the link layer address, and any flags of a ncec.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001301 */
1302int
1303ndp_query(ill_t *ill, struct lif_nd_req *lnr)
1304{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001305 ncec_t *ncec;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001306 in6_addr_t *addr;
1307 sin6_t *sin6;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001308
sangeetac793af92006-08-11 05:59:29 -07001309 ASSERT(ill != NULL && ill->ill_isv6);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001310 sin6 = (sin6_t *)&lnr->lnr_addr;
1311 addr = &sin6->sin6_addr;
1312
meeme11c3f42009-01-06 20:16:25 -05001313 /*
1314 * NOTE: if the ill is an IPMP interface, then match against the whole
1315 * illgrp. This e.g. allows in.ndpd to retrieve the link layer
1316 * addresses for the data addresses on an IPMP interface even though
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001317 * ipif_ndp_up() created them with an ncec_ill of ipif_bound_ill.
meeme11c3f42009-01-06 20:16:25 -05001318 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001319 ncec = ncec_lookup_illgrp_v6(ill, addr);
1320 if (ncec == NULL)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001321 return (ESRCH);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001322 /* If no link layer address is available yet, return ESRCH */
1323 if (!NCE_ISREACHABLE(ncec)) {
1324 ncec_refrele(ncec);
Sowmini Varadhanee07f6e2009-05-19 15:07:44 -04001325 return (ESRCH);
1326 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001327 lnr->lnr_hdw_len = ill->ill_phys_addr_length;
1328 bcopy(ncec->ncec_lladdr, (uchar_t *)&lnr->lnr_hdw_addr,
1329 lnr->lnr_hdw_len);
1330 if (ncec->ncec_flags & NCE_F_ISROUTER)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001331 lnr->lnr_flags = NDF_ISROUTER_ON;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001332 if (ncec->ncec_flags & NCE_F_ANYCAST)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001333 lnr->lnr_flags |= NDF_ANYCAST_ON;
Cody Peter Melloe7df7762015-09-03 22:04:07 +00001334 if (ncec->ncec_flags & NCE_F_STATIC)
1335 lnr->lnr_flags |= NDF_STATIC;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001336 ncec_refrele(ncec);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001337 return (0);
1338}
1339
1340/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001341 * Finish setting up the Enable/Disable multicast for the driver.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001342 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001343mblk_t *
1344ndp_mcastreq(ill_t *ill, const in6_addr_t *v6group, uint32_t hw_addr_len,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001345 uint32_t hw_addr_offset, mblk_t *mp)
1346{
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001347 uchar_t *hw_addr;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001348 ipaddr_t v4group;
1349 uchar_t *addr;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001350
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001351 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001352 if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1353 IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
1354
1355 ASSERT(CLASSD(v4group));
1356 ASSERT(!(ill->ill_isv6));
1357
1358 addr = (uchar_t *)&v4group;
1359 } else {
1360 ASSERT(IN6_IS_ADDR_MULTICAST(v6group));
1361 ASSERT(ill->ill_isv6);
1362
1363 addr = (uchar_t *)v6group;
1364 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001365 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001366 if (hw_addr == NULL) {
1367 ip0dbg(("ndp_mcastreq NULL hw_addr\n"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001368 freemsg(mp);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001369 return (NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001370 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001371
1372 ip_mcast_mapping(ill, addr, hw_addr);
1373 return (mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001374}
1375
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -04001376void
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001377ip_ndp_resolve(ncec_t *ncec)
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -04001378{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001379 in_addr_t sender4 = INADDR_ANY;
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -04001380 in6_addr_t sender6 = ipv6_all_zeros;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001381 ill_t *src_ill;
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -04001382 uint32_t ms;
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -04001383
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001384 src_ill = nce_resolve_src(ncec, &sender6);
1385 if (src_ill == NULL) {
1386 /* Make sure we try again later */
1387 ms = ncec->ncec_ill->ill_reachable_retrans_time;
1388 nce_restart_timer(ncec, (clock_t)ms);
1389 return;
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -04001390 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001391 if (ncec->ncec_ipversion == IPV4_VERSION)
1392 IN6_V4MAPPED_TO_IPADDR(&sender6, sender4);
1393 mutex_enter(&ncec->ncec_lock);
1394 if (ncec->ncec_ipversion == IPV6_VERSION)
1395 ms = ndp_solicit(ncec, sender6, src_ill);
1396 else
1397 ms = arp_request(ncec, sender4, src_ill);
1398 mutex_exit(&ncec->ncec_lock);
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -04001399 if (ms == 0) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001400 if (ncec->ncec_state != ND_REACHABLE) {
1401 if (ncec->ncec_ipversion == IPV6_VERSION)
1402 ndp_resolv_failed(ncec);
1403 else
1404 arp_resolv_failed(ncec);
1405 ASSERT((ncec->ncec_flags & NCE_F_STATIC) == 0);
1406 nce_make_unreachable(ncec);
1407 ncec_delete(ncec);
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -04001408 }
1409 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001410 nce_restart_timer(ncec, (clock_t)ms);
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -04001411 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001412done:
1413 ill_refrele(src_ill);
Sowmini Varadhan4b7cbb42009-03-26 09:12:05 -04001414}
1415
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001416/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001417 * Send an IPv6 neighbor solicitation.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001418 * Returns number of milliseconds after which we should either rexmit or abort.
1419 * Return of zero means we should abort.
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001420 * The caller holds the ncec_lock to protect ncec_qd_mp and ncec_rcnt.
1421 * The optional source address is used as a hint to ndp_solicit for
1422 * which source to use in the packet.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001423 *
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001424 * NOTE: This routine drops ncec_lock (and later reacquires it) when sending
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001425 * the packet.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001426 */
1427uint32_t
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001428ndp_solicit(ncec_t *ncec, in6_addr_t src, ill_t *ill)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001429{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001430 in6_addr_t dst;
1431 boolean_t dropped = B_FALSE;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001432
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001433 ASSERT(ncec->ncec_ipversion == IPV6_VERSION);
1434 ASSERT(MUTEX_HELD(&ncec->ncec_lock));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001435
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001436 if (ncec->ncec_rcnt == 0)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001437 return (0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001438
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001439 dst = ncec->ncec_addr;
1440 ncec->ncec_rcnt--;
1441 mutex_exit(&ncec->ncec_lock);
1442 dropped = ndp_xmit(ill, ND_NEIGHBOR_SOLICIT, ill->ill_phys_addr,
1443 ill->ill_phys_addr_length, &src, &dst, 0);
1444 mutex_enter(&ncec->ncec_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001445 if (dropped)
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001446 ncec->ncec_rcnt++;
1447 return (ncec->ncec_ill->ill_reachable_retrans_time);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001448}
1449
carlsonj69bb4bb2006-08-14 14:10:48 -07001450/*
1451 * Attempt to recover an address on an interface that's been marked as a
1452 * duplicate. Because NCEs are destroyed when the interface goes down, there's
1453 * no easy way to just probe the address and have the right thing happen if
1454 * it's no longer in use. Instead, we just bring it up normally and allow the
1455 * regular interface start-up logic to probe for a remaining duplicate and take
1456 * us back down if necessary.
1457 * Neither DHCP nor temporary addresses arrive here; they're excluded by
1458 * ip_ndp_excl.
1459 */
1460/* ARGSUSED */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001461void
1462ip_addr_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
carlsonj69bb4bb2006-08-14 14:10:48 -07001463{
1464 ill_t *ill = rq->q_ptr;
1465 ipif_t *ipif;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001466 in6_addr_t *addr6 = (in6_addr_t *)mp->b_rptr;
1467 in_addr_t *addr4 = (in_addr_t *)mp->b_rptr;
1468 boolean_t addr_equal;
carlsonj69bb4bb2006-08-14 14:10:48 -07001469
1470 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
1471 /*
1472 * We do not support recovery of proxy ARP'd interfaces,
1473 * because the system lacks a complete proxy ARP mechanism.
1474 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001475 if (ill->ill_isv6) {
1476 addr_equal = IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
1477 addr6);
1478 } else {
1479 addr_equal = (ipif->ipif_lcl_addr == *addr4);
carlsonj69bb4bb2006-08-14 14:10:48 -07001480 }
1481
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001482 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || !addr_equal)
1483 continue;
1484
carlsonj69bb4bb2006-08-14 14:10:48 -07001485 /*
carlsonj686c2682006-12-22 09:18:37 -08001486 * If we have already recovered or if the interface is going
1487 * away, then ignore.
carlsonj69bb4bb2006-08-14 14:10:48 -07001488 */
1489 mutex_enter(&ill->ill_lock);
carlsonj686c2682006-12-22 09:18:37 -08001490 if (!(ipif->ipif_flags & IPIF_DUPLICATE) ||
meeme11c3f42009-01-06 20:16:25 -05001491 (ipif->ipif_state_flags & IPIF_CONDEMNED)) {
carlsonj69bb4bb2006-08-14 14:10:48 -07001492 mutex_exit(&ill->ill_lock);
1493 continue;
1494 }
1495
1496 ipif->ipif_flags &= ~IPIF_DUPLICATE;
1497 ill->ill_ipif_dup_count--;
1498 mutex_exit(&ill->ill_lock);
1499 ipif->ipif_was_dup = B_TRUE;
1500
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001501 if (ill->ill_isv6) {
1502 VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS);
1503 (void) ipif_up_done_v6(ipif);
1504 } else {
1505 VERIFY(ipif_arp_up(ipif, Res_act_initial, B_TRUE) !=
1506 EINPROGRESS);
1507 (void) ipif_up_done(ipif);
1508 }
carlsonj69bb4bb2006-08-14 14:10:48 -07001509 }
1510 freeb(mp);
1511}
1512
1513/*
1514 * Attempt to recover an IPv6 interface that's been shut down as a duplicate.
1515 * As long as someone else holds the address, the interface will stay down.
1516 * When that conflict goes away, the interface is brought back up. This is
1517 * done so that accidental shutdowns of addresses aren't made permanent. Your
1518 * server will recover from a failure.
1519 *
1520 * For DHCP and temporary addresses, recovery is not done in the kernel.
1521 * Instead, it's handled by user space processes (dhcpagent and in.ndpd).
1522 *
1523 * This function is entered on a timer expiry; the ID is in ipif_recovery_id.
1524 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001525void
1526ipif_dup_recovery(void *arg)
carlsonj69bb4bb2006-08-14 14:10:48 -07001527{
1528 ipif_t *ipif = arg;
1529
1530 ipif->ipif_recovery_id = 0;
1531 if (!(ipif->ipif_flags & IPIF_DUPLICATE))
1532 return;
1533
carlsonj686c2682006-12-22 09:18:37 -08001534 /*
1535 * No lock, because this is just an optimization.
1536 */
meeme11c3f42009-01-06 20:16:25 -05001537 if (ipif->ipif_state_flags & IPIF_CONDEMNED)
carlsonj686c2682006-12-22 09:18:37 -08001538 return;
1539
carlsonj69bb4bb2006-08-14 14:10:48 -07001540 /* If the link is down, we'll retry this later */
1541 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING))
1542 return;
1543
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001544 ipif_do_recovery(ipif);
carlsonj69bb4bb2006-08-14 14:10:48 -07001545}
1546
1547/*
1548 * Perform interface recovery by forcing the duplicate interfaces up and
1549 * allowing the system to determine which ones should stay up.
1550 *
1551 * Called both by recovery timer expiry and link-up notification.
1552 */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001553void
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001554ipif_do_recovery(ipif_t *ipif)
carlsonj69bb4bb2006-08-14 14:10:48 -07001555{
1556 ill_t *ill = ipif->ipif_ill;
1557 mblk_t *mp;
dh155122f4b3ec62007-01-19 16:59:38 -08001558 ip_stack_t *ipst = ill->ill_ipst;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001559 size_t mp_size;
carlsonj69bb4bb2006-08-14 14:10:48 -07001560
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001561 if (ipif->ipif_isv6)
1562 mp_size = sizeof (ipif->ipif_v6lcl_addr);
1563 else
1564 mp_size = sizeof (ipif->ipif_lcl_addr);
1565 mp = allocb(mp_size, BPRI_MED);
carlsonj69bb4bb2006-08-14 14:10:48 -07001566 if (mp == NULL) {
carlsonj686c2682006-12-22 09:18:37 -08001567 mutex_enter(&ill->ill_lock);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001568 if (ipst->ips_ip_dup_recovery > 0 &&
1569 ipif->ipif_recovery_id == 0 &&
meeme11c3f42009-01-06 20:16:25 -05001570 !(ipif->ipif_state_flags & IPIF_CONDEMNED)) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001571 ipif->ipif_recovery_id = timeout(ipif_dup_recovery,
dh155122f4b3ec62007-01-19 16:59:38 -08001572 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
carlsonj686c2682006-12-22 09:18:37 -08001573 }
1574 mutex_exit(&ill->ill_lock);
carlsonj69bb4bb2006-08-14 14:10:48 -07001575 } else {
meeme11c3f42009-01-06 20:16:25 -05001576 /*
1577 * A recovery timer may still be running if we got here from
1578 * ill_restart_dad(); cancel that timer.
1579 */
1580 if (ipif->ipif_recovery_id != 0)
1581 (void) untimeout(ipif->ipif_recovery_id);
1582 ipif->ipif_recovery_id = 0;
1583
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001584 if (ipif->ipif_isv6) {
1585 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr,
1586 sizeof (ipif->ipif_v6lcl_addr));
1587 } else {
1588 bcopy(&ipif->ipif_lcl_addr, mp->b_rptr,
1589 sizeof (ipif->ipif_lcl_addr));
1590 }
carlsonj69bb4bb2006-08-14 14:10:48 -07001591 ill_refhold(ill);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001592 qwriter_ip(ill, ill->ill_rq, mp, ip_addr_recover, NEW_OP,
meem8df01f72007-05-30 16:02:35 -07001593 B_FALSE);
carlsonj69bb4bb2006-08-14 14:10:48 -07001594 }
1595}
1596
1597/*
meeme11c3f42009-01-06 20:16:25 -05001598 * Find the MAC and IP addresses in an NA/NS message.
carlsonj69bb4bb2006-08-14 14:10:48 -07001599 */
meeme11c3f42009-01-06 20:16:25 -05001600static void
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001601ip_ndp_find_addresses(mblk_t *mp, ip_recv_attr_t *ira, ill_t *ill,
1602 in6_addr_t *targp, uchar_t **haddr, uint_t *haddrlenp)
carlsonj69bb4bb2006-08-14 14:10:48 -07001603{
meeme11c3f42009-01-06 20:16:25 -05001604 icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
meeme11c3f42009-01-06 20:16:25 -05001605 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;
carlsonj69bb4bb2006-08-14 14:10:48 -07001606 uchar_t *addr;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001607 int alen;
carlsonj69bb4bb2006-08-14 14:10:48 -07001608
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001609 /* icmp_inbound_v6 ensures this */
1610 ASSERT(ira->ira_flags & IRAF_L2SRC_SET);
carlsonj69bb4bb2006-08-14 14:10:48 -07001611
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001612 addr = ira->ira_l2src;
1613 alen = ill->ill_phys_addr_length;
carlsonj69bb4bb2006-08-14 14:10:48 -07001614 if (alen > 0) {
1615 *haddr = addr;
meeme11c3f42009-01-06 20:16:25 -05001616 *haddrlenp = alen;
carlsonj69bb4bb2006-08-14 14:10:48 -07001617 } else {
1618 *haddr = NULL;
meeme11c3f42009-01-06 20:16:25 -05001619 *haddrlenp = 0;
carlsonj69bb4bb2006-08-14 14:10:48 -07001620 }
meeme11c3f42009-01-06 20:16:25 -05001621
1622 /* nd_ns_target and nd_na_target are at the same offset, so we cheat */
1623 *targp = ns->nd_ns_target;
carlsonj69bb4bb2006-08-14 14:10:48 -07001624}
1625
1626/*
1627 * This is for exclusive changes due to NDP duplicate address detection
1628 * failure.
1629 */
1630/* ARGSUSED */
1631static void
1632ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
1633{
1634 ill_t *ill = rq->q_ptr;
1635 ipif_t *ipif;
meeme11c3f42009-01-06 20:16:25 -05001636 uchar_t *haddr;
1637 uint_t haddrlen;
dh155122f4b3ec62007-01-19 16:59:38 -08001638 ip_stack_t *ipst = ill->ill_ipst;
meeme11c3f42009-01-06 20:16:25 -05001639 in6_addr_t targ;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001640 ip_recv_attr_t iras;
1641 mblk_t *attrmp;
carlsonj69bb4bb2006-08-14 14:10:48 -07001642
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001643 attrmp = mp;
1644 mp = mp->b_cont;
1645 attrmp->b_cont = NULL;
1646 if (!ip_recv_attr_from_mblk(attrmp, &iras)) {
1647 /* The ill or ip_stack_t disappeared on us */
1648 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1649 ip_drop_input("ip_recv_attr_from_mblk", mp, ill);
1650 freemsg(mp);
1651 ira_cleanup(&iras, B_TRUE);
1652 return;
carlsonj69bb4bb2006-08-14 14:10:48 -07001653 }
meeme11c3f42009-01-06 20:16:25 -05001654
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001655 ASSERT(ill == iras.ira_rill);
1656
1657 ip_ndp_find_addresses(mp, &iras, ill, &targ, &haddr, &haddrlen);
meeme11c3f42009-01-06 20:16:25 -05001658 if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) {
carlsonj69bb4bb2006-08-14 14:10:48 -07001659 /*
meeme11c3f42009-01-06 20:16:25 -05001660 * Ignore conflicts generated by misbehaving switches that
1661 * just reflect our own messages back to us. For IPMP, we may
1662 * see reflections across any ill in the illgrp.
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001663 *
1664 * RFC2462 and revisions tried to detect both the case
1665 * when a statically configured IPv6 address is a duplicate,
1666 * and the case when the L2 address itself is a duplicate. The
1667 * later is important because, with stateles address autoconf,
1668 * if the L2 address is a duplicate, the resulting IPv6
1669 * address(es) would also be duplicates. We rely on DAD of the
1670 * IPv6 address itself to detect the latter case.
carlsonj69bb4bb2006-08-14 14:10:48 -07001671 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001672 /* For an under ill_grp can change under lock */
1673 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
meeme11c3f42009-01-06 20:16:25 -05001674 if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 ||
1675 IS_UNDER_IPMP(ill) &&
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001676 ipmp_illgrp_find_ill(ill->ill_grp, haddr,
1677 haddrlen) != NULL) {
1678 rw_exit(&ipst->ips_ill_g_lock);
meeme11c3f42009-01-06 20:16:25 -05001679 goto ignore_conflict;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001680 }
1681 rw_exit(&ipst->ips_ill_g_lock);
meeme11c3f42009-01-06 20:16:25 -05001682 }
1683
1684 /*
1685 * Look up the appropriate ipif.
1686 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001687 ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, ipst);
meeme11c3f42009-01-06 20:16:25 -05001688 if (ipif == NULL)
1689 goto ignore_conflict;
1690
1691 /* Reload the ill to match the ipif */
1692 ill = ipif->ipif_ill;
1693
1694 /* If it's already duplicate or ineligible, then don't do anything. */
1695 if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) {
1696 ipif_refrele(ipif);
carlsonj69bb4bb2006-08-14 14:10:48 -07001697 goto ignore_conflict;
1698 }
meem98e93c22007-08-31 12:48:28 -07001699
meeme11c3f42009-01-06 20:16:25 -05001700 /*
1701 * If this is a failure during duplicate recovery, then don't
1702 * complain. It may take a long time to recover.
1703 */
1704 if (!ipif->ipif_was_dup) {
1705 char ibuf[LIFNAMSIZ];
1706 char hbuf[MAC_STR_LEN];
1707 char sbuf[INET6_ADDRSTRLEN];
carlsonj69bb4bb2006-08-14 14:10:48 -07001708
meeme11c3f42009-01-06 20:16:25 -05001709 ipif_get_name(ipif, ibuf, sizeof (ibuf));
1710 cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);"
1711 " disabled", ibuf,
1712 inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)),
1713 mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)));
carlsonj69bb4bb2006-08-14 14:10:48 -07001714 }
meeme11c3f42009-01-06 20:16:25 -05001715 mutex_enter(&ill->ill_lock);
1716 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
1717 ipif->ipif_flags |= IPIF_DUPLICATE;
1718 ill->ill_ipif_dup_count++;
1719 mutex_exit(&ill->ill_lock);
1720 (void) ipif_down(ipif, NULL, NULL);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001721 (void) ipif_down_tail(ipif);
meeme11c3f42009-01-06 20:16:25 -05001722 mutex_enter(&ill->ill_lock);
1723 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
1724 ill->ill_net_type == IRE_IF_RESOLVER &&
1725 !(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
1726 ipst->ips_ip_dup_recovery > 0) {
1727 ASSERT(ipif->ipif_recovery_id == 0);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001728 ipif->ipif_recovery_id = timeout(ipif_dup_recovery,
meeme11c3f42009-01-06 20:16:25 -05001729 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
1730 }
1731 mutex_exit(&ill->ill_lock);
1732 ipif_refrele(ipif);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001733
carlsonj69bb4bb2006-08-14 14:10:48 -07001734ignore_conflict:
carlsonj69bb4bb2006-08-14 14:10:48 -07001735 freemsg(mp);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001736 ira_cleanup(&iras, B_TRUE);
carlsonj69bb4bb2006-08-14 14:10:48 -07001737}
1738
1739/*
1740 * Handle failure by tearing down the ipifs with the specified address. Note
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001741 * that tearing down the ipif also means deleting the ncec through ipif_down, so
1742 * it's not possible to do recovery by just restarting the ncec timer. Instead,
carlsonj69bb4bb2006-08-14 14:10:48 -07001743 * we start a timer on the ipif.
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001744 * Caller has to free mp;
carlsonj69bb4bb2006-08-14 14:10:48 -07001745 */
1746static void
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001747ndp_failure(mblk_t *mp, ip_recv_attr_t *ira)
carlsonj69bb4bb2006-08-14 14:10:48 -07001748{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001749 const uchar_t *haddr;
1750 ill_t *ill = ira->ira_rill;
1751
1752 /*
1753 * Ignore conflicts generated by misbehaving switches that just
1754 * reflect our own messages back to us.
1755 */
1756
1757 /* icmp_inbound_v6 ensures this */
1758 ASSERT(ira->ira_flags & IRAF_L2SRC_SET);
1759 haddr = ira->ira_l2src;
1760 if (haddr != NULL &&
1761 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) {
1762 return;
1763 }
1764
carlsonj69bb4bb2006-08-14 14:10:48 -07001765 if ((mp = copymsg(mp)) != NULL) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001766 mblk_t *attrmp;
1767
1768 attrmp = ip_recv_attr_to_mblk(ira);
1769 if (attrmp == NULL) {
1770 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1771 ip_drop_input("ipIfStatsInDiscards", mp, ill);
carlsonj69bb4bb2006-08-14 14:10:48 -07001772 freemsg(mp);
1773 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001774 ASSERT(attrmp->b_cont == NULL);
1775 attrmp->b_cont = mp;
1776 mp = attrmp;
carlsonj69bb4bb2006-08-14 14:10:48 -07001777 ill_refhold(ill);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001778 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_excl, NEW_OP,
meem8df01f72007-05-30 16:02:35 -07001779 B_FALSE);
carlsonj69bb4bb2006-08-14 14:10:48 -07001780 }
1781 }
carlsonj69bb4bb2006-08-14 14:10:48 -07001782}
1783
1784/*
1785 * Handle a discovered conflict: some other system is advertising that it owns
1786 * one of our IP addresses. We need to defend ourselves, or just shut down the
1787 * interface.
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001788 *
1789 * Handles both IPv4 and IPv6
carlsonj69bb4bb2006-08-14 14:10:48 -07001790 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001791boolean_t
1792ip_nce_conflict(mblk_t *mp, ip_recv_attr_t *ira, ncec_t *ncec)
carlsonj69bb4bb2006-08-14 14:10:48 -07001793{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001794 ipif_t *ipif;
1795 clock_t now;
1796 uint_t maxdefense;
1797 uint_t defs;
1798 ill_t *ill = ira->ira_ill;
1799 ip_stack_t *ipst = ill->ill_ipst;
1800 uint32_t elapsed;
1801 boolean_t isv6 = ill->ill_isv6;
1802 ipaddr_t ncec_addr;
carlsonj69bb4bb2006-08-14 14:10:48 -07001803
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001804 if (isv6) {
1805 ipif = ipif_lookup_addr_v6(&ncec->ncec_addr, ill, ALL_ZONES,
1806 ipst);
1807 } else {
1808 if (arp_no_defense) {
1809 /*
1810 * Yes, there is a conflict, but no, we do not
1811 * defend ourself.
1812 */
1813 return (B_TRUE);
1814 }
1815 IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, ncec_addr);
1816 ipif = ipif_lookup_addr(ncec_addr, ill, ALL_ZONES,
1817 ipst);
1818 }
carlsonj69bb4bb2006-08-14 14:10:48 -07001819 if (ipif == NULL)
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001820 return (B_FALSE);
meeme11c3f42009-01-06 20:16:25 -05001821
carlsonj69bb4bb2006-08-14 14:10:48 -07001822 /*
1823 * First, figure out if this address is disposable.
1824 */
1825 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY))
dh155122f4b3ec62007-01-19 16:59:38 -08001826 maxdefense = ipst->ips_ip_max_temp_defend;
carlsonj69bb4bb2006-08-14 14:10:48 -07001827 else
dh155122f4b3ec62007-01-19 16:59:38 -08001828 maxdefense = ipst->ips_ip_max_defend;
carlsonj69bb4bb2006-08-14 14:10:48 -07001829
1830 /*
1831 * Now figure out how many times we've defended ourselves. Ignore
1832 * defenses that happened long in the past.
1833 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001834 now = ddi_get_lbolt();
1835 elapsed = (drv_hztousec(now - ncec->ncec_last_time_defended))/1000000;
1836 mutex_enter(&ncec->ncec_lock);
1837 if ((defs = ncec->ncec_defense_count) > 0 &&
1838 elapsed > ipst->ips_ip_defend_interval) {
1839 /*
1840 * ip_defend_interval has elapsed.
1841 * reset the defense count.
1842 */
1843 ncec->ncec_defense_count = defs = 0;
carlsonj69bb4bb2006-08-14 14:10:48 -07001844 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001845 ncec->ncec_defense_count++;
1846 ncec->ncec_last_time_defended = now;
1847 mutex_exit(&ncec->ncec_lock);
carlsonj69bb4bb2006-08-14 14:10:48 -07001848 ipif_refrele(ipif);
1849
1850 /*
1851 * If we've defended ourselves too many times already, then give up and
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001852 * tear down the interface(s) using this address.
1853 * Otherwise, caller has to defend by sending out an announce.
carlsonj69bb4bb2006-08-14 14:10:48 -07001854 */
1855 if (defs >= maxdefense) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001856 if (isv6)
1857 ndp_failure(mp, ira);
1858 else
1859 arp_failure(mp, ira);
carlsonj69bb4bb2006-08-14 14:10:48 -07001860 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001861 return (B_TRUE); /* caller must defend this address */
carlsonj69bb4bb2006-08-14 14:10:48 -07001862 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001863 return (B_FALSE);
carlsonj69bb4bb2006-08-14 14:10:48 -07001864}
1865
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001866/*
1867 * Handle reception of Neighbor Solicitation messages.
1868 */
carlsonj69bb4bb2006-08-14 14:10:48 -07001869static void
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001870ndp_input_solicit(mblk_t *mp, ip_recv_attr_t *ira)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001871{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001872 ill_t *ill = ira->ira_ill, *under_ill;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001873 nd_neighbor_solicit_t *ns;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001874 uint32_t hlen = ill->ill_phys_addr_length;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001875 uchar_t *haddr = NULL;
1876 icmp6_t *icmp_nd;
1877 ip6_t *ip6h;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001878 ncec_t *our_ncec = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001879 in6_addr_t target;
1880 in6_addr_t src;
1881 int len;
1882 int flag = 0;
1883 nd_opt_hdr_t *opt = NULL;
1884 boolean_t bad_solicit = B_FALSE;
1885 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001886 boolean_t need_ill_refrele = B_FALSE;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001887
1888 ip6h = (ip6_t *)mp->b_rptr;
1889 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
1890 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
1891 src = ip6h->ip6_src;
1892 ns = (nd_neighbor_solicit_t *)icmp_nd;
1893 target = ns->nd_ns_target;
Sowmini Varadhan01685f92010-03-29 21:44:09 -04001894 if (IN6_IS_ADDR_MULTICAST(&target) || IN6_IS_ADDR_V4MAPPED(&target) ||
1895 IN6_IS_ADDR_LOOPBACK(&target)) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001896 if (ip_debug > 2) {
1897 /* ip1dbg */
Sowmini Varadhan01685f92010-03-29 21:44:09 -04001898 pr_addr_dbg("ndp_input_solicit: Martian Target %s\n",
1899 AF_INET6, &target);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001900 }
1901 bad_solicit = B_TRUE;
1902 goto done;
1903 }