/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 1990 Mentat Inc.
 * Copyright (c) 2013 by Delphix. All rights reserved.
 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
 * Copyright (c) 2014, OmniTI Computer Consulting, Inc. All rights reserved.
 * Copyright 2023 Oxide Computer Company
 */

/*
 * This file contains the interface control functions for IP.
 */
33
34#include <sys/types.h>
35#include <sys/stream.h>
36#include <sys/dlpi.h>
37#include <sys/stropts.h>
38#include <sys/strsun.h>
39#include <sys/sysmacros.h>
Erik Nordmarkde8c4a12009-02-12 08:42:06 -080040#include <sys/strsubr.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070041#include <sys/strlog.h>
42#include <sys/ddi.h>
43#include <sys/sunddi.h>
44#include <sys/cmn_err.h>
45#include <sys/kstat.h>
46#include <sys/debug.h>
47#include <sys/zone.h>
dh155122f4b3ec62007-01-19 16:59:38 -080048#include <sys/sunldi.h>
49#include <sys/file.h>
carlsonj6a8288c2007-09-11 04:26:06 -070050#include <sys/bitmap.h>
Eric Chengda14ceb2008-12-04 18:16:10 -080051#include <sys/cpuvar.h>
52#include <sys/time.h>
meeme11c3f42009-01-06 20:16:25 -050053#include <sys/ctype.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070054#include <sys/kmem.h>
55#include <sys/systm.h>
56#include <sys/param.h>
57#include <sys/socket.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070058#include <sys/isa_defs.h>
59#include <net/if.h>
60#include <net/if_arp.h>
61#include <net/if_types.h>
62#include <net/if_dl.h>
63#include <net/route.h>
64#include <sys/sockio.h>
65#include <netinet/in.h>
66#include <netinet/ip6.h>
67#include <netinet/icmp6.h>
68#include <netinet/igmp_var.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070069#include <sys/policy.h>
70#include <sys/ethernet.h>
Eric Chengda14ceb2008-12-04 18:16:10 -080071#include <sys/callb.h>
meeme11c3f42009-01-06 20:16:25 -050072#include <sys/md5.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070073
74#include <inet/common.h> /* for various inet/mi.h and inet/nd.h needs */
75#include <inet/mi.h>
76#include <inet/nd.h>
Girish Moodalbail6e91bba2010-03-26 17:53:11 -040077#include <inet/tunables.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070078#include <inet/arp.h>
Erik Nordmarkbd670b32009-11-11 11:49:49 -080079#include <inet/ip_arp.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070080#include <inet/mib2.h>
81#include <inet/ip.h>
82#include <inet/ip6.h>
83#include <inet/ip6_asp.h>
84#include <inet/tcp.h>
85#include <inet/ip_multi.h>
86#include <inet/ip_ire.h>
sangeetac793af92006-08-11 05:59:29 -070087#include <inet/ip_ftable.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070088#include <inet/ip_rts.h>
89#include <inet/ip_ndp.h>
90#include <inet/ip_if.h>
masputraff550d02005-10-22 22:50:14 -070091#include <inet/ip_impl.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070092#include <inet/sctp_ip.h>
dr146992381a2a92006-10-20 16:37:58 -070093#include <inet/ip_netinfo.h>
Sangeeta Misradbed73c2009-11-03 23:15:19 -080094#include <inet/ilb_ip.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070095
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070096#include <netinet/igmp.h>
97#include <inet/ip_listutils.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070098#include <inet/ipclassifier.h>
Eric Chengda14ceb2008-12-04 18:16:10 -080099#include <sys/mac_client.h>
100#include <sys/dld.h>
Sowmini Varadhan550b6e42010-07-01 17:10:52 -0400101#include <sys/mac_flow.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700102
103#include <sys/systeminfo.h>
104#include <sys/bootconf.h>
105
jpk45916cd2006-03-24 12:29:20 -0800106#include <sys/tsol/tndb.h>
107#include <sys/tsol/tnet.h>
108
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400109#include <inet/rawip_impl.h> /* needed for icmp_stack_t */
110#include <inet/udp_impl.h> /* needed for udp_stack_t */
111
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700112/* The character which tells where the ill_name ends */
113#define IPIF_SEPARATOR_CHAR ':'
114
115/* IP ioctl function table entry */
116typedef struct ipft_s {
117 int ipft_cmd;
118 pfi_t ipft_pfi;
119 int ipft_min_size;
120 int ipft_flags;
121} ipft_t;
122#define IPFT_F_NO_REPLY 0x1 /* IP ioctl does not expect any reply */
123#define IPFT_F_SELF_REPLY 0x2 /* ioctl callee does the ioctl reply */
124
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700125static int nd_ill_forward_get(queue_t *, mblk_t *, caddr_t, cred_t *);
126static int nd_ill_forward_set(queue_t *q, mblk_t *mp,
127 char *value, caddr_t cp, cred_t *ioc_cr);
128
sowmini968d2fd2008-03-21 06:08:04 -0700129static boolean_t ill_is_quiescent(ill_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700130static boolean_t ip_addr_ok_v4(ipaddr_t addr, ipaddr_t subnet_mask);
131static ip_m_t *ip_m_lookup(t_uscalar_t mac_type);
132static int ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q,
133 mblk_t *mp, boolean_t need_up);
134static int ip_sioctl_dstaddr_tail(ipif_t *ipif, sin_t *sin, queue_t *q,
135 mblk_t *mp, boolean_t need_up);
136static int ip_sioctl_slifzone_tail(ipif_t *ipif, zoneid_t zoneid,
137 queue_t *q, mblk_t *mp, boolean_t need_up);
138static int ip_sioctl_flags_tail(ipif_t *ipif, uint64_t flags, queue_t *q,
meem79242222008-07-29 18:39:05 -0700139 mblk_t *mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700140static int ip_sioctl_netmask_tail(ipif_t *ipif, sin_t *sin, queue_t *q,
141 mblk_t *mp);
142static int ip_sioctl_subnet_tail(ipif_t *ipif, in6_addr_t, in6_addr_t,
143 queue_t *q, mblk_t *mp, boolean_t need_up);
meeme6ed03f2007-07-31 16:42:16 -0700144static int ip_sioctl_plink_ipmod(ipsq_t *ipsq, queue_t *q, mblk_t *mp,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800145 int ioccmd, struct linkblk *li);
dh155122f4b3ec62007-01-19 16:59:38 -0800146static ipaddr_t ip_subnet_mask(ipaddr_t addr, ipif_t **, ip_stack_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700147static void ip_wput_ioctl(queue_t *q, mblk_t *mp);
148static void ipsq_flush(ill_t *ill);
meem8df01f72007-05-30 16:02:35 -0700149
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700150static int ip_sioctl_token_tail(ipif_t *ipif, sin6_t *sin6, int addrlen,
151 queue_t *q, mblk_t *mp, boolean_t need_up);
152static void ipsq_delete(ipsq_t *);
153
154static ipif_t *ipif_allocate(ill_t *ill, int id, uint_t ire_type,
Girish Moodalbaile899e592009-12-01 11:45:15 -0500155 boolean_t initialize, boolean_t insert, int *errorp);
meeme6ed03f2007-07-31 16:42:16 -0700156static ire_t **ipif_create_bcast_ires(ipif_t *ipif, ire_t **irep);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800157static void ipif_delete_bcast_ires(ipif_t *ipif);
158static int ipif_add_ires_v4(ipif_t *, boolean_t);
kcpoon48de1bd2007-06-13 04:53:06 -0700159static boolean_t ipif_comp_multi(ipif_t *old_ipif, ipif_t *new_ipif,
160 boolean_t isv6);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700161static int ipif_logical_down(ipif_t *ipif, queue_t *q, mblk_t *mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700162static void ipif_free(ipif_t *ipif);
163static void ipif_free_tail(ipif_t *ipif);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700164static void ipif_set_default(ipif_t *ipif);
165static int ipif_set_values(queue_t *q, mblk_t *mp,
166 char *interf_name, uint_t *ppa);
167static int ipif_set_values_tail(ill_t *ill, ipif_t *ipif, mblk_t *mp,
168 queue_t *q);
169static ipif_t *ipif_lookup_on_name(char *name, size_t namelen,
170 boolean_t do_alloc, boolean_t *exists, boolean_t isv6, zoneid_t zoneid,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800171 ip_stack_t *);
Erik Nordmarkd6699632010-06-06 01:55:19 -0700172static ipif_t *ipif_lookup_on_name_async(char *name, size_t namelen,
173 boolean_t isv6, zoneid_t zoneid, queue_t *q, mblk_t *mp, ipsq_func_t func,
174 int *error, ip_stack_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700175
176static int ill_alloc_ppa(ill_if_t *, ill_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700177static void ill_delete_interface_type(ill_if_t *);
178static int ill_dl_up(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q);
carlsonj69bb4bb2006-08-14 14:10:48 -0700179static void ill_dl_down(ill_t *ill);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700180static void ill_down(ill_t *ill);
Cathy Zhou1cb875a2009-11-17 09:17:48 -0800181static void ill_down_ipifs(ill_t *, boolean_t);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700182static void ill_free_mib(ill_t *ill);
183static void ill_glist_delete(ill_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700184static void ill_phyint_reinit(ill_t *ill);
185static void ill_set_nce_router_flags(ill_t *, boolean_t);
meemb051ecf2006-12-27 21:32:46 -0800186static void ill_set_phys_addr_tail(ipsq_t *, queue_t *, mblk_t *, void *);
Cathy Zhou5d460ea2009-03-17 20:14:50 -0700187static void ill_replumb_tail(ipsq_t *, queue_t *, mblk_t *, void *);
188
meeme11c3f42009-01-06 20:16:25 -0500189static ip_v6intfid_func_t ip_ether_v6intfid, ip_ib_v6intfid;
Sebastien Roy2b24ab62009-09-22 22:04:45 -0400190static ip_v6intfid_func_t ip_ipv4_v6intfid, ip_ipv6_v6intfid;
meeme11c3f42009-01-06 20:16:25 -0500191static ip_v6intfid_func_t ip_ipmp_v6intfid, ip_nodef_v6intfid;
Sebastien Roy2b24ab62009-09-22 22:04:45 -0400192static ip_v6intfid_func_t ip_ipv4_v6destintfid, ip_ipv6_v6destintfid;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800193static ip_v4mapinfo_func_t ip_ether_v4_mapping;
194static ip_v6mapinfo_func_t ip_ether_v6_mapping;
195static ip_v4mapinfo_func_t ip_ib_v4_mapping;
196static ip_v6mapinfo_func_t ip_ib_v6_mapping;
197static ip_v4mapinfo_func_t ip_mbcast_mapping;
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300198static void ip_cgtp_bcast_add(ire_t *, ip_stack_t *);
199static void ip_cgtp_bcast_delete(ire_t *, ip_stack_t *);
meeme11c3f42009-01-06 20:16:25 -0500200static void phyint_free(phyint_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700201
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800202static void ill_capability_dispatch(ill_t *, mblk_t *, dl_capability_sub_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700203static void ill_capability_id_ack(ill_t *, mblk_t *, dl_capability_sub_t *);
Cathy Zhou1cb875a2009-11-17 09:17:48 -0800204static void ill_capability_vrrp_ack(ill_t *, mblk_t *, dl_capability_sub_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700205static void ill_capability_hcksum_ack(ill_t *, mblk_t *, dl_capability_sub_t *);
Eric Chengda14ceb2008-12-04 18:16:10 -0800206static void ill_capability_hcksum_reset_fill(ill_t *, mblk_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700207static void ill_capability_zerocopy_ack(ill_t *, mblk_t *,
208 dl_capability_sub_t *);
Eric Chengda14ceb2008-12-04 18:16:10 -0800209static void ill_capability_zerocopy_reset_fill(ill_t *, mblk_t *);
Eric Chengda14ceb2008-12-04 18:16:10 -0800210static void ill_capability_dld_reset_fill(ill_t *, mblk_t *);
211static void ill_capability_dld_ack(ill_t *, mblk_t *,
212 dl_capability_sub_t *);
213static void ill_capability_dld_enable(ill_t *);
214static void ill_capability_ack_thr(void *);
215static void ill_capability_lso_enable(ill_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700216
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700217static ill_t *ill_prev_usesrc(ill_t *);
218static int ill_relink_usesrc_ills(ill_t *, ill_t *, uint_t);
219static void ill_disband_usesrc_group(ill_t *);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800220static void ip_sioctl_garp_reply(mblk_t *, ill_t *, void *, int);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700221
carlsonj6a8288c2007-09-11 04:26:06 -0700222#ifdef DEBUG
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800223static void ill_trace_cleanup(const ill_t *);
224static void ipif_trace_cleanup(const ipif_t *);
carlsonj6a8288c2007-09-11 04:26:06 -0700225#endif
226
Thirumalai Srinivasan51048572010-01-16 16:04:55 -0800227static void ill_dlpi_clear_deferred(ill_t *ill);
228
Joshua M. Clulowf949c382015-04-30 13:52:59 -0700229static void phyint_flags_init(phyint_t *, t_uscalar_t);
230
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700231/*
232 * if we go over the memory footprint limit more than once in this msec
233 * interval, we'll start pruning aggressively.
234 */
235int ip_min_frag_prune_time = 0;
236
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700237static ipft_t ip_ioctl_ftbl[] = {
238 { IP_IOC_IRE_DELETE, ip_ire_delete, sizeof (ipid_t), 0 },
239 { IP_IOC_IRE_DELETE_NO_REPLY, ip_ire_delete, sizeof (ipid_t),
240 IPFT_F_NO_REPLY },
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700241 { IP_IOC_RTS_REQUEST, ip_rts_request, 0, IPFT_F_SELF_REPLY },
242 { 0 }
243};
244
245/* Simple ICMP IP Header Template */
246static ipha_t icmp_ipha = {
247 IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP
248};
249
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700250static uchar_t ip_six_byte_all_ones[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
251
Philip Kirkb127ac42008-11-06 06:47:54 -0500252static ip_m_t ip_m_tbl[] = {
Sebastien Roy2b24ab62009-09-22 22:04:45 -0400253 { DL_ETHER, IFT_ETHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800254 ip_ether_v4_mapping, ip_ether_v6_mapping, ip_ether_v6intfid,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700255 ip_nodef_v6intfid },
Sebastien Roy2b24ab62009-09-22 22:04:45 -0400256 { DL_CSMACD, IFT_ISO88023, ETHERTYPE_IP, ETHERTYPE_IPV6,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800257 ip_ether_v4_mapping, ip_ether_v6_mapping, ip_nodef_v6intfid,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700258 ip_nodef_v6intfid },
Sebastien Roy2b24ab62009-09-22 22:04:45 -0400259 { DL_TPB, IFT_ISO88024, ETHERTYPE_IP, ETHERTYPE_IPV6,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800260 ip_ether_v4_mapping, ip_ether_v6_mapping, ip_nodef_v6intfid,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700261 ip_nodef_v6intfid },
Sebastien Roy2b24ab62009-09-22 22:04:45 -0400262 { DL_TPR, IFT_ISO88025, ETHERTYPE_IP, ETHERTYPE_IPV6,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800263 ip_ether_v4_mapping, ip_ether_v6_mapping, ip_nodef_v6intfid,
Sebastien Roy2b24ab62009-09-22 22:04:45 -0400264 ip_nodef_v6intfid },
265 { DL_FDDI, IFT_FDDI, ETHERTYPE_IP, ETHERTYPE_IPV6,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800266 ip_ether_v4_mapping, ip_ether_v6_mapping, ip_ether_v6intfid,
Sebastien Roy2b24ab62009-09-22 22:04:45 -0400267 ip_nodef_v6intfid },
268 { DL_IB, IFT_IB, ETHERTYPE_IP, ETHERTYPE_IPV6,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800269 ip_ib_v4_mapping, ip_ib_v6_mapping, ip_ib_v6intfid,
Sebastien Roy2b24ab62009-09-22 22:04:45 -0400270 ip_nodef_v6intfid },
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800271 { DL_IPV4, IFT_IPV4, IPPROTO_ENCAP, IPPROTO_IPV6,
272 ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv4_v6intfid,
273 ip_ipv4_v6destintfid },
274 { DL_IPV6, IFT_IPV6, IPPROTO_ENCAP, IPPROTO_IPV6,
275 ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv6_v6intfid,
276 ip_ipv6_v6destintfid },
277 { DL_6TO4, IFT_6TO4, IPPROTO_ENCAP, IPPROTO_IPV6,
278 ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv4_v6intfid,
279 ip_nodef_v6intfid },
Sebastien Roy2b24ab62009-09-22 22:04:45 -0400280 { SUNW_DL_VNI, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
281 NULL, NULL, ip_nodef_v6intfid, ip_nodef_v6intfid },
282 { SUNW_DL_IPMP, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
283 NULL, NULL, ip_ipmp_v6intfid, ip_nodef_v6intfid },
284 { DL_OTHER, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800285 ip_ether_v4_mapping, ip_ether_v6_mapping, ip_nodef_v6intfid,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700286 ip_nodef_v6intfid }
287};
288
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700289char ipif_loopback_name[] = "lo0";
Kacheong Poon721fffe2010-02-24 07:49:29 -0800290
291/* These are used by all IP network modules. */
292sin6_t sin6_null; /* Zero address for quick clears */
293sin_t sin_null; /* Zero address for quick clears */
dh155122f4b3ec62007-01-19 16:59:38 -0800294
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700295/* When set search for unused ipif_seqid */
296static ipif_t ipif_zero;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700297
298/*
299 * ppa arena is created after these many
300 * interfaces have been plumbed.
301 */
dh155122f4b3ec62007-01-19 16:59:38 -0800302uint_t ill_no_arena = 12; /* Setable in /etc/system */
ethindra49df4562005-10-17 20:01:40 -0700303
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700304/*
apersson31736642006-12-19 17:33:00 -0800305 * Allocate per-interface mibs.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700306 * Returns true if ok. False otherwise.
307 * ipsq may not yet be allocated (loopback case ).
308 */
309static boolean_t
310ill_allocate_mibs(ill_t *ill)
311{
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700312 /* Already allocated? */
apersson31736642006-12-19 17:33:00 -0800313 if (ill->ill_ip_mib != NULL) {
314 if (ill->ill_isv6)
315 ASSERT(ill->ill_icmp6_mib != NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700316 return (B_TRUE);
317 }
318
apersson31736642006-12-19 17:33:00 -0800319 ill->ill_ip_mib = kmem_zalloc(sizeof (*ill->ill_ip_mib),
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700320 KM_NOSLEEP);
apersson31736642006-12-19 17:33:00 -0800321 if (ill->ill_ip_mib == NULL) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700322 return (B_FALSE);
323 }
apersson31736642006-12-19 17:33:00 -0800324
325 /* Setup static information */
326 SET_MIB(ill->ill_ip_mib->ipIfStatsEntrySize,
327 sizeof (mib2_ipIfStatsEntry_t));
328 if (ill->ill_isv6) {
329 ill->ill_ip_mib->ipIfStatsIPVersion = MIB2_INETADDRESSTYPE_ipv6;
330 SET_MIB(ill->ill_ip_mib->ipIfStatsAddrEntrySize,
331 sizeof (mib2_ipv6AddrEntry_t));
332 SET_MIB(ill->ill_ip_mib->ipIfStatsRouteEntrySize,
333 sizeof (mib2_ipv6RouteEntry_t));
334 SET_MIB(ill->ill_ip_mib->ipIfStatsNetToMediaEntrySize,
335 sizeof (mib2_ipv6NetToMediaEntry_t));
336 SET_MIB(ill->ill_ip_mib->ipIfStatsMemberEntrySize,
337 sizeof (ipv6_member_t));
338 SET_MIB(ill->ill_ip_mib->ipIfStatsGroupSourceEntrySize,
339 sizeof (ipv6_grpsrc_t));
340 } else {
341 ill->ill_ip_mib->ipIfStatsIPVersion = MIB2_INETADDRESSTYPE_ipv4;
342 SET_MIB(ill->ill_ip_mib->ipIfStatsAddrEntrySize,
343 sizeof (mib2_ipAddrEntry_t));
344 SET_MIB(ill->ill_ip_mib->ipIfStatsRouteEntrySize,
345 sizeof (mib2_ipRouteEntry_t));
346 SET_MIB(ill->ill_ip_mib->ipIfStatsNetToMediaEntrySize,
347 sizeof (mib2_ipNetToMediaEntry_t));
348 SET_MIB(ill->ill_ip_mib->ipIfStatsMemberEntrySize,
349 sizeof (ip_member_t));
350 SET_MIB(ill->ill_ip_mib->ipIfStatsGroupSourceEntrySize,
351 sizeof (ip_grpsrc_t));
352
353 /*
354 * For a v4 ill, we are done at this point, because per ill
355 * icmp mibs are only used for v6.
356 */
357 return (B_TRUE);
358 }
359
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700360 ill->ill_icmp6_mib = kmem_zalloc(sizeof (*ill->ill_icmp6_mib),
361 KM_NOSLEEP);
362 if (ill->ill_icmp6_mib == NULL) {
apersson31736642006-12-19 17:33:00 -0800363 kmem_free(ill->ill_ip_mib, sizeof (*ill->ill_ip_mib));
364 ill->ill_ip_mib = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700365 return (B_FALSE);
366 }
apersson31736642006-12-19 17:33:00 -0800367 /* static icmp info */
368 ill->ill_icmp6_mib->ipv6IfIcmpEntrySize =
369 sizeof (mib2_ipv6IfIcmpEntry_t);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700370 /*
apersson31736642006-12-19 17:33:00 -0800371 * The ipIfStatsIfindex and ipv6IfIcmpIndex will be assigned later
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700372 * after the phyint merge occurs in ipif_set_values -> ill_glist_insert
373 * -> ill_phyint_reinit
374 */
375 return (B_TRUE);
376}
377
378/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700379 * Completely vaporize a lower level tap and all associated interfaces.
380 * ill_delete is called only out of ip_close when the device control
381 * stream is being closed.
382 */
383void
384ill_delete(ill_t *ill)
385{
386 ipif_t *ipif;
387 ill_t *prev_ill;
dh155122f4b3ec62007-01-19 16:59:38 -0800388 ip_stack_t *ipst = ill->ill_ipst;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700389
390 /*
391 * ill_delete may be forcibly entering the ipsq. The previous
392 * ioctl may not have completed and may need to be aborted.
393 * ipsq_flush takes care of it. If we don't need to enter the
394 * the ipsq forcibly, the 2nd invocation of ipsq_flush in
395 * ill_delete_tail is sufficient.
396 */
397 ipsq_flush(ill);
398
399 /*
400 * Nuke all interfaces. ipif_free will take down the interface,
401 * remove it from the list, and free the data structure.
402 * Walk down the ipif list and remove the logical interfaces
403 * first before removing the main ipif. We can't unplumb
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800404 * zeroth interface first in the case of IPv6 as update_conn_ill
405 * -> ip_ll_multireq de-references ill_ipif for checking
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700406 * POINTOPOINT.
407 *
408 * If ill_ipif was not properly initialized (i.e low on memory),
409 * then no interfaces to clean up. In this case just clean up the
410 * ill.
411 */
412 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
413 ipif_free(ipif);
414
415 /*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800416 * clean out all the nce_t entries that depend on this
417 * ill for the ill_phys_addr.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700418 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800419 nce_flush(ill, B_TRUE);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700420
421 /* Clean up msgs on pending upcalls for mrouted */
422 reset_mrt_ill(ill);
423
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800424 update_conn_ill(ill, ipst);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700425
426 /*
Philip Kirkb127ac42008-11-06 06:47:54 -0500427 * Remove multicast references added as a result of calls to
428 * ip_join_allmulti().
429 */
430 ip_purge_allmulti(ill);
431
432 /*
meeme11c3f42009-01-06 20:16:25 -0500433 * If the ill being deleted is under IPMP, boot it out of the illgrp.
434 */
435 if (IS_UNDER_IPMP(ill))
436 ipmp_ill_leave_illgrp(ill);
437
438 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700439 * ill_down will arrange to blow off any IRE's dependent on this
440 * ILL, and shut down fragmentation reassembly.
441 */
442 ill_down(ill);
443
444 /* Let SCTP know, so that it can remove this from its list. */
445 sctp_update_ill(ill, SCTP_ILL_REMOVE);
446
447 /*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800448 * Walk all CONNs that can have a reference on an ire or nce for this
449 * ill (we actually walk all that now have stale references).
450 */
451 ipcl_walk(conn_ixa_cleanup, (void *)B_TRUE, ipst);
452
453 /* With IPv6 we have dce_ifindex. Cleanup for neatness */
454 if (ill->ill_isv6)
455 dce_cleanup(ill->ill_phyint->phyint_ifindex, ipst);
456
457 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700458 * If an address on this ILL is being used as a source address then
459 * clear out the pointers in other ILLs that point to this ILL.
460 */
dh155122f4b3ec62007-01-19 16:59:38 -0800461 rw_enter(&ipst->ips_ill_g_usesrc_lock, RW_WRITER);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700462 if (ill->ill_usesrc_grp_next != NULL) {
463 if (ill->ill_usesrc_ifindex == 0) { /* usesrc ILL ? */
464 ill_disband_usesrc_group(ill);
465 } else { /* consumer of the usesrc ILL */
466 prev_ill = ill_prev_usesrc(ill);
467 prev_ill->ill_usesrc_grp_next =
468 ill->ill_usesrc_grp_next;
469 }
470 }
dh155122f4b3ec62007-01-19 16:59:38 -0800471 rw_exit(&ipst->ips_ill_g_usesrc_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700472}
473
carlsonj69bb4bb2006-08-14 14:10:48 -0700474static void
475ipif_non_duplicate(ipif_t *ipif)
476{
477 ill_t *ill = ipif->ipif_ill;
478 mutex_enter(&ill->ill_lock);
479 if (ipif->ipif_flags & IPIF_DUPLICATE) {
480 ipif->ipif_flags &= ~IPIF_DUPLICATE;
481 ASSERT(ill->ill_ipif_dup_count > 0);
482 ill->ill_ipif_dup_count--;
483 }
484 mutex_exit(&ill->ill_lock);
485}
486
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700487/*
488 * ill_delete_tail is called from ip_modclose after all references
489 * to the closing ill are gone. The wait is done in ip_modclose
490 */
491void
492ill_delete_tail(ill_t *ill)
493{
494 mblk_t **mpp;
495 ipif_t *ipif;
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400496 ip_stack_t *ipst = ill->ill_ipst;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700497
carlsonj69bb4bb2006-08-14 14:10:48 -0700498 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
499 ipif_non_duplicate(ipif);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800500 (void) ipif_down_tail(ipif);
carlsonj69bb4bb2006-08-14 14:10:48 -0700501 }
502
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800503 ASSERT(ill->ill_ipif_dup_count == 0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700504
505 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700506 * If polling capability is enabled (which signifies direct
507 * upcall into IP and driver has ill saved as a handle),
508 * we need to make sure that unbind has completed before we
509 * let the ill disappear and driver no longer has any reference
510 * to this ill.
511 */
512 mutex_enter(&ill->ill_lock);
krgopia5e428f2006-03-06 14:57:25 -0800513 while (ill->ill_state_flags & ILL_DL_UNBIND_IN_PROGRESS)
514 cv_wait(&ill->ill_cv, &ill->ill_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700515 mutex_exit(&ill->ill_lock);
Eric Chengda14ceb2008-12-04 18:16:10 -0800516 ASSERT(!(ill->ill_capabilities &
517 (ILL_CAPAB_DLD | ILL_CAPAB_DLD_POLL | ILL_CAPAB_DLD_DIRECT)));
krgopia5e428f2006-03-06 14:57:25 -0800518
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700519 if (ill->ill_net_type != IRE_LOOPBACK)
520 qprocsoff(ill->ill_rq);
521
522 /*
523 * We do an ipsq_flush once again now. New messages could have
524 * landed up from below (M_ERROR or M_HANGUP). Similarly ioctls
525 * could also have landed up if an ioctl thread had looked up
526 * the ill before we set the ILL_CONDEMNED flag, but not yet
527 * enqueued the ioctl when we did the ipsq_flush last time.
528 */
529 ipsq_flush(ill);
530
531 /*
532 * Free capabilities.
533 */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700534 if (ill->ill_hcksum_capab != NULL) {
535 kmem_free(ill->ill_hcksum_capab, sizeof (ill_hcksum_capab_t));
536 ill->ill_hcksum_capab = NULL;
537 }
538
539 if (ill->ill_zerocopy_capab != NULL) {
540 kmem_free(ill->ill_zerocopy_capab,
541 sizeof (ill_zerocopy_capab_t));
542 ill->ill_zerocopy_capab = NULL;
543 }
544
yl15005183476012006-11-13 20:44:19 -0800545 if (ill->ill_lso_capab != NULL) {
546 kmem_free(ill->ill_lso_capab, sizeof (ill_lso_capab_t));
547 ill->ill_lso_capab = NULL;
548 }
549
Eric Chengda14ceb2008-12-04 18:16:10 -0800550 if (ill->ill_dld_capab != NULL) {
551 kmem_free(ill->ill_dld_capab, sizeof (ill_dld_capab_t));
552 ill->ill_dld_capab = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700553 }
554
Sowmini Varadhan550b6e42010-07-01 17:10:52 -0400555 /* Clean up ill_allowed_ips* related state */
556 if (ill->ill_allowed_ips != NULL) {
557 ASSERT(ill->ill_allowed_ips_cnt > 0);
558 kmem_free(ill->ill_allowed_ips,
559 ill->ill_allowed_ips_cnt * sizeof (in6_addr_t));
560 ill->ill_allowed_ips = NULL;
561 ill->ill_allowed_ips_cnt = 0;
562 }
563
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700564 while (ill->ill_ipif != NULL)
565 ipif_free_tail(ill->ill_ipif);
566
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700567 /*
568 * We have removed all references to ilm from conn and the ones joined
569 * within the kernel.
570 *
571 * We don't walk conns, mrts and ires because
572 *
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800573 * 1) update_conn_ill and reset_mrt_ill cleans up conns and mrts.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700574 * 2) ill_down ->ill_downi walks all the ires and cleans up
575 * ill references.
576 */
meeme11c3f42009-01-06 20:16:25 -0500577
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700578 /*
meeme11c3f42009-01-06 20:16:25 -0500579 * If this ill is an IPMP meta-interface, blow away the illgrp. This
580 * is safe to do because the illgrp has already been unlinked from the
581 * group by I_PUNLINK, and thus SIOCSLIFGROUPNAME cannot find it.
582 */
583 if (IS_IPMP(ill)) {
584 ipmp_illgrp_destroy(ill->ill_grp);
585 ill->ill_grp = NULL;
586 }
587
Ravi Chandra Nallan7f125a52010-07-13 18:17:30 +0530588 if (ill->ill_mphysaddr_list != NULL) {
589 multiphysaddr_t *mpa, *tmpa;
590
591 mpa = ill->ill_mphysaddr_list;
592 ill->ill_mphysaddr_list = NULL;
593 while (mpa) {
594 tmpa = mpa->mpa_next;
595 kmem_free(mpa, sizeof (*mpa));
596 mpa = tmpa;
597 }
598 }
meeme11c3f42009-01-06 20:16:25 -0500599 /*
600 * Take us out of the list of ILLs. ill_glist_delete -> phyint_free
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700601 * could free the phyint. No more reference to the phyint after this
602 * point.
603 */
604 (void) ill_glist_delete(ill);
605
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700606 if (ill->ill_frag_ptr != NULL) {
607 uint_t count;
608
609 for (count = 0; count < ILL_FRAG_HASH_TBL_COUNT; count++) {
610 mutex_destroy(&ill->ill_frag_hash_tbl[count].ipfb_lock);
611 }
612 mi_free(ill->ill_frag_ptr);
613 ill->ill_frag_ptr = NULL;
614 ill->ill_frag_hash_tbl = NULL;
615 }
meemb051ecf2006-12-27 21:32:46 -0800616
617 freemsg(ill->ill_nd_lla_mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700618 /* Free all retained control messages. */
619 mpp = &ill->ill_first_mp_to_free;
620 do {
621 while (mpp[0]) {
622 mblk_t *mp;
623 mblk_t *mp1;
624
625 mp = mpp[0];
626 mpp[0] = mp->b_next;
627 for (mp1 = mp; mp1 != NULL; mp1 = mp1->b_cont) {
628 mp1->b_next = NULL;
629 mp1->b_prev = NULL;
630 }
631 freemsg(mp);
632 }
633 } while (mpp++ != &ill->ill_last_mp_to_free);
634
635 ill_free_mib(ill);
carlsonj6a8288c2007-09-11 04:26:06 -0700636
637#ifdef DEBUG
638 ill_trace_cleanup(ill);
639#endif
640
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800641 /* The default multicast interface might have changed */
642 ire_increment_multicast_generation(ipst, ill->ill_isv6);
643
dh155122f4b3ec62007-01-19 16:59:38 -0800644 /* Drop refcnt here */
645 netstack_rele(ill->ill_ipst->ips_netstack);
646 ill->ill_ipst = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700647}
648
649static void
650ill_free_mib(ill_t *ill)
651{
dh155122f4b3ec62007-01-19 16:59:38 -0800652 ip_stack_t *ipst = ill->ill_ipst;
653
apersson31736642006-12-19 17:33:00 -0800654 /*
655 * MIB statistics must not be lost, so when an interface
656 * goes away the counter values will be added to the global
657 * MIBs.
658 */
659 if (ill->ill_ip_mib != NULL) {
dh155122f4b3ec62007-01-19 16:59:38 -0800660 if (ill->ill_isv6) {
661 ip_mib2_add_ip_stats(&ipst->ips_ip6_mib,
662 ill->ill_ip_mib);
663 } else {
664 ip_mib2_add_ip_stats(&ipst->ips_ip_mib,
665 ill->ill_ip_mib);
666 }
apersson31736642006-12-19 17:33:00 -0800667
668 kmem_free(ill->ill_ip_mib, sizeof (*ill->ill_ip_mib));
669 ill->ill_ip_mib = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700670 }
671 if (ill->ill_icmp6_mib != NULL) {
dh155122f4b3ec62007-01-19 16:59:38 -0800672 ip_mib2_add_icmp6_stats(&ipst->ips_icmp6_mib,
673 ill->ill_icmp6_mib);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700674 kmem_free(ill->ill_icmp6_mib, sizeof (*ill->ill_icmp6_mib));
675 ill->ill_icmp6_mib = NULL;
676 }
677}
678
679/*
680 * Concatenate together a physical address and a sap.
681 *
682 * Sap_lengths are interpreted as follows:
683 * sap_length == 0 ==> no sap
684 * sap_length > 0 ==> sap is at the head of the dlpi address
685 * sap_length < 0 ==> sap is at the tail of the dlpi address
686 */
687static void
688ill_dlur_copy_address(uchar_t *phys_src, uint_t phys_length,
689 t_scalar_t sap_src, t_scalar_t sap_length, uchar_t *dst)
690{
691 uint16_t sap_addr = (uint16_t)sap_src;
692
693 if (sap_length == 0) {
694 if (phys_src == NULL)
695 bzero(dst, phys_length);
696 else
697 bcopy(phys_src, dst, phys_length);
698 } else if (sap_length < 0) {
699 if (phys_src == NULL)
700 bzero(dst, phys_length);
701 else
702 bcopy(phys_src, dst, phys_length);
703 bcopy(&sap_addr, (char *)dst + phys_length, sizeof (sap_addr));
704 } else {
705 bcopy(&sap_addr, dst, sizeof (sap_addr));
706 if (phys_src == NULL)
707 bzero((char *)dst + sap_length, phys_length);
708 else
709 bcopy(phys_src, (char *)dst + sap_length, phys_length);
710 }
711}
712
713/*
714 * Generate a dl_unitdata_req mblk for the device and address given.
715 * addr_length is the length of the physical portion of the address.
716 * If addr is NULL include an all zero address of the specified length.
717 * TRUE? In any case, addr_length is taken to be the entire length of the
718 * dlpi address, including the absolute value of sap_length.
719 */
720mblk_t *
721ill_dlur_gen(uchar_t *addr, uint_t addr_length, t_uscalar_t sap,
Bryan Cantrill854956c2016-09-24 08:43:10 -0700722 t_scalar_t sap_length)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700723{
724 dl_unitdata_req_t *dlur;
725 mblk_t *mp;
726 t_scalar_t abs_sap_length; /* absolute value */
727
728 abs_sap_length = ABS(sap_length);
729 mp = ip_dlpi_alloc(sizeof (*dlur) + addr_length + abs_sap_length,
kcpoon48de1bd2007-06-13 04:53:06 -0700730 DL_UNITDATA_REQ);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700731 if (mp == NULL)
732 return (NULL);
733 dlur = (dl_unitdata_req_t *)mp->b_rptr;
734 /* HACK: accomodate incompatible DLPI drivers */
735 if (addr_length == 8)
736 addr_length = 6;
737 dlur->dl_dest_addr_length = addr_length + abs_sap_length;
738 dlur->dl_dest_addr_offset = sizeof (*dlur);
739 dlur->dl_priority.dl_min = 0;
740 dlur->dl_priority.dl_max = 0;
741 ill_dlur_copy_address(addr, addr_length, sap, sap_length,
742 (uchar_t *)&dlur[1]);
743 return (mp);
744}
745
746/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700747 * Add the pending mp to the list. There can be only 1 pending mp
748 * in the list. Any exclusive ioctl that needs to wait for a response
749 * from another module or driver needs to use this function to set
meeme11c3f42009-01-06 20:16:25 -0500750 * the ipx_pending_mp to the ioctl mblk and wait for the response from
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700751 * the other module/driver. This is also used while waiting for the
752 * ipif/ill/ire refcnts to drop to zero in bringing down an ipif.
753 */
754boolean_t
755ipsq_pending_mp_add(conn_t *connp, ipif_t *ipif, queue_t *q, mblk_t *add_mp,
756 int waitfor)
757{
meeme11c3f42009-01-06 20:16:25 -0500758 ipxop_t *ipx = ipif->ipif_ill->ill_phyint->phyint_ipsq->ipsq_xop;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700759
760 ASSERT(IAM_WRITER_IPIF(ipif));
761 ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock));
762 ASSERT((add_mp->b_next == NULL) && (add_mp->b_prev == NULL));
meeme11c3f42009-01-06 20:16:25 -0500763 ASSERT(ipx->ipx_pending_mp == NULL);
meemb051ecf2006-12-27 21:32:46 -0800764 /*
765 * The caller may be using a different ipif than the one passed into
766 * ipsq_current_start() (e.g., suppose an ioctl that came in on the V4
767 * ill needs to wait for the V6 ill to quiesce). So we can't ASSERT
meeme11c3f42009-01-06 20:16:25 -0500768 * that `ipx_current_ipif == ipif'.
meemb051ecf2006-12-27 21:32:46 -0800769 */
meeme11c3f42009-01-06 20:16:25 -0500770 ASSERT(ipx->ipx_current_ipif != NULL);
meemb051ecf2006-12-27 21:32:46 -0800771
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700772 /*
Sebastien Roy2b24ab62009-09-22 22:04:45 -0400773 * M_IOCDATA from ioctls, M_ERROR/M_HANGUP/M_PROTO/M_PCPROTO from the
774 * driver.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700775 */
Sebastien Roy2b24ab62009-09-22 22:04:45 -0400776 ASSERT((DB_TYPE(add_mp) == M_IOCDATA) || (DB_TYPE(add_mp) == M_ERROR) ||
777 (DB_TYPE(add_mp) == M_HANGUP) || (DB_TYPE(add_mp) == M_PROTO) ||
778 (DB_TYPE(add_mp) == M_PCPROTO));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700779
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700780 if (connp != NULL) {
781 ASSERT(MUTEX_HELD(&connp->conn_lock));
782 /*
783 * Return error if the conn has started closing. The conn
784 * could have finished cleaning up the pending mp list,
785 * If so we should not add another mp to the list negating
786 * the cleanup.
787 */
788 if (connp->conn_state_flags & CONN_CLOSING)
789 return (B_FALSE);
790 }
meeme11c3f42009-01-06 20:16:25 -0500791 mutex_enter(&ipx->ipx_lock);
792 ipx->ipx_pending_ipif = ipif;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700793 /*
794 * Note down the queue in b_queue. This will be returned by
795 * ipsq_pending_mp_get. Caller will then use these values to restart
796 * the processing
797 */
798 add_mp->b_next = NULL;
799 add_mp->b_queue = q;
meeme11c3f42009-01-06 20:16:25 -0500800 ipx->ipx_pending_mp = add_mp;
801 ipx->ipx_waitfor = waitfor;
802 mutex_exit(&ipx->ipx_lock);
meemb051ecf2006-12-27 21:32:46 -0800803
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700804 if (connp != NULL)
805 connp->conn_oper_pending_ill = ipif->ipif_ill;
meeme11c3f42009-01-06 20:16:25 -0500806
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700807 return (B_TRUE);
808}
809
810/*
meeme11c3f42009-01-06 20:16:25 -0500811 * Retrieve the ipx_pending_mp and return it. There can be only 1 mp
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700812 * queued in the list.
813 */
814mblk_t *
815ipsq_pending_mp_get(ipsq_t *ipsq, conn_t **connpp)
816{
817 mblk_t *curr = NULL;
meeme11c3f42009-01-06 20:16:25 -0500818 ipxop_t *ipx = ipsq->ipsq_xop;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700819
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700820 *connpp = NULL;
meeme11c3f42009-01-06 20:16:25 -0500821 mutex_enter(&ipx->ipx_lock);
822 if (ipx->ipx_pending_mp == NULL) {
823 mutex_exit(&ipx->ipx_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700824 return (NULL);
825 }
826
827 /* There can be only 1 such excl message */
meeme11c3f42009-01-06 20:16:25 -0500828 curr = ipx->ipx_pending_mp;
829 ASSERT(curr->b_next == NULL);
830 ipx->ipx_pending_ipif = NULL;
831 ipx->ipx_pending_mp = NULL;
832 ipx->ipx_waitfor = 0;
833 mutex_exit(&ipx->ipx_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700834
835 if (CONN_Q(curr->b_queue)) {
836 /*
837 * This mp did a refhold on the conn, at the start of the ioctl.
838 * So we can safely return a pointer to the conn to the caller.
839 */
840 *connpp = Q_TO_CONN(curr->b_queue);
841 } else {
842 *connpp = NULL;
843 }
844 curr->b_next = NULL;
845 curr->b_prev = NULL;
846 return (curr);
847}
848
849/*
meeme11c3f42009-01-06 20:16:25 -0500850 * Cleanup the ioctl mp queued in ipx_pending_mp
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700851 * - Called in the ill_delete path
852 * - Called in the M_ERROR or M_HANGUP path on the ill.
853 * - Called in the conn close path.
Thirumalai Srinivasan51048572010-01-16 16:04:55 -0800854 *
855 * Returns success on finding the pending mblk associated with the ioctl or
856 * exclusive operation in progress, failure otherwise.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700857 */
858boolean_t
859ipsq_pending_mp_cleanup(ill_t *ill, conn_t *connp)
860{
861 mblk_t *mp;
meeme11c3f42009-01-06 20:16:25 -0500862 ipxop_t *ipx;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700863 queue_t *q;
864 ipif_t *ipif;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800865 int cmd;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700866
867 ASSERT(IAM_WRITER_ILL(ill));
meeme11c3f42009-01-06 20:16:25 -0500868 ipx = ill->ill_phyint->phyint_ipsq->ipsq_xop;
869
meeme11c3f42009-01-06 20:16:25 -0500870 mutex_enter(&ipx->ipx_lock);
871 mp = ipx->ipx_pending_mp;
Thirumalai Srinivasanb8d97ac2010-01-19 14:32:48 -0800872 if (connp != NULL) {
873 if (mp == NULL || mp->b_queue != CONNP_TO_WQ(connp)) {
874 /*
875 * Nothing to clean since the conn that is closing
876 * does not have a matching pending mblk in
877 * ipx_pending_mp.
878 */
879 mutex_exit(&ipx->ipx_lock);
880 return (B_FALSE);
881 }
882 } else {
883 /*
884 * A non-zero ill_error signifies we are called in the
885 * M_ERROR or M_HANGUP path and we need to unconditionally
886 * abort any current ioctl and do the corresponding cleanup.
887 * A zero ill_error means we are in the ill_delete path and
888 * we do the cleanup only if there is a pending mp.
889 */
890 if (mp == NULL && ill->ill_error == 0) {
891 mutex_exit(&ipx->ipx_lock);
892 return (B_FALSE);
893 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700894 }
Thirumalai Srinivasan51048572010-01-16 16:04:55 -0800895
meeme11c3f42009-01-06 20:16:25 -0500896 /* Now remove from the ipx_pending_mp */
897 ipx->ipx_pending_mp = NULL;
meeme11c3f42009-01-06 20:16:25 -0500898 ipif = ipx->ipx_pending_ipif;
899 ipx->ipx_pending_ipif = NULL;
900 ipx->ipx_waitfor = 0;
901 ipx->ipx_current_ipif = NULL;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800902 cmd = ipx->ipx_current_ioctl;
meeme11c3f42009-01-06 20:16:25 -0500903 ipx->ipx_current_ioctl = 0;
904 ipx->ipx_current_done = B_TRUE;
905 mutex_exit(&ipx->ipx_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700906
Thirumalai Srinivasan51048572010-01-16 16:04:55 -0800907 if (mp == NULL)
908 return (B_FALSE);
909
910 q = mp->b_queue;
911 mp->b_next = NULL;
912 mp->b_prev = NULL;
913 mp->b_queue = NULL;
914
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700915 if (DB_TYPE(mp) == M_IOCTL || DB_TYPE(mp) == M_IOCDATA) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800916 DTRACE_PROBE4(ipif__ioctl,
917 char *, "ipsq_pending_mp_cleanup",
918 int, cmd, ill_t *, ipif == NULL ? NULL : ipif->ipif_ill,
919 ipif_t *, ipif);
meemb051ecf2006-12-27 21:32:46 -0800920 if (connp == NULL) {
921 ip_ioctl_finish(q, mp, ENXIO, NO_COPYOUT, NULL);
922 } else {
923 ip_ioctl_finish(q, mp, ENXIO, CONN_CLOSE, NULL);
924 mutex_enter(&ipif->ipif_ill->ill_lock);
925 ipif->ipif_state_flags &= ~IPIF_CHANGING;
926 mutex_exit(&ipif->ipif_ill->ill_lock);
927 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700928 } else {
masputraff550d02005-10-22 22:50:14 -0700929 inet_freemsg(mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700930 }
931 return (B_TRUE);
932}
933
934/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700935 * Called in the conn close path and ill delete path
936 */
937static void
938ipsq_xopq_mp_cleanup(ill_t *ill, conn_t *connp)
939{
940 ipsq_t *ipsq;
941 mblk_t *prev;
942 mblk_t *curr;
943 mblk_t *next;
Brian Ruthven8e0a6f32010-01-19 08:19:05 +0000944 queue_t *wq, *rq = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700945 mblk_t *tmp_list = NULL;
946
947 ASSERT(IAM_WRITER_ILL(ill));
948 if (connp != NULL)
Brian Ruthven838a4ff2009-12-22 00:58:06 +0000949 wq = CONNP_TO_WQ(connp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700950 else
Brian Ruthven838a4ff2009-12-22 00:58:06 +0000951 wq = ill->ill_wq;
Brian Ruthven8e0a6f32010-01-19 08:19:05 +0000952
953 /*
954 * In the case of lo0 being unplumbed, ill_wq will be NULL. Guard
955 * against this here.
956 */
957 if (wq != NULL)
958 rq = RD(wq);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700959
960 ipsq = ill->ill_phyint->phyint_ipsq;
961 /*
962 * Cleanup the ioctl mp's queued in ipsq_xopq_pending_mp if any.
963 * In the case of ioctl from a conn, there can be only 1 mp
Sowmini Varadhan44b099c2010-02-17 22:59:58 -0500964 * queued on the ipsq. If an ill is being unplumbed flush all
965 * the messages.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700966 */
967 mutex_enter(&ipsq->ipsq_lock);
968 for (prev = NULL, curr = ipsq->ipsq_xopq_mphead; curr != NULL;
969 curr = next) {
970 next = curr->b_next;
Sowmini Varadhan44b099c2010-02-17 22:59:58 -0500971 if (connp == NULL ||
972 (curr->b_queue == wq || curr->b_queue == rq)) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700973 /* Unlink the mblk from the pending mp list */
974 if (prev != NULL) {
975 prev->b_next = curr->b_next;
976 } else {
977 ASSERT(ipsq->ipsq_xopq_mphead == curr);
978 ipsq->ipsq_xopq_mphead = curr->b_next;
979 }
980 if (ipsq->ipsq_xopq_mptail == curr)
981 ipsq->ipsq_xopq_mptail = prev;
982 /*
983 * Create a temporary list and release the ipsq lock
984 * New elements are added to the head of the tmp_list
985 */
986 curr->b_next = tmp_list;
987 tmp_list = curr;
988 } else {
989 prev = curr;
990 }
991 }
992 mutex_exit(&ipsq->ipsq_lock);
993
994 while (tmp_list != NULL) {
995 curr = tmp_list;
996 tmp_list = curr->b_next;
997 curr->b_next = NULL;
998 curr->b_prev = NULL;
Sowmini Varadhan3e87ae12010-03-23 16:21:48 -0400999 wq = curr->b_queue;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001000 curr->b_queue = NULL;
1001 if (DB_TYPE(curr) == M_IOCTL || DB_TYPE(curr) == M_IOCDATA) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001002 DTRACE_PROBE4(ipif__ioctl,
1003 char *, "ipsq_xopq_mp_cleanup",
1004 int, 0, ill_t *, NULL, ipif_t *, NULL);
Brian Ruthven838a4ff2009-12-22 00:58:06 +00001005 ip_ioctl_finish(wq, curr, ENXIO, connp != NULL ?
meemb051ecf2006-12-27 21:32:46 -08001006 CONN_CLOSE : NO_COPYOUT, NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001007 } else {
1008 /*
1009 * IP-MT XXX In the case of TLI/XTI bind / optmgmt
masputraff550d02005-10-22 22:50:14 -07001010 * this can't be just inet_freemsg. we have to
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001011 * restart it otherwise the thread will be stuck.
1012 */
masputraff550d02005-10-22 22:50:14 -07001013 inet_freemsg(curr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001014 }
1015 }
1016}
1017
1018/*
1019 * This conn has started closing. Cleanup any pending ioctl from this conn.
Brian Ruthven838a4ff2009-12-22 00:58:06 +00001020 * STREAMS ensures that there can be at most 1 active ioctl on a stream.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001021 */
1022void
1023conn_ioctl_cleanup(conn_t *connp)
1024{
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001025 ipsq_t *ipsq;
1026 ill_t *ill;
1027 boolean_t refheld;
1028
1029 /*
Brian Ruthven838a4ff2009-12-22 00:58:06 +00001030 * Check for a queued ioctl. If the ioctl has not yet started, the mp
1031 * is pending in the list headed by ipsq_xopq_head. If the ioctl has
1032 * started the mp could be present in ipx_pending_mp. Note that if
1033 * conn_oper_pending_ill is NULL, the ioctl may still be in flight and
1034 * not yet queued anywhere. In this case, the conn close code will wait
1035 * until the conn_ref is dropped. If the stream was a tcp stream, then
1036 * tcp_close will wait first until all ioctls have completed for this
1037 * conn.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001038 */
1039 mutex_enter(&connp->conn_lock);
1040 ill = connp->conn_oper_pending_ill;
1041 if (ill == NULL) {
1042 mutex_exit(&connp->conn_lock);
1043 return;
1044 }
1045
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001046 /*
1047 * We may not be able to refhold the ill if the ill/ipif
1048 * is changing. But we need to make sure that the ill will
1049 * not vanish. So we just bump up the ill_waiter count.
1050 */
1051 refheld = ill_waiter_inc(ill);
1052 mutex_exit(&connp->conn_lock);
1053 if (refheld) {
Eric Chengda14ceb2008-12-04 18:16:10 -08001054 if (ipsq_enter(ill, B_TRUE, NEW_OP)) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001055 ill_waiter_dcr(ill);
1056 /*
1057 * Check whether this ioctl has started and is
meeme11c3f42009-01-06 20:16:25 -05001058 * pending. If it is not found there then check
1059 * whether this ioctl has not even started and is in
1060 * the ipsq_xopq list.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001061 */
1062 if (!ipsq_pending_mp_cleanup(ill, connp))
1063 ipsq_xopq_mp_cleanup(ill, connp);
1064 ipsq = ill->ill_phyint->phyint_ipsq;
meem328c7d12008-07-14 18:29:45 -07001065 ipsq_exit(ipsq);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001066 return;
1067 }
1068 }
1069
1070 /*
1071 * The ill is also closing and we could not bump up the
1072 * ill_waiter_count or we could not enter the ipsq. Leave
1073 * the cleanup to ill_delete
1074 */
1075 mutex_enter(&connp->conn_lock);
1076 while (connp->conn_oper_pending_ill != NULL)
1077 cv_wait(&connp->conn_refcv, &connp->conn_lock);
1078 mutex_exit(&connp->conn_lock);
1079 if (refheld)
1080 ill_waiter_dcr(ill);
1081}
1082
1083/*
1084 * ipcl_walk function for cleaning up conn_*_ill fields.
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001085 * Note that we leave ixa_multicast_ifindex, conn_incoming_ifindex, and
1086 * conn_bound_if in place. We prefer dropping
1087 * packets instead of sending them out the wrong interface, or accepting
1088 * packets from the wrong ifindex.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001089 */
1090static void
1091conn_cleanup_ill(conn_t *connp, caddr_t arg)
1092{
1093 ill_t *ill = (ill_t *)arg;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001094
1095 mutex_enter(&connp->conn_lock);
meeme704a8f2007-10-30 11:15:43 -07001096 if (connp->conn_dhcpinit_ill == ill) {
1097 connp->conn_dhcpinit_ill = NULL;
1098 ASSERT(ill->ill_dhcpinit != 0);
1099 atomic_dec_32(&ill->ill_dhcpinit);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001100 ill_set_inputfn(ill);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001101 }
1102 mutex_exit(&connp->conn_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001103}
1104
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001105static int
Cathy Zhou5d460ea2009-03-17 20:14:50 -07001106ill_down_ipifs_tail(ill_t *ill)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001107{
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001108 ipif_t *ipif;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001109 int err;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001110
Cathy Zhou5d460ea2009-03-17 20:14:50 -07001111 ASSERT(IAM_WRITER_ILL(ill));
carlsonj69bb4bb2006-08-14 14:10:48 -07001112 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
1113 ipif_non_duplicate(ipif);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001114 /*
1115 * ipif_down_tail will call arp_ll_down on the last ipif
1116 * and typically return EINPROGRESS when the DL_UNBIND is sent.
1117 */
1118 if ((err = ipif_down_tail(ipif)) != 0)
1119 return (err);
carlsonj69bb4bb2006-08-14 14:10:48 -07001120 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001121 return (0);
Cathy Zhou5d460ea2009-03-17 20:14:50 -07001122}
1123
1124/* ARGSUSED */
1125void
1126ipif_all_down_tail(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
1127{
1128 ASSERT(IAM_WRITER_IPSQ(ipsq));
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001129 (void) ill_down_ipifs_tail(q->q_ptr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001130 freemsg(mp);
meemb051ecf2006-12-27 21:32:46 -08001131 ipsq_current_finish(ipsq);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001132}
1133
1134/*
1135 * ill_down_start is called when we want to down this ill and bring it up again
1136 * It is called when we receive an M_ERROR / M_HANGUP. In this case we shut down
1137 * all interfaces, but don't tear down any plumbing.
1138 */
1139boolean_t
1140ill_down_start(queue_t *q, mblk_t *mp)
1141{
meemb051ecf2006-12-27 21:32:46 -08001142 ill_t *ill = q->q_ptr;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001143 ipif_t *ipif;
1144
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001145 ASSERT(IAM_WRITER_ILL(ill));
Thirumalai Srinivasan51048572010-01-16 16:04:55 -08001146 /*
1147 * It is possible that some ioctl is already in progress while we
1148 * received the M_ERROR / M_HANGUP in which case, we need to abort
Thirumalai Srinivasanb8d97ac2010-01-19 14:32:48 -08001149 * the ioctl. ill_down_start() is being processed as CUR_OP rather
1150 * than as NEW_OP since the cause of the M_ERROR / M_HANGUP may prevent
1151 * the in progress ioctl from ever completing.
1152 *
1153 * The thread that started the ioctl (if any) must have returned,
1154 * since we are now executing as writer. After the 2 calls below,
1155 * the state of the ipsq and the ill would reflect no trace of any
1156 * pending operation. Subsequently if there is any response to the
1157 * original ioctl from the driver, it would be discarded as an
1158 * unsolicited message from the driver.
Thirumalai Srinivasan51048572010-01-16 16:04:55 -08001159 */
1160 (void) ipsq_pending_mp_cleanup(ill, NULL);
1161 ill_dlpi_clear_deferred(ill);
1162
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001163 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
1164 (void) ipif_down(ipif, NULL, NULL);
1165
1166 ill_down(ill);
1167
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001168 /*
1169 * Walk all CONNs that can have a reference on an ire or nce for this
1170 * ill (we actually walk all that now have stale references).
1171 */
1172 ipcl_walk(conn_ixa_cleanup, (void *)B_TRUE, ill->ill_ipst);
1173
1174 /* With IPv6 we have dce_ifindex. Cleanup for neatness */
1175 if (ill->ill_isv6)
1176 dce_cleanup(ill->ill_phyint->phyint_ifindex, ill->ill_ipst);
1177
meemb051ecf2006-12-27 21:32:46 -08001178 ipsq_current_start(ill->ill_phyint->phyint_ipsq, ill->ill_ipif, 0);
1179
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001180 /*
meemb051ecf2006-12-27 21:32:46 -08001181 * Atomically test and add the pending mp if references are active.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001182 */
meemb051ecf2006-12-27 21:32:46 -08001183 mutex_enter(&ill->ill_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001184 if (!ill_is_quiescent(ill)) {
meemb051ecf2006-12-27 21:32:46 -08001185 /* call cannot fail since `conn_t *' argument is NULL */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001186 (void) ipsq_pending_mp_add(NULL, ill->ill_ipif, ill->ill_rq,
1187 mp, ILL_DOWN);
1188 mutex_exit(&ill->ill_lock);
1189 return (B_FALSE);
1190 }
1191 mutex_exit(&ill->ill_lock);
1192 return (B_TRUE);
1193}
1194
1195static void
1196ill_down(ill_t *ill)
1197{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001198 mblk_t *mp;
dh155122f4b3ec62007-01-19 16:59:38 -08001199 ip_stack_t *ipst = ill->ill_ipst;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001200
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001201 /*
1202 * Blow off any IREs dependent on this ILL.
1203 * The caller needs to handle conn_ixa_cleanup
1204 */
1205 ill_delete_ires(ill);
1206
1207 ire_walk_ill(0, 0, ill_downi, ill, ill);
dh155122f4b3ec62007-01-19 16:59:38 -08001208
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001209 /* Remove any conn_*_ill depending on this ill */
dh155122f4b3ec62007-01-19 16:59:38 -08001210 ipcl_walk(conn_cleanup_ill, (caddr_t)ill, ipst);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001211
1212 /*
1213 * Free state for additional IREs.
1214 */
1215 mutex_enter(&ill->ill_saved_ire_lock);
1216 mp = ill->ill_saved_ire_mp;
1217 ill->ill_saved_ire_mp = NULL;
1218 ill->ill_saved_ire_cnt = 0;
1219 mutex_exit(&ill->ill_saved_ire_lock);
1220 freemsg(mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001221}
1222
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001223/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001224 * ire_walk routine used to delete every IRE that depends on
Sowmini Varadhan44b099c2010-02-17 22:59:58 -05001225 * 'ill'. (Always called as writer, and may only be called from ire_walk.)
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001226 *
1227 * Note: since the routes added by the kernel are deleted separately,
1228 * this will only be 1) IRE_IF_CLONE and 2) manually added IRE_INTERFACE.
1229 *
1230 * We also remove references on ire_nce_cache entries that refer to the ill.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001231 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001232void
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001233ill_downi(ire_t *ire, char *ill_arg)
1234{
1235 ill_t *ill = (ill_t *)ill_arg;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001236 nce_t *nce;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001237
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001238 mutex_enter(&ire->ire_lock);
1239 nce = ire->ire_nce_cache;
1240 if (nce != NULL && nce->nce_ill == ill)
1241 ire->ire_nce_cache = NULL;
1242 else
1243 nce = NULL;
1244 mutex_exit(&ire->ire_lock);
1245 if (nce != NULL)
1246 nce_refrele(nce);
Sowmini Varadhan44b099c2010-02-17 22:59:58 -05001247 if (ire->ire_ill == ill) {
1248 /*
1249 * The existing interface binding for ire must be
1250 * deleted before trying to bind the route to another
1251 * interface. However, since we are using the contents of the
1252 * ire after ire_delete, the caller has to ensure that
1253 * CONDEMNED (deleted) ire's are not removed from the list
1254 * when ire_delete() returns. Currently ill_downi() is
1255 * only called as part of ire_walk*() routines, so that
1256 * the irb_refhold() done by ire_walk*() will ensure that
1257 * ire_delete() does not lead to ire_inactive().
1258 */
1259 ASSERT(ire->ire_bucket->irb_refcnt > 0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001260 ire_delete(ire);
Sowmini Varadhan44b099c2010-02-17 22:59:58 -05001261 if (ire->ire_unbound)
1262 ire_rebind(ire);
1263 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001264}
1265
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001266/* Remove IRE_IF_CLONE on this ill */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001267void
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001268ill_downi_if_clone(ire_t *ire, char *ill_arg)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001269{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001270 ill_t *ill = (ill_t *)ill_arg;
1271
1272 ASSERT(ire->ire_type & IRE_IF_CLONE);
1273 if (ire->ire_ill == ill)
1274 ire_delete(ire);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001275}
1276
1277/* Consume an M_IOCACK of the fastpath probe. */
1278void
1279ill_fastpath_ack(ill_t *ill, mblk_t *mp)
1280{
1281 mblk_t *mp1 = mp;
1282
1283 /*
1284 * If this was the first attempt turn on the fastpath probing.
1285 */
1286 mutex_enter(&ill->ill_lock);
ja978904d876312006-10-10 06:05:36 -07001287 if (ill->ill_dlpi_fastpath_state == IDS_INPROGRESS)
1288 ill->ill_dlpi_fastpath_state = IDS_OK;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001289 mutex_exit(&ill->ill_lock);
1290
1291 /* Free the M_IOCACK mblk, hold on to the data */
1292 mp = mp->b_cont;
1293 freeb(mp1);
1294 if (mp == NULL)
1295 return;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001296 if (mp->b_cont != NULL)
1297 nce_fastpath_update(ill, mp);
1298 else
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001299 ip0dbg(("ill_fastpath_ack: no b_cont\n"));
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001300 freemsg(mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001301}
1302
1303/*
1304 * Throw an M_IOCTL message downstream asking "do you know fastpath?"
1305 * The data portion of the request is a dl_unitdata_req_t template for
1306 * what we would send downstream in the absence of a fastpath confirmation.
1307 */
1308int
1309ill_fastpath_probe(ill_t *ill, mblk_t *dlur_mp)
1310{
1311 struct iocblk *ioc;
1312 mblk_t *mp;
1313
1314 if (dlur_mp == NULL)
1315 return (EINVAL);
1316
1317 mutex_enter(&ill->ill_lock);
1318 switch (ill->ill_dlpi_fastpath_state) {
ja978904d876312006-10-10 06:05:36 -07001319 case IDS_FAILED:
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001320 /*
1321 * Driver NAKed the first fastpath ioctl - assume it doesn't
1322 * support it.
1323 */
1324 mutex_exit(&ill->ill_lock);
1325 return (ENOTSUP);
ja978904d876312006-10-10 06:05:36 -07001326 case IDS_UNKNOWN:
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001327 /* This is the first probe */
ja978904d876312006-10-10 06:05:36 -07001328 ill->ill_dlpi_fastpath_state = IDS_INPROGRESS;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001329 break;
1330 default:
1331 break;
1332 }
1333 mutex_exit(&ill->ill_lock);
1334
1335 if ((mp = mkiocb(DL_IOC_HDR_INFO)) == NULL)
1336 return (EAGAIN);
1337
1338 mp->b_cont = copyb(dlur_mp);
1339 if (mp->b_cont == NULL) {
1340 freeb(mp);
1341 return (EAGAIN);
1342 }
1343
1344 ioc = (struct iocblk *)mp->b_rptr;
1345 ioc->ioc_count = msgdsize(mp->b_cont);
1346
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001347 DTRACE_PROBE3(ill__dlpi, char *, "ill_fastpath_probe",
1348 char *, "DL_IOC_HDR_INFO", ill_t *, ill);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001349 putnext(ill->ill_wq, mp);
1350 return (0);
1351}
1352
1353void
1354ill_capability_probe(ill_t *ill)
1355{
Eric Chengda14ceb2008-12-04 18:16:10 -08001356 mblk_t *mp;
1357
1358 ASSERT(IAM_WRITER_ILL(ill));
1359
1360 if (ill->ill_dlpi_capab_state != IDCS_UNKNOWN &&
1361 ill->ill_dlpi_capab_state != IDCS_FAILED)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001362 return;
1363
Eric Chengda14ceb2008-12-04 18:16:10 -08001364 /*
1365 * We are starting a new cycle of capability negotiation.
1366 * Free up the capab reset messages of any previous incarnation.
1367 * We will do a fresh allocation when we get the response to our probe
1368 */
1369 if (ill->ill_capab_reset_mp != NULL) {
1370 freemsg(ill->ill_capab_reset_mp);
1371 ill->ill_capab_reset_mp = NULL;
1372 }
1373
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001374 ip1dbg(("ill_capability_probe: starting capability negotiation\n"));
Eric Chengda14ceb2008-12-04 18:16:10 -08001375
1376 mp = ip_dlpi_alloc(sizeof (dl_capability_req_t), DL_CAPABILITY_REQ);
1377 if (mp == NULL)
1378 return;
1379
1380 ill_capability_send(ill, mp);
1381 ill->ill_dlpi_capab_state = IDCS_PROBE_SENT;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001382}
1383
1384void
Eric Chengda14ceb2008-12-04 18:16:10 -08001385ill_capability_reset(ill_t *ill, boolean_t reneg)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001386{
Eric Chengda14ceb2008-12-04 18:16:10 -08001387 ASSERT(IAM_WRITER_ILL(ill));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001388
Eric Chengda14ceb2008-12-04 18:16:10 -08001389 if (ill->ill_dlpi_capab_state != IDCS_OK)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001390 return;
1391
Eric Chengda14ceb2008-12-04 18:16:10 -08001392 ill->ill_dlpi_capab_state = reneg ? IDCS_RENEG : IDCS_RESET_SENT;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001393
Eric Chengda14ceb2008-12-04 18:16:10 -08001394 ill_capability_send(ill, ill->ill_capab_reset_mp);
1395 ill->ill_capab_reset_mp = NULL;
1396 /*
1397 * We turn off all capabilities except those pertaining to
1398 * direct function call capabilities viz. ILL_CAPAB_DLD*
1399 * which will be turned off by the corresponding reset functions.
1400 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001401 ill->ill_capabilities &= ~(ILL_CAPAB_HCKSUM | ILL_CAPAB_ZEROCOPY);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001402}
1403
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001404static void
Eric Chengda14ceb2008-12-04 18:16:10 -08001405ill_capability_reset_alloc(ill_t *ill)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001406{
1407 mblk_t *mp;
Eric Chengda14ceb2008-12-04 18:16:10 -08001408 size_t size = 0;
1409 int err;
1410 dl_capability_req_t *capb;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001411
Eric Chengda14ceb2008-12-04 18:16:10 -08001412 ASSERT(IAM_WRITER_ILL(ill));
1413 ASSERT(ill->ill_capab_reset_mp == NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001414
Eric Chengda14ceb2008-12-04 18:16:10 -08001415 if (ILL_HCKSUM_CAPABLE(ill)) {
1416 size += sizeof (dl_capability_sub_t) +
1417 sizeof (dl_capab_hcksum_t);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001418 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001419
Eric Chengda14ceb2008-12-04 18:16:10 -08001420 if (ill->ill_capabilities & ILL_CAPAB_ZEROCOPY) {
1421 size += sizeof (dl_capability_sub_t) +
1422 sizeof (dl_capab_zerocopy_t);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001423 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001424
Eric Chengda14ceb2008-12-04 18:16:10 -08001425 if (ill->ill_capabilities & ILL_CAPAB_DLD) {
1426 size += sizeof (dl_capability_sub_t) +
1427 sizeof (dl_capab_dld_t);
1428 }
1429
1430 mp = allocb_wait(size + sizeof (dl_capability_req_t), BPRI_MED,
1431 STR_NOSIG, &err);
1432
1433 mp->b_datap->db_type = M_PROTO;
1434 bzero(mp->b_rptr, size + sizeof (dl_capability_req_t));
1435
1436 capb = (dl_capability_req_t *)mp->b_rptr;
1437 capb->dl_primitive = DL_CAPABILITY_REQ;
1438 capb->dl_sub_offset = sizeof (dl_capability_req_t);
1439 capb->dl_sub_length = size;
1440
1441 mp->b_wptr += sizeof (dl_capability_req_t);
1442
1443 /*
1444 * Each handler fills in the corresponding dl_capability_sub_t
1445 * inside the mblk,
1446 */
Eric Chengda14ceb2008-12-04 18:16:10 -08001447 ill_capability_hcksum_reset_fill(ill, mp);
1448 ill_capability_zerocopy_reset_fill(ill, mp);
Eric Chengda14ceb2008-12-04 18:16:10 -08001449 ill_capability_dld_reset_fill(ill, mp);
1450
1451 ill->ill_capab_reset_mp = mp;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001452}
1453
1454static void
1455ill_capability_id_ack(ill_t *ill, mblk_t *mp, dl_capability_sub_t *outers)
1456{
1457 dl_capab_id_t *id_ic;
1458 uint_t sub_dl_cap = outers->dl_cap;
1459 dl_capability_sub_t *inners;
1460 uint8_t *capend;
1461
1462 ASSERT(sub_dl_cap == DL_CAPAB_ID_WRAPPER);
1463
1464 /*
1465 * Note: range checks here are not absolutely sufficient to
1466 * make us robust against malformed messages sent by drivers;
1467 * this is in keeping with the rest of IP's dlpi handling.
1468 * (Remember, it's coming from something else in the kernel
1469 * address space)
1470 */
1471
1472 capend = (uint8_t *)(outers + 1) + outers->dl_length;
1473 if (capend > mp->b_wptr) {
1474 cmn_err(CE_WARN, "ill_capability_id_ack: "
1475 "malformed sub-capability too long for mblk");
1476 return;
1477 }
1478
1479 id_ic = (dl_capab_id_t *)(outers + 1);
1480
Toomas Soomeab82c292019-12-28 14:24:51 +02001481 inners = &id_ic->id_subcap;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001482 if (outers->dl_length < sizeof (*id_ic) ||
Toomas Soomeab82c292019-12-28 14:24:51 +02001483 inners->dl_length > (outers->dl_length - sizeof (*inners))) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001484 cmn_err(CE_WARN, "ill_capability_id_ack: malformed "
1485 "encapsulated capab type %d too long for mblk",
1486 inners->dl_cap);
1487 return;
1488 }
1489
1490 if (!dlcapabcheckqid(&id_ic->id_mid, ill->ill_lmod_rq)) {
1491 ip1dbg(("ill_capability_id_ack: mid token for capab type %d "
1492 "isn't as expected; pass-thru module(s) detected, "
1493 "discarding capability\n", inners->dl_cap));
1494 return;
1495 }
1496
1497 /* Process the encapsulated sub-capability */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001498 ill_capability_dispatch(ill, mp, inners);
Eric Chengda14ceb2008-12-04 18:16:10 -08001499}
1500
1501static void
1502ill_capability_dld_reset_fill(ill_t *ill, mblk_t *mp)
1503{
1504 dl_capability_sub_t *dl_subcap;
1505
1506 if (!(ill->ill_capabilities & ILL_CAPAB_DLD))
1507 return;
1508
1509 /*
1510 * The dl_capab_dld_t that follows the dl_capability_sub_t is not
1511 * initialized below since it is not used by DLD.
1512 */
1513 dl_subcap = (dl_capability_sub_t *)mp->b_wptr;
1514 dl_subcap->dl_cap = DL_CAPAB_DLD;
1515 dl_subcap->dl_length = sizeof (dl_capab_dld_t);
1516
1517 mp->b_wptr += sizeof (dl_capability_sub_t) + sizeof (dl_capab_dld_t);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001518}
1519
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001520static void
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001521ill_capability_dispatch(ill_t *ill, mblk_t *mp, dl_capability_sub_t *subp)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001522{
Cathy Zhou1cb875a2009-11-17 09:17:48 -08001523 /*
1524 * If no ipif was brought up over this ill, this DL_CAPABILITY_REQ/ACK
1525 * is only to get the VRRP capability.
Cathy Zhou10abae12009-12-02 15:13:47 -08001526 *
1527 * Note that we cannot check ill_ipif_up_count here since
1528 * ill_ipif_up_count is only incremented when the resolver is setup.
1529 * That is done asynchronously, and can race with this function.
Cathy Zhou1cb875a2009-11-17 09:17:48 -08001530 */
Cathy Zhou10abae12009-12-02 15:13:47 -08001531 if (!ill->ill_dl_up) {
Cathy Zhou1cb875a2009-11-17 09:17:48 -08001532 if (subp->dl_cap == DL_CAPAB_VRRP)
1533 ill_capability_vrrp_ack(ill, mp, subp);
1534 return;
1535 }
1536
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001537 switch (subp->dl_cap) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001538 case DL_CAPAB_HCKSUM:
1539 ill_capability_hcksum_ack(ill, mp, subp);
1540 break;
1541 case DL_CAPAB_ZEROCOPY:
1542 ill_capability_zerocopy_ack(ill, mp, subp);
1543 break;
Eric Chengda14ceb2008-12-04 18:16:10 -08001544 case DL_CAPAB_DLD:
1545 ill_capability_dld_ack(ill, mp, subp);
yl15005183476012006-11-13 20:44:19 -08001546 break;
Cathy Zhou1cb875a2009-11-17 09:17:48 -08001547 case DL_CAPAB_VRRP:
1548 break;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001549 default:
1550 ip1dbg(("ill_capability_dispatch: unknown capab type %d\n",
1551 subp->dl_cap));
1552 }
1553}
1554
1555/*
Cathy Zhou1cb875a2009-11-17 09:17:48 -08001556 * Process the vrrp capability received from a DLS Provider. isub must point
1557 * to the sub-capability (DL_CAPAB_VRRP) of a DL_CAPABILITY_ACK message.
1558 */
1559static void
1560ill_capability_vrrp_ack(ill_t *ill, mblk_t *mp, dl_capability_sub_t *isub)
1561{
1562 dl_capab_vrrp_t *vrrp;
1563 uint_t sub_dl_cap = isub->dl_cap;
1564 uint8_t *capend;
1565
1566 ASSERT(IAM_WRITER_ILL(ill));
1567 ASSERT(sub_dl_cap == DL_CAPAB_VRRP);
1568
1569 /*
1570 * Note: range checks here are not absolutely sufficient to
1571 * make us robust against malformed messages sent by drivers;
1572 * this is in keeping with the rest of IP's dlpi handling.
1573 * (Remember, it's coming from something else in the kernel
1574 * address space)
1575 */
1576 capend = (uint8_t *)(isub + 1) + isub->dl_length;
1577 if (capend > mp->b_wptr) {
1578 cmn_err(CE_WARN, "ill_capability_vrrp_ack: "
1579 "malformed sub-capability too long for mblk");
1580 return;
1581 }
1582 vrrp = (dl_capab_vrrp_t *)(isub + 1);
1583
1584 /*
1585 * Compare the IP address family and set ILLF_VRRP for the right ill.
1586 */
1587 if ((vrrp->vrrp_af == AF_INET6 && ill->ill_isv6) ||
1588 (vrrp->vrrp_af == AF_INET && !ill->ill_isv6)) {
1589 ill->ill_flags |= ILLF_VRRP;
1590 }
1591}
1592
1593/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001594 * Process a hardware checksum offload capability negotiation ack received
1595 * from a DLS Provider.isub must point to the sub-capability (DL_CAPAB_HCKSUM)
1596 * of a DL_CAPABILITY_ACK message.
1597 */
1598static void
1599ill_capability_hcksum_ack(ill_t *ill, mblk_t *mp, dl_capability_sub_t *isub)
1600{
1601 dl_capability_req_t *ocap;
1602 dl_capab_hcksum_t *ihck, *ohck;
1603 ill_hcksum_capab_t **ill_hcksum;
1604 mblk_t *nmp = NULL;
1605 uint_t sub_dl_cap = isub->dl_cap;
1606 uint8_t *capend;
1607
1608 ASSERT(sub_dl_cap == DL_CAPAB_HCKSUM);
1609
1610 ill_hcksum = (ill_hcksum_capab_t **)&ill->ill_hcksum_capab;
1611
1612 /*
1613 * Note: range checks here are not absolutely sufficient to
1614 * make us robust against malformed messages sent by drivers;
1615 * this is in keeping with the rest of IP's dlpi handling.
1616 * (Remember, it's coming from something else in the kernel
1617 * address space)
1618 */
1619 capend = (uint8_t *)(isub + 1) + isub->dl_length;
1620 if (capend > mp->b_wptr) {
1621 cmn_err(CE_WARN, "ill_capability_hcksum_ack: "
1622 "malformed sub-capability too long for mblk");
1623 return;
1624 }
1625
1626 /*
1627 * There are two types of acks we process here:
1628 * 1. acks in reply to a (first form) generic capability req
1629 * (no ENABLE flag set)
1630 * 2. acks in reply to a ENABLE capability req.
1631 * (ENABLE flag set)
1632 */
1633 ihck = (dl_capab_hcksum_t *)(isub + 1);
1634
1635 if (ihck->hcksum_version != HCKSUM_VERSION_1) {
1636 cmn_err(CE_CONT, "ill_capability_hcksum_ack: "
1637 "unsupported hardware checksum "
1638 "sub-capability (version %d, expected %d)",
1639 ihck->hcksum_version, HCKSUM_VERSION_1);
1640 return;
1641 }
1642
1643 if (!dlcapabcheckqid(&ihck->hcksum_mid, ill->ill_lmod_rq)) {
1644 ip1dbg(("ill_capability_hcksum_ack: mid token for hardware "
1645 "checksum capability isn't as expected; pass-thru "
1646 "module(s) detected, discarding capability\n"));
1647 return;
1648 }
1649
masputraff550d02005-10-22 22:50:14 -07001650#define CURR_HCKSUM_CAPAB \
1651 (HCKSUM_INET_PARTIAL | HCKSUM_INET_FULL_V4 | \
1652 HCKSUM_INET_FULL_V6 | HCKSUM_IPHDRCKSUM)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001653
1654 if ((ihck->hcksum_txflags & HCKSUM_ENABLE) &&
1655 (ihck->hcksum_txflags & CURR_HCKSUM_CAPAB)) {
1656 /* do ENABLE processing */
1657 if (*ill_hcksum == NULL) {
1658 *ill_hcksum = kmem_zalloc(sizeof (ill_hcksum_capab_t),
1659 KM_NOSLEEP);
1660
1661 if (*ill_hcksum == NULL) {
1662 cmn_err(CE_WARN, "ill_capability_hcksum_ack: "
1663 "could not enable hcksum version %d "
1664 "for %s (ENOMEM)\n", HCKSUM_CURRENT_VERSION,
1665 ill->ill_name);
1666 return;
1667 }
1668 }
1669
1670 (*ill_hcksum)->ill_hcksum_version = ihck->hcksum_version;
1671 (*ill_hcksum)->ill_hcksum_txflags = ihck->hcksum_txflags;
1672 ill->ill_capabilities |= ILL_CAPAB_HCKSUM;
1673 ip1dbg(("ill_capability_hcksum_ack: interface %s "
1674 "has enabled hardware checksumming\n ",
1675 ill->ill_name));
1676 } else if (ihck->hcksum_txflags & CURR_HCKSUM_CAPAB) {
1677 /*
1678 * Enabling hardware checksum offload
1679 * Currently IP supports {TCP,UDP}/IPv4
1680 * partial and full cksum offload and
1681 * IPv4 header checksum offload.
1682 * Allocate new mblk which will
1683 * contain a new capability request
1684 * to enable hardware checksum offload.
1685 */
1686 uint_t size;
1687 uchar_t *rptr;
1688
1689 size = sizeof (dl_capability_req_t) +
1690 sizeof (dl_capability_sub_t) + isub->dl_length;
1691
1692 if ((nmp = ip_dlpi_alloc(size, DL_CAPABILITY_REQ)) == NULL) {
1693 cmn_err(CE_WARN, "ill_capability_hcksum_ack: "
1694 "could not enable hardware cksum for %s (ENOMEM)\n",
1695 ill->ill_name);
1696 return;
1697 }
1698
1699 rptr = nmp->b_rptr;
1700 /* initialize dl_capability_req_t */
1701 ocap = (dl_capability_req_t *)nmp->b_rptr;
1702 ocap->dl_sub_offset =
1703 sizeof (dl_capability_req_t);
1704 ocap->dl_sub_length =
1705 sizeof (dl_capability_sub_t) +
1706 isub->dl_length;
1707 nmp->b_rptr += sizeof (dl_capability_req_t);
1708
1709 /* initialize dl_capability_sub_t */
1710 bcopy(isub, nmp->b_rptr, sizeof (*isub));
1711 nmp->b_rptr += sizeof (*isub);
1712
1713 /* initialize dl_capab_hcksum_t */
1714 ohck = (dl_capab_hcksum_t *)nmp->b_rptr;
1715 bcopy(ihck, ohck, sizeof (*ihck));
1716
1717 nmp->b_rptr = rptr;
1718 ASSERT(nmp->b_wptr == (nmp->b_rptr + size));
1719
1720 /* Set ENABLE flag */
1721 ohck->hcksum_txflags &= CURR_HCKSUM_CAPAB;
1722 ohck->hcksum_txflags |= HCKSUM_ENABLE;
1723
1724 /*
1725 * nmp points to a DL_CAPABILITY_REQ message to enable
1726 * hardware checksum acceleration.
1727 */
Eric Chengda14ceb2008-12-04 18:16:10 -08001728 ill_capability_send(ill, nmp);
masputraff550d02005-10-22 22:50:14 -07001729 } else {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001730 ip1dbg(("ill_capability_hcksum_ack: interface %s has "
1731 "advertised %x hardware checksum capability flags\n",
1732 ill->ill_name, ihck->hcksum_txflags));
masputraff550d02005-10-22 22:50:14 -07001733 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001734}
1735
1736static void
Eric Chengda14ceb2008-12-04 18:16:10 -08001737ill_capability_hcksum_reset_fill(ill_t *ill, mblk_t *mp)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001738{
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001739 dl_capab_hcksum_t *hck_subcap;
1740 dl_capability_sub_t *dl_subcap;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001741
masputraff550d02005-10-22 22:50:14 -07001742 if (!ILL_HCKSUM_CAPABLE(ill))
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001743 return;
1744
1745 ASSERT(ill->ill_hcksum_capab != NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001746
Eric Chengda14ceb2008-12-04 18:16:10 -08001747 dl_subcap = (dl_capability_sub_t *)mp->b_wptr;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001748 dl_subcap->dl_cap = DL_CAPAB_HCKSUM;
1749 dl_subcap->dl_length = sizeof (*hck_subcap);
1750
1751 hck_subcap = (dl_capab_hcksum_t *)(dl_subcap + 1);
1752 hck_subcap->hcksum_version = ill->ill_hcksum_capab->ill_hcksum_version;
1753 hck_subcap->hcksum_txflags = 0;
1754
Eric Chengda14ceb2008-12-04 18:16:10 -08001755 mp->b_wptr += sizeof (*dl_subcap) + sizeof (*hck_subcap);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001756}
1757
1758static void
1759ill_capability_zerocopy_ack(ill_t *ill, mblk_t *mp, dl_capability_sub_t *isub)
1760{
1761 mblk_t *nmp = NULL;
1762 dl_capability_req_t *oc;
1763 dl_capab_zerocopy_t *zc_ic, *zc_oc;
1764 ill_zerocopy_capab_t **ill_zerocopy_capab;
1765 uint_t sub_dl_cap = isub->dl_cap;
1766 uint8_t *capend;
1767
1768 ASSERT(sub_dl_cap == DL_CAPAB_ZEROCOPY);
1769
1770 ill_zerocopy_capab = (ill_zerocopy_capab_t **)&ill->ill_zerocopy_capab;
1771
1772 /*
1773 * Note: range checks here are not absolutely sufficient to
1774 * make us robust against malformed messages sent by drivers;
1775 * this is in keeping with the rest of IP's dlpi handling.
1776 * (Remember, it's coming from something else in the kernel
1777 * address space)
1778 */
1779 capend = (uint8_t *)(isub + 1) + isub->dl_length;
1780 if (capend > mp->b_wptr) {
1781 cmn_err(CE_WARN, "ill_capability_zerocopy_ack: "
1782 "malformed sub-capability too long for mblk");
1783 return;
1784 }
1785
1786 zc_ic = (dl_capab_zerocopy_t *)(isub + 1);
1787 if (zc_ic->zerocopy_version != ZEROCOPY_VERSION_1) {
1788 cmn_err(CE_CONT, "ill_capability_zerocopy_ack: "
1789 "unsupported ZEROCOPY sub-capability (version %d, "
1790 "expected %d)", zc_ic->zerocopy_version,
1791 ZEROCOPY_VERSION_1);
1792 return;
1793 }
1794
1795 if (!dlcapabcheckqid(&zc_ic->zerocopy_mid, ill->ill_lmod_rq)) {
1796 ip1dbg(("ill_capability_zerocopy_ack: mid token for zerocopy "
1797 "capability isn't as expected; pass-thru module(s) "
1798 "detected, discarding capability\n"));
1799 return;
1800 }
1801
1802 if ((zc_ic->zerocopy_flags & DL_CAPAB_VMSAFE_MEM) != 0) {
1803 if (*ill_zerocopy_capab == NULL) {
1804 *ill_zerocopy_capab =
1805 kmem_zalloc(sizeof (ill_zerocopy_capab_t),
1806 KM_NOSLEEP);
1807
1808 if (*ill_zerocopy_capab == NULL) {
1809 cmn_err(CE_WARN, "ill_capability_zerocopy_ack: "
1810 "could not enable Zero-copy version %d "
1811 "for %s (ENOMEM)\n", ZEROCOPY_VERSION_1,
1812 ill->ill_name);
1813 return;
1814 }
1815 }
1816
1817 ip1dbg(("ill_capability_zerocopy_ack: interface %s "
1818 "supports Zero-copy version %d\n", ill->ill_name,
1819 ZEROCOPY_VERSION_1));
1820
1821 (*ill_zerocopy_capab)->ill_zerocopy_version =
1822 zc_ic->zerocopy_version;
1823 (*ill_zerocopy_capab)->ill_zerocopy_flags =
1824 zc_ic->zerocopy_flags;
1825
1826 ill->ill_capabilities |= ILL_CAPAB_ZEROCOPY;
1827 } else {
1828 uint_t size;
1829 uchar_t *rptr;
1830
1831 size = sizeof (dl_capability_req_t) +
1832 sizeof (dl_capability_sub_t) +
1833 sizeof (dl_capab_zerocopy_t);
1834
1835 if ((nmp = ip_dlpi_alloc(size, DL_CAPABILITY_REQ)) == NULL) {
1836 cmn_err(CE_WARN, "ill_capability_zerocopy_ack: "
1837 "could not enable zerocopy for %s (ENOMEM)\n",
1838 ill->ill_name);
1839 return;
1840 }
1841
1842 rptr = nmp->b_rptr;
1843 /* initialize dl_capability_req_t */
1844 oc = (dl_capability_req_t *)rptr;
1845 oc->dl_sub_offset = sizeof (dl_capability_req_t);
1846 oc->dl_sub_length = sizeof (dl_capability_sub_t) +
1847 sizeof (dl_capab_zerocopy_t);
1848 rptr += sizeof (dl_capability_req_t);
1849
1850 /* initialize dl_capability_sub_t */
1851 bcopy(isub, rptr, sizeof (*isub));
1852 rptr += sizeof (*isub);
1853
1854 /* initialize dl_capab_zerocopy_t */
1855 zc_oc = (dl_capab_zerocopy_t *)rptr;
1856 *zc_oc = *zc_ic;
1857
1858 ip1dbg(("ill_capability_zerocopy_ack: asking interface %s "
1859 "to enable zero-copy version %d\n", ill->ill_name,
1860 ZEROCOPY_VERSION_1));
1861
1862 /* set VMSAFE_MEM flag */
1863 zc_oc->zerocopy_flags |= DL_CAPAB_VMSAFE_MEM;
1864
1865 /* nmp points to a DL_CAPABILITY_REQ message to enable zcopy */
Eric Chengda14ceb2008-12-04 18:16:10 -08001866 ill_capability_send(ill, nmp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001867 }
1868}
1869
1870static void
Eric Chengda14ceb2008-12-04 18:16:10 -08001871ill_capability_zerocopy_reset_fill(ill_t *ill, mblk_t *mp)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001872{
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001873 dl_capab_zerocopy_t *zerocopy_subcap;
1874 dl_capability_sub_t *dl_subcap;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001875
1876 if (!(ill->ill_capabilities & ILL_CAPAB_ZEROCOPY))
1877 return;
1878
1879 ASSERT(ill->ill_zerocopy_capab != NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001880
Eric Chengda14ceb2008-12-04 18:16:10 -08001881 dl_subcap = (dl_capability_sub_t *)mp->b_wptr;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001882 dl_subcap->dl_cap = DL_CAPAB_ZEROCOPY;
1883 dl_subcap->dl_length = sizeof (*zerocopy_subcap);
1884
1885 zerocopy_subcap = (dl_capab_zerocopy_t *)(dl_subcap + 1);
1886 zerocopy_subcap->zerocopy_version =
1887 ill->ill_zerocopy_capab->ill_zerocopy_version;
1888 zerocopy_subcap->zerocopy_flags = 0;
1889
Eric Chengda14ceb2008-12-04 18:16:10 -08001890 mp->b_wptr += sizeof (*dl_subcap) + sizeof (*zerocopy_subcap);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001891}
1892
1893/*
Eric Chengda14ceb2008-12-04 18:16:10 -08001894 * DLD capability
1895 * Refer to dld.h for more information regarding the purpose and usage
1896 * of this capability.
yl15005183476012006-11-13 20:44:19 -08001897 */
1898static void
Eric Chengda14ceb2008-12-04 18:16:10 -08001899ill_capability_dld_ack(ill_t *ill, mblk_t *mp, dl_capability_sub_t *isub)
yl15005183476012006-11-13 20:44:19 -08001900{
Eric Chengda14ceb2008-12-04 18:16:10 -08001901 dl_capab_dld_t *dld_ic, dld;
1902 uint_t sub_dl_cap = isub->dl_cap;
1903 uint8_t *capend;
1904 ill_dld_capab_t *idc;
yl15005183476012006-11-13 20:44:19 -08001905
Eric Chengda14ceb2008-12-04 18:16:10 -08001906 ASSERT(IAM_WRITER_ILL(ill));
1907 ASSERT(sub_dl_cap == DL_CAPAB_DLD);
yl15005183476012006-11-13 20:44:19 -08001908
1909 /*
1910 * Note: range checks here are not absolutely sufficient to
1911 * make us robust against malformed messages sent by drivers;
1912 * this is in keeping with the rest of IP's dlpi handling.
1913 * (Remember, it's coming from something else in the kernel
1914 * address space)
1915 */
1916 capend = (uint8_t *)(isub + 1) + isub->dl_length;
1917 if (capend > mp->b_wptr) {
Eric Chengda14ceb2008-12-04 18:16:10 -08001918 cmn_err(CE_WARN, "ill_capability_dld_ack: "
yl15005183476012006-11-13 20:44:19 -08001919 "malformed sub-capability too long for mblk");
1920 return;
1921 }
Eric Chengda14ceb2008-12-04 18:16:10 -08001922 dld_ic = (dl_capab_dld_t *)(isub + 1);
1923 if (dld_ic->dld_version != DLD_CURRENT_VERSION) {
1924 cmn_err(CE_CONT, "ill_capability_dld_ack: "
1925 "unsupported DLD sub-capability (version %d, "
1926 "expected %d)", dld_ic->dld_version,
1927 DLD_CURRENT_VERSION);
yl15005183476012006-11-13 20:44:19 -08001928 return;
1929 }
Eric Chengda14ceb2008-12-04 18:16:10 -08001930 if (!dlcapabcheckqid(&dld_ic->dld_mid, ill->ill_lmod_rq)) {
1931 ip1dbg(("ill_capability_dld_ack: mid token for dld "
yl15005183476012006-11-13 20:44:19 -08001932 "capability isn't as expected; pass-thru module(s) "
1933 "detected, discarding capability\n"));
1934 return;
1935 }
1936
Eric Chengda14ceb2008-12-04 18:16:10 -08001937 /*
1938 * Copy locally to ensure alignment.
1939 */
1940 bcopy(dld_ic, &dld, sizeof (dl_capab_dld_t));
yl15005183476012006-11-13 20:44:19 -08001941
Eric Chengda14ceb2008-12-04 18:16:10 -08001942 if ((idc = ill->ill_dld_capab) == NULL) {
1943 idc = kmem_zalloc(sizeof (ill_dld_capab_t), KM_NOSLEEP);
1944 if (idc == NULL) {
1945 cmn_err(CE_WARN, "ill_capability_dld_ack: "
1946 "could not enable DLD version %d "
1947 "for %s (ENOMEM)\n", DLD_CURRENT_VERSION,
yl15005183476012006-11-13 20:44:19 -08001948 ill->ill_name);
1949 return;
1950 }
Eric Chengda14ceb2008-12-04 18:16:10 -08001951 ill->ill_dld_capab = idc;
1952 }
Cathy Zhou5d460ea2009-03-17 20:14:50 -07001953 idc->idc_capab_df = (ip_capab_func_t)dld.dld_capab;
1954 idc->idc_capab_dh = (void *)dld.dld_capab_handle;
Eric Chengda14ceb2008-12-04 18:16:10 -08001955 ip1dbg(("ill_capability_dld_ack: interface %s "
1956 "supports DLD version %d\n", ill->ill_name, DLD_CURRENT_VERSION));
yl15005183476012006-11-13 20:44:19 -08001957
Eric Chengda14ceb2008-12-04 18:16:10 -08001958 ill_capability_dld_enable(ill);
1959}
yl15005183476012006-11-13 20:44:19 -08001960
Eric Chengda14ceb2008-12-04 18:16:10 -08001961/*
1962 * Typically capability negotiation between IP and the driver happens via
1963 * DLPI message exchange. However GLD also offers a direct function call
1964 * mechanism to exchange the DLD_DIRECT_CAPAB and DLD_POLL_CAPAB capabilities,
1965 * But arbitrary function calls into IP or GLD are not permitted, since both
1966 * of them are protected by their own perimeter mechanism. The perimeter can
1967 * be viewed as a coarse lock or serialization mechanism. The hierarchy of
1968 * these perimeters is IP -> MAC. Thus for example to enable the squeue
1969 * polling, IP needs to enter its perimeter, then call ill_mac_perim_enter
1970 * to enter the mac perimeter and then do the direct function calls into
1971 * GLD to enable squeue polling. The ring related callbacks from the mac into
1972 * the stack to add, bind, quiesce, restart or cleanup a ring are all
1973 * protected by the mac perimeter.
1974 */
1975static void
1976ill_mac_perim_enter(ill_t *ill, mac_perim_handle_t *mphp)
1977{
1978 ill_dld_capab_t *idc = ill->ill_dld_capab;
1979 int err;
yl15005183476012006-11-13 20:44:19 -08001980
Eric Chengda14ceb2008-12-04 18:16:10 -08001981 err = idc->idc_capab_df(idc->idc_capab_dh, DLD_CAPAB_PERIM, mphp,
1982 DLD_ENABLE);
1983 ASSERT(err == 0);
1984}
yl15005183476012006-11-13 20:44:19 -08001985
Eric Chengda14ceb2008-12-04 18:16:10 -08001986static void
1987ill_mac_perim_exit(ill_t *ill, mac_perim_handle_t mph)
1988{
1989 ill_dld_capab_t *idc = ill->ill_dld_capab;
1990 int err;
yl15005183476012006-11-13 20:44:19 -08001991
Eric Chengda14ceb2008-12-04 18:16:10 -08001992 err = idc->idc_capab_df(idc->idc_capab_dh, DLD_CAPAB_PERIM, mph,
1993 DLD_DISABLE);
1994 ASSERT(err == 0);
1995}
yl15005183476012006-11-13 20:44:19 -08001996
Eric Chengda14ceb2008-12-04 18:16:10 -08001997boolean_t
1998ill_mac_perim_held(ill_t *ill)
1999{
2000 ill_dld_capab_t *idc = ill->ill_dld_capab;
2001
2002 return (idc->idc_capab_df(idc->idc_capab_dh, DLD_CAPAB_PERIM, NULL,
2003 DLD_QUERY));
2004}
2005
2006static void
2007ill_capability_direct_enable(ill_t *ill)
2008{
2009 ill_dld_capab_t *idc = ill->ill_dld_capab;
2010 ill_dld_direct_t *idd = &idc->idc_direct;
2011 dld_capab_direct_t direct;
2012 int rc;
2013
2014 ASSERT(!ill->ill_isv6 && IAM_WRITER_ILL(ill));
2015
2016 bzero(&direct, sizeof (direct));
2017 direct.di_rx_cf = (uintptr_t)ip_input;
2018 direct.di_rx_ch = ill;
2019
2020 rc = idc->idc_capab_df(idc->idc_capab_dh, DLD_CAPAB_DIRECT, &direct,
2021 DLD_ENABLE);
2022 if (rc == 0) {
2023 idd->idd_tx_df = (ip_dld_tx_t)direct.di_tx_df;
2024 idd->idd_tx_dh = direct.di_tx_dh;
2025 idd->idd_tx_cb_df = (ip_dld_callb_t)direct.di_tx_cb_df;
2026 idd->idd_tx_cb_dh = direct.di_tx_cb_dh;
Venugopal Iyerae6aa222009-02-17 01:31:30 -08002027 idd->idd_tx_fctl_df = (ip_dld_fctl_t)direct.di_tx_fctl_df;
2028 idd->idd_tx_fctl_dh = direct.di_tx_fctl_dh;
Cathy Zhou79eeb642009-05-27 11:56:36 -07002029 ASSERT(idd->idd_tx_cb_df != NULL);
2030 ASSERT(idd->idd_tx_fctl_df != NULL);
2031 ASSERT(idd->idd_tx_df != NULL);
Eric Chengda14ceb2008-12-04 18:16:10 -08002032 /*
2033 * One time registration of flow enable callback function
2034 */
2035 ill->ill_flownotify_mh = idd->idd_tx_cb_df(idd->idd_tx_cb_dh,
2036 ill_flow_enable, ill);
2037 ill->ill_capabilities |= ILL_CAPAB_DLD_DIRECT;
2038 DTRACE_PROBE1(direct_on, (ill_t *), ill);
yl15005183476012006-11-13 20:44:19 -08002039 } else {
Eric Chengda14ceb2008-12-04 18:16:10 -08002040 cmn_err(CE_WARN, "warning: could not enable DIRECT "
2041 "capability, rc = %d\n", rc);
2042 DTRACE_PROBE2(direct_off, (ill_t *), ill, (int), rc);
yl15005183476012006-11-13 20:44:19 -08002043 }
2044}
2045
yl15005183476012006-11-13 20:44:19 -08002046static void
Eric Chengda14ceb2008-12-04 18:16:10 -08002047ill_capability_poll_enable(ill_t *ill)
yl15005183476012006-11-13 20:44:19 -08002048{
Eric Chengda14ceb2008-12-04 18:16:10 -08002049 ill_dld_capab_t *idc = ill->ill_dld_capab;
2050 dld_capab_poll_t poll;
2051 int rc;
yl15005183476012006-11-13 20:44:19 -08002052
Eric Chengda14ceb2008-12-04 18:16:10 -08002053 ASSERT(!ill->ill_isv6 && IAM_WRITER_ILL(ill));
yl15005183476012006-11-13 20:44:19 -08002054
Eric Chengda14ceb2008-12-04 18:16:10 -08002055 bzero(&poll, sizeof (poll));
2056 poll.poll_ring_add_cf = (uintptr_t)ip_squeue_add_ring;
2057 poll.poll_ring_remove_cf = (uintptr_t)ip_squeue_clean_ring;
2058 poll.poll_ring_quiesce_cf = (uintptr_t)ip_squeue_quiesce_ring;
2059 poll.poll_ring_restart_cf = (uintptr_t)ip_squeue_restart_ring;
2060 poll.poll_ring_bind_cf = (uintptr_t)ip_squeue_bind_ring;
2061 poll.poll_ring_ch = ill;
2062 rc = idc->idc_capab_df(idc->idc_capab_dh, DLD_CAPAB_POLL, &poll,
2063 DLD_ENABLE);
2064 if (rc == 0) {
2065 ill->ill_capabilities |= ILL_CAPAB_DLD_POLL;
2066 DTRACE_PROBE1(poll_on, (ill_t *), ill);
2067 } else {
2068 ip1dbg(("warning: could not enable POLL "
2069 "capability, rc = %d\n", rc));
2070 DTRACE_PROBE2(poll_off, (ill_t *), ill, (int), rc);
2071 }
2072}
yl15005183476012006-11-13 20:44:19 -08002073
Eric Chengda14ceb2008-12-04 18:16:10 -08002074/*
2075 * Enable the LSO capability.
2076 */
2077static void
2078ill_capability_lso_enable(ill_t *ill)
2079{
2080 ill_dld_capab_t *idc = ill->ill_dld_capab;
2081 dld_capab_lso_t lso;
2082 int rc;
yl15005183476012006-11-13 20:44:19 -08002083
Robert Mustacchi62366fb2020-04-01 15:30:20 +00002084 ASSERT(IAM_WRITER_ILL(ill));
Eric Chengda14ceb2008-12-04 18:16:10 -08002085
2086 if (ill->ill_lso_capab == NULL) {
2087 ill->ill_lso_capab = kmem_zalloc(sizeof (ill_lso_capab_t),
2088 KM_NOSLEEP);
2089 if (ill->ill_lso_capab == NULL) {
2090 cmn_err(CE_WARN, "ill_capability_lso_enable: "
2091 "could not enable LSO for %s (ENOMEM)\n",