stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1 | /* |
| 2 | * CDDL HEADER START |
| 3 | * |
| 4 | * The contents of this file are subject to the terms of the |
ja97890 | fecf4ec | 2006-02-07 02:27:51 -0800 | [diff] [blame] | 5 | * Common Development and Distribution License (the "License"). |
| 6 | * You may not use this file except in compliance with the License. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 7 | * |
| 8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| 9 | * or http://www.opensolaris.org/os/licensing. |
| 10 | * See the License for the specific language governing permissions |
| 11 | * and limitations under the License. |
| 12 | * |
| 13 | * When distributing Covered Code, include this CDDL HEADER in each |
| 14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| 15 | * If applicable, add the following below this CDDL HEADER, with the |
| 16 | * fields enclosed by brackets "[]" replaced with your own identifying |
| 17 | * information: Portions Copyright [yyyy] [name of copyright owner] |
| 18 | * |
| 19 | * CDDL HEADER END |
| 20 | */ |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 21 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 22 | /* |
meem | 66b718c | 2010-04-12 21:02:11 -0400 | [diff] [blame] | 23 | * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. |
| 24 | * Copyright (c) 1990 Mentat Inc. |
Dan McDonald | 7199b8e | 2017-02-01 14:55:57 -0500 | [diff] [blame] | 25 | * Copyright (c) 2017 OmniTI Computer Consulting, Inc. All rights reserved. |
Daniel Hoffman | 48bbca8 | 2017-02-17 11:48:20 -0800 | [diff] [blame] | 26 | * Copyright (c) 2016 by Delphix. All rights reserved. |
Andy Fiddaman | 221e47f | 2020-09-18 20:04:57 +0000 | [diff] [blame] | 27 | * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. |
Dan McDonald | 9495f63 | 2021-10-30 00:14:30 -0400 | [diff] [blame] | 28 | * Copyright 2021 Joyent, Inc. |
Robert Mustacchi | 0accf55 | 2022-10-22 23:25:41 +0000 | [diff] [blame] | 29 | * Copyright 2022 Oxide Computer Company |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 30 | */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 31 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 32 | #include <sys/types.h> |
| 33 | #include <sys/stream.h> |
| 34 | #include <sys/dlpi.h> |
| 35 | #include <sys/stropts.h> |
| 36 | #include <sys/sysmacros.h> |
| 37 | #include <sys/strsubr.h> |
| 38 | #include <sys/strlog.h> |
| 39 | #include <sys/strsun.h> |
| 40 | #include <sys/zone.h> |
| 41 | #define _SUN_TPI_VERSION 2 |
| 42 | #include <sys/tihdr.h> |
| 43 | #include <sys/xti_inet.h> |
| 44 | #include <sys/ddi.h> |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 45 | #include <sys/suntpi.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 46 | #include <sys/cmn_err.h> |
| 47 | #include <sys/debug.h> |
| 48 | #include <sys/kobj.h> |
| 49 | #include <sys/modctl.h> |
| 50 | #include <sys/atomic.h> |
| 51 | #include <sys/policy.h> |
jpk | 45916cd | 2006-03-24 12:29:20 -0800 | [diff] [blame] | 52 | #include <sys/priv.h> |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 53 | #include <sys/taskq.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 54 | |
| 55 | #include <sys/systm.h> |
| 56 | #include <sys/param.h> |
| 57 | #include <sys/kmem.h> |
dr146992 | 381a2a9 | 2006-10-20 16:37:58 -0700 | [diff] [blame] | 58 | #include <sys/sdt.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 59 | #include <sys/socket.h> |
| 60 | #include <sys/vtrace.h> |
| 61 | #include <sys/isa_defs.h> |
dr146992 | 1b47e08 | 2008-01-20 23:43:45 -0800 | [diff] [blame] | 62 | #include <sys/mac.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 63 | #include <net/if.h> |
| 64 | #include <net/if_arp.h> |
| 65 | #include <net/route.h> |
| 66 | #include <sys/sockio.h> |
| 67 | #include <netinet/in.h> |
| 68 | #include <net/if_dl.h> |
| 69 | |
| 70 | #include <inet/common.h> |
| 71 | #include <inet/mi.h> |
| 72 | #include <inet/mib2.h> |
| 73 | #include <inet/nd.h> |
| 74 | #include <inet/arp.h> |
| 75 | #include <inet/snmpcom.h> |
nordmark | fc80c0d | 2007-10-11 22:57:36 -0700 | [diff] [blame] | 76 | #include <inet/optcom.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 77 | #include <inet/kstatcom.h> |
| 78 | |
| 79 | #include <netinet/igmp_var.h> |
| 80 | #include <netinet/ip6.h> |
| 81 | #include <netinet/icmp6.h> |
| 82 | #include <netinet/sctp.h> |
| 83 | |
| 84 | #include <inet/ip.h> |
masputra | ff550d0 | 2005-10-22 22:50:14 -0700 | [diff] [blame] | 85 | #include <inet/ip_impl.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 86 | #include <inet/ip6.h> |
| 87 | #include <inet/ip6_asp.h> |
| 88 | #include <inet/tcp.h> |
masputra | ff550d0 | 2005-10-22 22:50:14 -0700 | [diff] [blame] | 89 | #include <inet/tcp_impl.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 90 | #include <inet/ip_multi.h> |
| 91 | #include <inet/ip_if.h> |
| 92 | #include <inet/ip_ire.h> |
sangeeta | c793af9 | 2006-08-11 05:59:29 -0700 | [diff] [blame] | 93 | #include <inet/ip_ftable.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 94 | #include <inet/ip_rts.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 95 | #include <inet/ip_ndp.h> |
| 96 | #include <inet/ip_listutils.h> |
| 97 | #include <netinet/igmp.h> |
| 98 | #include <netinet/ip_mroute.h> |
| 99 | #include <inet/ipp_common.h> |
Sebastien Roy | 45a4b79 | 2017-08-01 13:21:40 -0400 | [diff] [blame] | 100 | #include <inet/cc.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 101 | |
| 102 | #include <net/pfkeyv2.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 103 | #include <inet/sadb.h> |
| 104 | #include <inet/ipsec_impl.h> |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 105 | #include <inet/iptun/iptun_impl.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 106 | #include <inet/ipdrop.h> |
dr146992 | 381a2a9 | 2006-10-20 16:37:58 -0700 | [diff] [blame] | 107 | #include <inet/ip_netinfo.h> |
Sangeeta Misra | dbed73c | 2009-11-03 23:15:19 -0800 | [diff] [blame] | 108 | #include <inet/ilb_ip.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 109 | |
| 110 | #include <sys/ethernet.h> |
| 111 | #include <net/if_types.h> |
| 112 | #include <sys/cpuvar.h> |
| 113 | |
| 114 | #include <ipp/ipp.h> |
| 115 | #include <ipp/ipp_impl.h> |
| 116 | #include <ipp/ipgpc/ipgpc.h> |
| 117 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 118 | #include <sys/pattr.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 119 | #include <inet/ipclassifier.h> |
| 120 | #include <inet/sctp_ip.h> |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 121 | #include <inet/sctp/sctp_impl.h> |
masputra | ff550d0 | 2005-10-22 22:50:14 -0700 | [diff] [blame] | 122 | #include <inet/udp_impl.h> |
nordmark | fc80c0d | 2007-10-11 22:57:36 -0700 | [diff] [blame] | 123 | #include <inet/rawip_impl.h> |
| 124 | #include <inet/rts_impl.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 125 | |
jpk | 45916cd | 2006-03-24 12:29:20 -0800 | [diff] [blame] | 126 | #include <sys/tsol/label.h> |
| 127 | #include <sys/tsol/tnet.h> |
| 128 | |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 129 | #include <sys/squeue_impl.h> |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 130 | #include <inet/ip_arp.h> |
jpk | 45916cd | 2006-03-24 12:29:20 -0800 | [diff] [blame] | 131 | |
Erik Nordmark | b36a561 | 2009-11-19 11:04:40 -0800 | [diff] [blame] | 132 | #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */ |
| 133 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 134 | /* |
| 135 | * Values for squeue switch: |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 136 | * IP_SQUEUE_ENTER_NODRAIN: SQ_NODRAIN |
| 137 | * IP_SQUEUE_ENTER: SQ_PROCESS |
| 138 | * IP_SQUEUE_FILL: SQ_FILL |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 139 | */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 140 | int ip_squeue_enter = IP_SQUEUE_ENTER; /* Settable in /etc/system */ |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 141 | |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 142 | int ip_squeue_flag; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 143 | |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 144 | /* |
| 145 | * Settable in /etc/system |
| 146 | */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 147 | int ip_poll_normal_ms = 100; |
| 148 | int ip_poll_normal_ticks = 0; |
yz147064 | e717623 | 2006-12-13 10:43:15 -0800 | [diff] [blame] | 149 | int ip_modclose_ackwait_ms = 3000; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 150 | |
| 151 | /* |
carlsonj | 6a8288c | 2007-09-11 04:26:06 -0700 | [diff] [blame] | 152 | * It would be nice to have these present only in DEBUG systems, but the |
| 153 | * current design of the global symbol checking logic requires them to be |
| 154 | * unconditionally present. |
| 155 | */ |
| 156 | uint_t ip_thread_data; /* TSD key for debug support */ |
| 157 | krwlock_t ip_thread_rwlock; |
| 158 | list_t ip_thread_list; |
| 159 | |
| 160 | /* |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 161 | * Structure to represent a linked list of msgblks. Used by ip_snmp_ functions. |
| 162 | */ |
| 163 | |
| 164 | struct listptr_s { |
| 165 | mblk_t *lp_head; /* first mblk_t in the list */ |
| 166 | mblk_t *lp_tail; /* last mblk_t in the list */ |
| 167 | }; |
| 168 | |
| 169 | typedef struct listptr_s listptr_t; |
| 170 | |
| 171 | /* |
jpk | 45916cd | 2006-03-24 12:29:20 -0800 | [diff] [blame] | 172 | * This is used by ip_snmp_get_mib2_ip_route_media and |
| 173 | * ip_snmp_get_mib2_ip6_route_media to carry the lists of return data. |
| 174 | */ |
| 175 | typedef struct iproutedata_s { |
| 176 | uint_t ird_idx; |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 177 | uint_t ird_flags; /* IRD_* flags, e.g. IRD_REPORT_ALL below */ |
jpk | 45916cd | 2006-03-24 12:29:20 -0800 | [diff] [blame] | 178 | listptr_t ird_route; /* ipRouteEntryTable */ |
| 179 | listptr_t ird_netmedia; /* ipNetToMediaEntryTable */ |
| 180 | listptr_t ird_attrs; /* ipRouteAttributeTable */ |
| 181 | } iproutedata_t; |
| 182 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 183 | /* Include ire_testhidden and IRE_IF_CLONE routes */ |
| 184 | #define IRD_REPORT_ALL 0x01 |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 185 | |
jpk | 45916cd | 2006-03-24 12:29:20 -0800 | [diff] [blame] | 186 | /* |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 187 | * Cluster specific hooks. These should be NULL on non-clustered systems. |
| 188 | */ |
| 189 | |
| 190 | /* |
| 191 | * Hook functions to enable cluster networking |
| 192 | * On non-clustered systems these vectors must always be NULL. |
| 193 | * |
| 194 | * Hook function to check whether the specified IP address is a shared IP |
| 195 | * address in the cluster. |
| 196 | * |
| 197 | */ |
Lu Huafeng | 8e4b770 | 2008-12-17 12:37:29 +0800 | [diff] [blame] | 198 | int (*cl_inet_isclusterwide)(netstackid_t stack_id, uint8_t protocol, |
| 199 | sa_family_t addr_family, uint8_t *laddrp, void *args) = NULL; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 200 | |
| 201 | /* |
| 202 | * Hook function to generate cluster wide ip fragment identifier |
| 203 | */ |
Lu Huafeng | 8e4b770 | 2008-12-17 12:37:29 +0800 | [diff] [blame] | 204 | uint32_t (*cl_inet_ipident)(netstackid_t stack_id, uint8_t protocol, |
| 205 | sa_family_t addr_family, uint8_t *laddrp, uint8_t *faddrp, |
| 206 | void *args) = NULL; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 207 | |
| 208 | /* |
Thejaswini Singarajipura | 9c2c14a | 2008-09-29 19:18:37 -0400 | [diff] [blame] | 209 | * Hook function to generate cluster wide SPI. |
| 210 | */ |
Lu Huafeng | 8e4b770 | 2008-12-17 12:37:29 +0800 | [diff] [blame] | 211 | void (*cl_inet_getspi)(netstackid_t, uint8_t, uint8_t *, size_t, |
| 212 | void *) = NULL; |
Thejaswini Singarajipura | 9c2c14a | 2008-09-29 19:18:37 -0400 | [diff] [blame] | 213 | |
| 214 | /* |
| 215 | * Hook function to verify if the SPI is already utilized. |
| 216 | */ |
| 217 | |
Lu Huafeng | 8e4b770 | 2008-12-17 12:37:29 +0800 | [diff] [blame] | 218 | int (*cl_inet_checkspi)(netstackid_t, uint8_t, uint32_t, void *) = NULL; |
Thejaswini Singarajipura | 9c2c14a | 2008-09-29 19:18:37 -0400 | [diff] [blame] | 219 | |
| 220 | /* |
| 221 | * Hook function to delete the SPI from the cluster wide repository. |
| 222 | */ |
| 223 | |
Lu Huafeng | 8e4b770 | 2008-12-17 12:37:29 +0800 | [diff] [blame] | 224 | void (*cl_inet_deletespi)(netstackid_t, uint8_t, uint32_t, void *) = NULL; |
Thejaswini Singarajipura | 9c2c14a | 2008-09-29 19:18:37 -0400 | [diff] [blame] | 225 | |
| 226 | /* |
| 227 | * Hook function to inform the cluster when packet received on an IDLE SA |
| 228 | */ |
| 229 | |
Lu Huafeng | 8e4b770 | 2008-12-17 12:37:29 +0800 | [diff] [blame] | 230 | void (*cl_inet_idlesa)(netstackid_t, uint8_t, uint32_t, sa_family_t, |
| 231 | in6_addr_t, in6_addr_t, void *) = NULL; |
Thejaswini Singarajipura | 9c2c14a | 2008-09-29 19:18:37 -0400 | [diff] [blame] | 232 | |
| 233 | /* |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 234 | * Synchronization notes: |
| 235 | * |
| 236 | * IP is a fully D_MP STREAMS module/driver. Thus it does not depend on any |
| 237 | * MT level protection given by STREAMS. IP uses a combination of its own |
| 238 | * internal serialization mechanism and standard Solaris locking techniques. |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 239 | * The internal serialization is per phyint. This is used to serialize |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 240 | * plumbing operations, IPMP operations, most set ioctls, etc. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 241 | * |
| 242 | * Plumbing is a long sequence of operations involving message |
| 243 | * exchanges between IP, ARP and device drivers. Many set ioctls are typically |
| 244 | * involved in plumbing operations. A natural model is to serialize these |
| 245 | * ioctls one per ill. For example plumbing of hme0 and qfe0 can go on in |
| 246 | * parallel without any interference. But various set ioctls on hme0 are best |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 247 | * serialized, along with IPMP operations and processing of DLPI control |
| 248 | * messages received from drivers on a per phyint basis. This serialization is |
| 249 | * provided by the ipsq_t and primitives operating on this. Details can |
| 250 | * be found in ip_if.c above the core primitives operating on ipsq_t. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 251 | * |
| 252 | * Lookups of an ipif or ill by a thread return a refheld ipif / ill. |
| 253 | * Similarly lookup of an ire by a thread also returns a refheld ire. |
| 254 | * In addition ipif's and ill's referenced by the ire are also indirectly |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 255 | * refheld. Thus no ipif or ill can vanish as long as an ipif is refheld |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 256 | * directly or indirectly. For example an SIOCSLIFADDR ioctl that changes the |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 257 | * address of an ipif has to go through the ipsq_t. This ensures that only |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 258 | * one such exclusive operation proceeds at any time on the ipif. It then |
| 259 | * waits for all refcnts |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 260 | * associated with this ipif to come down to zero. The address is changed |
| 261 | * only after the ipif has been quiesced. Then the ipif is brought up again. |
| 262 | * More details are described above the comment in ip_sioctl_flags. |
| 263 | * |
| 264 | * Packet processing is based mostly on IREs and is fully multi-threaded |
| 265 | * using standard Solaris MT techniques. |
| 266 | * |
| 267 | * There are explicit locks in IP to handle: |
| 268 | * - The ip_g_head list maintained by mi_open_link() and friends. |
| 269 | * |
| 270 | * - The reassembly data structures (one lock per hash bucket) |
| 271 | * |
| 272 | * - conn_lock is meant to protect conn_t fields. The fields actually |
| 273 | * protected by conn_lock are documented in the conn_t definition. |
| 274 | * |
| 275 | * - ire_lock to protect some of the fields of the ire, IRE tables |
| 276 | * (one lock per hash bucket). Refer to ip_ire.c for details. |
| 277 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 278 | * - ndp_g_lock and ncec_lock for protecting NCEs. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 279 | * |
| 280 | * - ill_lock protects fields of the ill and ipif. Details in ip.h |
| 281 | * |
| 282 | * - ill_g_lock: This is a global reader/writer lock. Protects the following |
| 283 | * * The AVL tree based global multi list of all ills. |
| 284 | * * The linked list of all ipifs of an ill |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 285 | * * The <ipsq-xop> mapping |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 286 | * * <ill-phyint> association |
| 287 | * Insertion/deletion of an ill in the system, insertion/deletion of an ipif |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 288 | * into an ill, changing the <ipsq-xop> mapping of an ill, changing the |
| 289 | * <ill-phyint> assoc of an ill will all have to hold the ill_g_lock as |
| 290 | * writer for the actual duration of the insertion/deletion/change. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 291 | * |
| 292 | * - ill_lock: This is a per ill mutex. |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 293 | * It protects some members of the ill_t struct; see ip.h for details. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 294 | * It also protects the <ill-phyint> assoc. |
| 295 | * It also protects the list of ipifs hanging off the ill. |
| 296 | * |
| 297 | * - ipsq_lock: This is a per ipsq_t mutex lock. |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 298 | * This protects some members of the ipsq_t struct; see ip.h for details. |
| 299 | * It also protects the <ipsq-ipxop> mapping |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 300 | * |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 301 | * - ipx_lock: This is a per ipxop_t mutex lock. |
| 302 | * This protects some members of the ipxop_t struct; see ip.h for details. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 303 | * |
| 304 | * - phyint_lock: This is a per phyint mutex lock. Protects just the |
| 305 | * phyint_flags |
| 306 | * |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 307 | * - ip_addr_avail_lock: This is used to ensure the uniqueness of IP addresses. |
| 308 | * This lock is held in ipif_up_done and the ipif is marked IPIF_UP and the |
| 309 | * uniqueness check also done atomically. |
| 310 | * |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 311 | * - ill_g_usesrc_lock: This readers/writer lock protects the usesrc |
| 312 | * group list linked by ill_usesrc_grp_next. It also protects the |
| 313 | * ill_usesrc_ifindex field. It is taken as a writer when a member of the |
| 314 | * group is being added or deleted. This lock is taken as a reader when |
| 315 | * walking the list/group(eg: to get the number of members in a usesrc group). |
| 316 | * Note, it is only necessary to take this lock if the ill_usesrc_grp_next |
| 317 | * field is changing state i.e from NULL to non-NULL or vice-versa. For |
| 318 | * example, it is not necessary to take this lock in the initial portion |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 319 | * of ip_sioctl_slifusesrc or at all in ip_sioctl_flags since these |
| 320 | * operations are executed exclusively and that ensures that the "usesrc |
| 321 | * group state" cannot change. The "usesrc group state" change can happen |
| 322 | * only in the latter part of ip_sioctl_slifusesrc and in ill_delete. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 323 | * |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 324 | * Changing <ill-phyint>, <ipsq-xop> associations: |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 325 | * |
| 326 | * To change the <ill-phyint> association, the ill_g_lock must be held |
| 327 | * as writer, and the ill_locks of both the v4 and v6 instance of the ill |
| 328 | * must be held. |
| 329 | * |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 330 | * To change the <ipsq-xop> association, the ill_g_lock must be held as |
| 331 | * writer, the ipsq_lock must be held, and one must be writer on the ipsq. |
| 332 | * This is only done when ills are added or removed from IPMP groups. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 333 | * |
| 334 | * To add or delete an ipif from the list of ipifs hanging off the ill, |
| 335 | * ill_g_lock (writer) and ill_lock must be held and the thread must be |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 336 | * a writer on the associated ipsq. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 337 | * |
| 338 | * To add or delete an ill to the system, the ill_g_lock must be held as |
| 339 | * writer and the thread must be a writer on the associated ipsq. |
| 340 | * |
| 341 | * To add or delete an ilm to an ill, the ill_lock must be held and the thread |
| 342 | * must be a writer on the associated ipsq. |
| 343 | * |
| 344 | * Lock hierarchy |
| 345 | * |
| 346 | * Some lock hierarchy scenarios are listed below. |
| 347 | * |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 348 | * ill_g_lock -> conn_lock -> ill_lock -> ipsq_lock -> ipx_lock |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 349 | * ill_g_lock -> ill_lock(s) -> phyint_lock |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 350 | * ill_g_lock -> ndp_g_lock -> ill_lock -> ncec_lock |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 351 | * ill_g_lock -> ip_addr_avail_lock |
| 352 | * conn_lock -> irb_lock -> ill_lock -> ire_lock |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 353 | * ill_g_lock -> ip_g_nd_lock |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 354 | * ill_g_lock -> ips_ipmp_lock -> ill_lock -> nce_lock |
| 355 | * ill_g_lock -> ndp_g_lock -> ill_lock -> ncec_lock -> nce_lock |
| 356 | * arl_lock -> ill_lock |
| 357 | * ips_ire_dep_lock -> irb_lock |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 358 | * |
| 359 | * When more than 1 ill lock is needed to be held, all ill lock addresses |
| 360 | * are sorted on address and locked starting from highest addressed lock |
| 361 | * downward. |
| 362 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 363 | * Multicast scenarios |
| 364 | * ips_ill_g_lock -> ill_mcast_lock |
| 365 | * conn_ilg_lock -> ips_ill_g_lock -> ill_lock |
| 366 | * ill_mcast_serializer -> ill_mcast_lock -> ips_ipmp_lock -> ill_lock |
| 367 | * ill_mcast_serializer -> ill_mcast_lock -> connf_lock -> conn_lock |
| 368 | * ill_mcast_serializer -> ill_mcast_lock -> conn_ilg_lock |
| 369 | * ill_mcast_serializer -> ill_mcast_lock -> ips_igmp_timer_lock |
| 370 | * |
jpk | 45916cd | 2006-03-24 12:29:20 -0800 | [diff] [blame] | 371 | * IPsec scenarios |
| 372 | * |
| 373 | * ipsa_lock -> ill_g_lock -> ill_lock |
jpk | 45916cd | 2006-03-24 12:29:20 -0800 | [diff] [blame] | 374 | * ill_g_usesrc_lock -> ill_g_lock -> ill_lock |
| 375 | * |
| 376 | * Trusted Solaris scenarios |
| 377 | * |
| 378 | * igsa_lock -> gcgrp_rwlock -> gcgrp_lock |
| 379 | * igsa_lock -> gcdb_lock |
| 380 | * gcgrp_rwlock -> ire_lock |
| 381 | * gcgrp_rwlock -> gcdb_lock |
| 382 | * |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 383 | * squeue(sq_lock), flow related (ft_lock, fe_lock) locking |
| 384 | * |
| 385 | * cpu_lock --> ill_lock --> sqset_lock --> sq_lock |
| 386 | * sq_lock -> conn_lock -> QLOCK(q) |
| 387 | * ill_lock -> ft_lock -> fe_lock |
sangeeta | c793af9 | 2006-08-11 05:59:29 -0700 | [diff] [blame] | 388 | * |
| 389 | * Routing/forwarding table locking notes: |
| 390 | * |
| 391 | * Lock acquisition order: Radix tree lock, irb_lock. |
| 392 | * Requirements: |
| 393 | * i. Walker must not hold any locks during the walker callback. |
| 394 | * ii Walker must not see a truncated tree during the walk because of any node |
| 395 | * deletion. |
| 396 | * iii Existing code assumes ire_bucket is valid if it is non-null and is used |
| 397 | * in many places in the code to walk the irb list. Thus even if all the |
| 398 | * ires in a bucket have been deleted, we still can't free the radix node |
| 399 | * until the ires have actually been inactive'd (freed). |
| 400 | * |
| 401 | * Tree traversal - Need to hold the global tree lock in read mode. |
| 402 | * Before dropping the global tree lock, need to either increment the ire_refcnt |
| 403 | * to ensure that the radix node can't be deleted. |
| 404 | * |
| 405 | * Tree add - Need to hold the global tree lock in write mode to add a |
| 406 | * radix node. To prevent the node from being deleted, increment the |
| 407 | * irb_refcnt, after the node is added to the tree. The ire itself is |
| 408 | * added later while holding the irb_lock, but not the tree lock. |
| 409 | * |
| 410 | * Tree delete - Need to hold the global tree lock and irb_lock in write mode. |
| 411 | * All associated ires must be inactive (i.e. freed), and irb_refcnt |
| 412 | * must be zero. |
| 413 | * |
| 414 | * Walker - Increment irb_refcnt before calling the walker callback. Hold the |
| 415 | * global tree lock (read mode) for traversal. |
| 416 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 417 | * IRE dependencies - In some cases we hold ips_ire_dep_lock across ire_refrele |
| 418 | * hence we will acquire irb_lock while holding ips_ire_dep_lock. |
| 419 | * |
danmcd | 437220c | 2007-09-04 06:48:33 -0700 | [diff] [blame] | 420 | * IPsec notes : |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 421 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 422 | * IP interacts with the IPsec code (AH/ESP) by storing IPsec attributes |
| 423 | * in the ip_xmit_attr_t and ip_recv_attr_t. For outbound datagrams, the |
| 424 | * ip_xmit_attr_t has the |
danmcd | 437220c | 2007-09-04 06:48:33 -0700 | [diff] [blame] | 425 | * information used by the IPsec code for applying the right level of |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 426 | * protection. The information initialized by IP in the ip_xmit_attr_t |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 427 | * is determined by the per-socket policy or global policy in the system. |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 428 | * For inbound datagrams, the ip_recv_attr_t |
| 429 | * starts out with nothing in it. It gets filled |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 430 | * with the right information if it goes through the AH/ESP code, which |
| 431 | * happens if the incoming packet is secure. The information initialized |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 432 | * by AH/ESP, is later used by IP (during fanouts to ULP) to see whether |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 433 | * the policy requirements needed by per-socket policy or global policy |
| 434 | * are met or not. |
| 435 | * |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 436 | * For fully connected sockets i.e dst, src [addr, port] is known, |
| 437 | * conn_policy_cached is set indicating that policy has been cached. |
| 438 | * conn_in_enforce_policy may or may not be set depending on whether |
| 439 | * there is a global policy match or per-socket policy match. |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 440 | * Policy inheriting happens in ip_policy_set once the destination is known. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 441 | * Once the right policy is set on the conn_t, policy cannot change for |
| 442 | * this socket. This makes life simpler for TCP (UDP ?) where |
| 443 | * re-transmissions go out with the same policy. For symmetry, policy |
| 444 | * is cached for fully connected UDP sockets also. Thus if policy is cached, |
| 445 | * it also implies that policy is latched i.e policy cannot change |
| 446 | * on these sockets. As we have the right policy on the conn, we don't |
| 447 | * have to lookup global policy for every outbound and inbound datagram |
| 448 | * and thus serving as an optimization. Note that a global policy change |
| 449 | * does not affect fully connected sockets if they have policy. If fully |
| 450 | * connected sockets did not have any policy associated with it, global |
| 451 | * policy change may affect them. |
| 452 | * |
| 453 | * IP Flow control notes: |
Venugopal Iyer | ae6aa22 | 2009-02-17 01:31:30 -0800 | [diff] [blame] | 454 | * --------------------- |
| 455 | * Non-TCP streams are flow controlled by IP. The way this is accomplished |
| 456 | * differs when ILL_CAPAB_DLD_DIRECT is enabled for that IP instance. When |
| 457 | * ILL_DIRECT_CAPABLE(ill) is TRUE, IP can do direct function calls into |
| 458 | * GLDv3. Otherwise packets are sent down to lower layers using STREAMS |
| 459 | * functions. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 460 | * |
Venugopal Iyer | ae6aa22 | 2009-02-17 01:31:30 -0800 | [diff] [blame] | 461 | * Per Tx ring udp flow control: |
| 462 | * This is applicable only when ILL_CAPAB_DLD_DIRECT capability is set in |
| 463 | * the ill (i.e. ILL_DIRECT_CAPABLE(ill) is true). |
| 464 | * |
| 465 | * The underlying link can expose multiple Tx rings to the GLDv3 mac layer. |
| 466 | * To achieve best performance, outgoing traffic needs to be fanned out among |
| 467 | * these Tx rings. mac_tx() is called (via str_mdata_fastpath_put()) to send |
| 468 | * traffic out of the NIC and it takes a fanout hint. UDP connections pass |
| 469 | * the address of connp as fanout hint to mac_tx(). Under flow controlled |
| 470 | * condition, mac_tx() returns a non-NULL cookie (ip_mac_tx_cookie_t). This |
| 471 | * cookie points to a specific Tx ring that is blocked. The cookie is used to |
| 472 | * hash into an entry in the idl_tx_list[] array. Each idl_tx_list_t points |
| 473 | * to drain_lists (idl_t's). These drain lists will store the blocked UDP |
| 474 | * connp's. The drain list is not a single list but a configurable number of |
| 475 | * lists. |
| 476 | * |
| 477 | * The diagram below shows idl_tx_list_t's and their drain_lists. ip_stack_t |
| 478 | * has an array of idl_tx_list_t. The size of the array is TX_FANOUT_SIZE |
| 479 | * which is equal to 128. This array in turn contains a pointer to idl_t[], |
| 480 | * the ip drain list. The idl_t[] array size is MIN(max_ncpus, 8). The drain |
| 481 | * list will point to the list of connp's that are flow controlled. |
| 482 | * |
| 483 | * --------------- ------- ------- ------- |
| 484 | * |->|drain_list[0]|-->|connp|-->|connp|-->|connp|--> |
| 485 | * | --------------- ------- ------- ------- |
| 486 | * | --------------- ------- ------- ------- |
| 487 | * |->|drain_list[1]|-->|connp|-->|connp|-->|connp|--> |
| 488 | * ---------------- | --------------- ------- ------- ------- |
| 489 | * |idl_tx_list[0]|->| --------------- ------- ------- ------- |
| 490 | * ---------------- |->|drain_list[2]|-->|connp|-->|connp|-->|connp|--> |
| 491 | * | --------------- ------- ------- ------- |
| 492 | * . . . . . |
| 493 | * | --------------- ------- ------- ------- |
| 494 | * |->|drain_list[n]|-->|connp|-->|connp|-->|connp|--> |
| 495 | * --------------- ------- ------- ------- |
| 496 | * --------------- ------- ------- ------- |
| 497 | * |->|drain_list[0]|-->|connp|-->|connp|-->|connp|--> |
| 498 | * | --------------- ------- ------- ------- |
| 499 | * | --------------- ------- ------- ------- |
| 500 | * ---------------- |->|drain_list[1]|-->|connp|-->|connp|-->|connp|--> |
| 501 | * |idl_tx_list[1]|->| --------------- ------- ------- ------- |
| 502 | * ---------------- | . . . . |
| 503 | * | --------------- ------- ------- ------- |
| 504 | * |->|drain_list[n]|-->|connp|-->|connp|-->|connp|--> |
| 505 | * --------------- ------- ------- ------- |
| 506 | * ..... |
| 507 | * ---------------- |
| 508 | * |idl_tx_list[n]|-> ... |
| 509 | * ---------------- |
| 510 | * |
meem | 3344d75 | 2010-03-27 02:33:20 -0400 | [diff] [blame] | 511 | * When mac_tx() returns a cookie, the cookie is hashed into an index into |
| 512 | * ips_idl_tx_list[], and conn_drain_insert() is called with the idl_tx_list |
| 513 | * to insert the conn onto. conn_drain_insert() asserts flow control for the |
| 514 | * sockets via su_txq_full() (non-STREAMS) or QFULL on conn_wq (STREAMS). |
| 515 | * Further, conn_blocked is set to indicate that the conn is blocked. |
Venugopal Iyer | ae6aa22 | 2009-02-17 01:31:30 -0800 | [diff] [blame] | 516 | * |
meem | 3344d75 | 2010-03-27 02:33:20 -0400 | [diff] [blame] | 517 | * GLDv3 calls ill_flow_enable() when flow control is relieved. The cookie |
| 518 | * passed in the call to ill_flow_enable() identifies the blocked Tx ring and |
| 519 | * is again hashed to locate the appropriate idl_tx_list, which is then |
| 520 | * drained via conn_walk_drain(). conn_walk_drain() goes through each conn in |
| 521 | * the drain list and calls conn_drain_remove() to clear flow control (via |
| 522 | * calling su_txq_full() or clearing QFULL), and remove the conn from the |
| 523 | * drain list. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 524 | * |
meem | 3344d75 | 2010-03-27 02:33:20 -0400 | [diff] [blame] | 525 | * Note that the drain list is not a single list but a (configurable) array of |
| 526 | * lists (8 elements by default). Synchronization between drain insertion and |
| 527 | * flow control wakeup is handled by using idl_txl->txl_lock, and only |
| 528 | * conn_drain_insert() and conn_drain_remove() manipulate the drain list. |
Venugopal Iyer | ae6aa22 | 2009-02-17 01:31:30 -0800 | [diff] [blame] | 529 | * |
meem | 3344d75 | 2010-03-27 02:33:20 -0400 | [diff] [blame] | 530 | * Flow control via STREAMS is used when ILL_DIRECT_CAPABLE() returns FALSE. |
| 531 | * On the send side, if the packet cannot be sent down to the driver by IP |
| 532 | * (canput() fails), ip_xmit() drops the packet and returns EWOULDBLOCK to the |
| 533 | * caller, who may then invoke ixa_check_drain_insert() to insert the conn on |
| 534 | * the 0'th drain list. When ip_wsrv() runs on the ill_wq because flow |
| 535 | * control has been relieved, the blocked conns in the 0'th drain list are |
| 536 | * drained as in the non-STREAMS case. |
Venugopal Iyer | ae6aa22 | 2009-02-17 01:31:30 -0800 | [diff] [blame] | 537 | * |
meem | 3344d75 | 2010-03-27 02:33:20 -0400 | [diff] [blame] | 538 | * In both the STREAMS and non-STREAMS cases, the sockfs upcall to set QFULL |
| 539 | * is done when the conn is inserted into the drain list (conn_drain_insert()) |
| 540 | * and cleared when the conn is removed from it (conn_drain_remove()). |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 541 | * |
| 542 | * IPQOS notes: |
| 543 | * |
| 544 | * IPQoS Policies are applied to packets using IPPF (IP Policy framework) |
| 545 | * and IPQoS modules. IPPF includes hooks in IP at different control points |
| 546 | * (callout positions) which direct packets to IPQoS modules for policy |
| 547 | * processing. Policies, if present, are global. |
| 548 | * |
| 549 | * The callout positions are located in the following paths: |
| 550 | * o local_in (packets destined for this host) |
| 551 | * o local_out (packets originating from this host) |
| 552 | * o fwd_in (packets forwarded by this m/c - inbound) |
| 553 | * o fwd_out (packets forwarded by this m/c - outbound) |
| 554 | * Hooks at these callout points can be enabled/disabled using the ndd variable |
| 555 | * ip_policy_mask (a bit mask with the 4 LSB indicating the callout positions). |
| 556 | * By default all the callout positions are enabled. |
| 557 | * |
| 558 | * Outbound (local_out) |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 559 | * Hooks are placed in ire_send_wire_v4 and ire_send_wire_v6. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 560 | * |
| 561 | * Inbound (local_in) |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 562 | * Hooks are placed in ip_fanout_v4 and ip_fanout_v6. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 563 | * |
| 564 | * Forwarding (in and out) |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 565 | * Hooks are placed in ire_recv_forward_v4/v6. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 566 | * |
| 567 | * IP Policy Framework processing (IPPF processing) |
| 568 | * Policy processing for a packet is initiated by ip_process, which ascertains |
| 569 | * that the classifier (ipgpc) is loaded and configured, failing which the |
| 570 | * packet resumes normal processing in IP. If the classifier is present, the |
| 571 | * packet is acted upon by one or more IPQoS modules (action instances), per |
| 572 | * filters configured in ipgpc and resumes normal IP processing thereafter. |
| 573 | * An action instance can drop a packet in course of its processing. |
| 574 | * |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 575 | * Zones notes: |
| 576 | * |
| 577 | * The partitioning rules for networking are as follows: |
| 578 | * 1) Packets coming from a zone must have a source address belonging to that |
| 579 | * zone. |
| 580 | * 2) Packets coming from a zone can only be sent on a physical interface on |
| 581 | * which the zone has an IP address. |
| 582 | * 3) Between two zones on the same machine, packet delivery is only allowed if |
| 583 | * there's a matching route for the destination and zone in the forwarding |
| 584 | * table. |
| 585 | * 4) The TCP and UDP port spaces are per-zone; that is, two processes in |
| 586 | * different zones can bind to the same port with the wildcard address |
| 587 | * (INADDR_ANY). |
| 588 | * |
| 589 | * The granularity of interface partitioning is at the logical interface level. |
| 590 | * Therefore, every zone has its own IP addresses, and incoming packets can be |
| 591 | * attributed to a zone unambiguously. A logical interface is placed into a zone |
| 592 | * using the SIOCSLIFZONE ioctl; this sets the ipif_zoneid field in the ipif_t |
| 593 | * structure. Rule (1) is implemented by modifying the source address selection |
| 594 | * algorithm so that the list of eligible addresses is filtered based on the |
| 595 | * sending process zone. |
| 596 | * |
| 597 | * The Internet Routing Entries (IREs) are either exclusive to a zone or shared |
| 598 | * across all zones, depending on their type. Here is the break-up: |
| 599 | * |
| 600 | * IRE type Shared/exclusive |
| 601 | * -------- ---------------- |
| 602 | * IRE_BROADCAST Exclusive |
| 603 | * IRE_DEFAULT (default routes) Shared (*) |
nordmark | 5597b60 | 2006-09-14 18:05:27 -0700 | [diff] [blame] | 604 | * IRE_LOCAL Exclusive (x) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 605 | * IRE_LOOPBACK Exclusive |
| 606 | * IRE_PREFIX (net routes) Shared (*) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 607 | * IRE_IF_NORESOLVER (interface routes) Exclusive |
| 608 | * IRE_IF_RESOLVER (interface routes) Exclusive |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 609 | * IRE_IF_CLONE (interface routes) Exclusive |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 610 | * IRE_HOST (host routes) Shared (*) |
| 611 | * |
| 612 | * (*) A zone can only use a default or off-subnet route if the gateway is |
| 613 | * directly reachable from the zone, that is, if the gateway's address matches |
| 614 | * one of the zone's logical interfaces. |
| 615 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 616 | * (x) IRE_LOCAL are handled a bit differently. |
| 617 | * When ip_restrict_interzone_loopback is set (the default), |
| 618 | * ire_route_recursive restricts loopback using an IRE_LOCAL |
nordmark | 5597b60 | 2006-09-14 18:05:27 -0700 | [diff] [blame] | 619 | * between zones to the case when L2 would have conceptually looped the packet |
| 620 | * back, i.e. the loopback which is required since neither Ethernet drivers |
| 621 | * nor Ethernet hardware loops them back. This is the case when the normal |
| 622 | * routes (ignoring IREs with different zoneids) would send out the packet on |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 623 | * the same ill as the ill with which the IRE_LOCAL is associated. |
nordmark | 5597b60 | 2006-09-14 18:05:27 -0700 | [diff] [blame] | 624 | * |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 625 | * Multiple zones can share a common broadcast address; typically all zones |
| 626 | * share the 255.255.255.255 address. Incoming as well as locally originated |
| 627 | * broadcast packets must be dispatched to all the zones on the broadcast |
| 628 | * network. For directed broadcasts (e.g. 10.16.72.255) this is not trivial |
| 629 | * since some zones may not be on the 10.16.72/24 network. To handle this, each |
| 630 | * zone has its own set of IRE_BROADCAST entries; then, broadcast packets are |
| 631 | * sent to every zone that has an IRE_BROADCAST entry for the destination |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 632 | * address on the input ill, see ip_input_broadcast(). |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 633 | * |
| 634 | * Applications in different zones can join the same multicast group address. |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 635 | * The same logic applies for multicast as for broadcast. ip_input_multicast |
| 636 | * dispatches packets to all zones that have members on the physical interface. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 637 | */ |
| 638 | |
| 639 | /* |
| 640 | * Squeue Fanout flags: |
| 641 | * 0: No fanout. |
| 642 | * 1: Fanout across all squeues |
| 643 | */ |
| 644 | boolean_t ip_squeue_fanout = 0; |
| 645 | |
| 646 | /* |
| 647 | * Maximum dups allowed per packet. |
| 648 | */ |
| 649 | uint_t ip_max_frag_dups = 10; |
| 650 | |
nordmark | fc80c0d | 2007-10-11 22:57:36 -0700 | [diff] [blame] | 651 | static int ip_open(queue_t *q, dev_t *devp, int flag, int sflag, |
| 652 | cred_t *credp, boolean_t isv6); |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 653 | static mblk_t *ip_xmit_attach_llhdr(mblk_t *, nce_t *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 654 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 655 | static boolean_t icmp_inbound_verify_v4(mblk_t *, icmph_t *, ip_recv_attr_t *); |
| 656 | static void icmp_inbound_too_big_v4(icmph_t *, ip_recv_attr_t *); |
| 657 | static void icmp_inbound_error_fanout_v4(mblk_t *, icmph_t *, |
| 658 | ip_recv_attr_t *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 659 | static void icmp_options_update(ipha_t *); |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 660 | static void icmp_param_problem(mblk_t *, uint8_t, ip_recv_attr_t *); |
| 661 | static void icmp_pkt(mblk_t *, void *, size_t, ip_recv_attr_t *); |
| 662 | static mblk_t *icmp_pkt_err_ok(mblk_t *, ip_recv_attr_t *); |
| 663 | static void icmp_redirect_v4(mblk_t *mp, ipha_t *, icmph_t *, |
| 664 | ip_recv_attr_t *); |
| 665 | static void icmp_send_redirect(mblk_t *, ipaddr_t, ip_recv_attr_t *); |
| 666 | static void icmp_send_reply_v4(mblk_t *, ipha_t *, icmph_t *, |
| 667 | ip_recv_attr_t *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 668 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 669 | mblk_t *ip_dlpi_alloc(size_t, t_uscalar_t); |
| 670 | char *ip_dot_addr(ipaddr_t, char *); |
| 671 | mblk_t *ip_carve_mp(mblk_t **, ssize_t); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 672 | static char *ip_dot_saddr(uchar_t *, char *); |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 673 | static int ip_lrput(queue_t *, mblk_t *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 674 | ipaddr_t ip_net_mask(ipaddr_t); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 675 | char *ip_nv_lookup(nv_t *, int); |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 676 | int ip_rput(queue_t *, mblk_t *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 677 | static void ip_rput_dlpi_writer(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, |
| 678 | void *dummy_arg); |
Baban Kenkre | 6f773e2 | 2010-07-23 16:52:46 -0400 | [diff] [blame] | 679 | int ip_snmp_get(queue_t *, mblk_t *, int, boolean_t); |
apersson | 3173664 | 2006-12-19 17:33:00 -0800 | [diff] [blame] | 680 | static mblk_t *ip_snmp_get_mib2_ip(queue_t *, mblk_t *, |
Baban Kenkre | 6f773e2 | 2010-07-23 16:52:46 -0400 | [diff] [blame] | 681 | mib2_ipIfStatsEntry_t *, ip_stack_t *, boolean_t); |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 682 | static mblk_t *ip_snmp_get_mib2_ip_traffic_stats(queue_t *, mblk_t *, |
Baban Kenkre | 6f773e2 | 2010-07-23 16:52:46 -0400 | [diff] [blame] | 683 | ip_stack_t *, boolean_t); |
| 684 | static mblk_t *ip_snmp_get_mib2_ip6(queue_t *, mblk_t *, ip_stack_t *, |
| 685 | boolean_t); |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 686 | static mblk_t *ip_snmp_get_mib2_icmp(queue_t *, mblk_t *, ip_stack_t *ipst); |
| 687 | static mblk_t *ip_snmp_get_mib2_icmp6(queue_t *, mblk_t *, ip_stack_t *ipst); |
| 688 | static mblk_t *ip_snmp_get_mib2_igmp(queue_t *, mblk_t *, ip_stack_t *ipst); |
| 689 | static mblk_t *ip_snmp_get_mib2_multi(queue_t *, mblk_t *, ip_stack_t *ipst); |
| 690 | static mblk_t *ip_snmp_get_mib2_ip_addr(queue_t *, mblk_t *, |
Baban Kenkre | 6f773e2 | 2010-07-23 16:52:46 -0400 | [diff] [blame] | 691 | ip_stack_t *ipst, boolean_t); |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 692 | static mblk_t *ip_snmp_get_mib2_ip6_addr(queue_t *, mblk_t *, |
Baban Kenkre | 6f773e2 | 2010-07-23 16:52:46 -0400 | [diff] [blame] | 693 | ip_stack_t *ipst, boolean_t); |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 694 | static mblk_t *ip_snmp_get_mib2_ip_group_src(queue_t *, mblk_t *, |
| 695 | ip_stack_t *ipst); |
| 696 | static mblk_t *ip_snmp_get_mib2_ip6_group_src(queue_t *, mblk_t *, |
| 697 | ip_stack_t *ipst); |
| 698 | static mblk_t *ip_snmp_get_mib2_ip_group_mem(queue_t *, mblk_t *, |
| 699 | ip_stack_t *ipst); |
| 700 | static mblk_t *ip_snmp_get_mib2_ip6_group_mem(queue_t *, mblk_t *, |
| 701 | ip_stack_t *ipst); |
| 702 | static mblk_t *ip_snmp_get_mib2_virt_multi(queue_t *, mblk_t *, |
| 703 | ip_stack_t *ipst); |
| 704 | static mblk_t *ip_snmp_get_mib2_multi_rtable(queue_t *, mblk_t *, |
| 705 | ip_stack_t *ipst); |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 706 | static mblk_t *ip_snmp_get_mib2_ip_route_media(queue_t *, mblk_t *, int, |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 707 | ip_stack_t *ipst); |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 708 | static mblk_t *ip_snmp_get_mib2_ip6_route_media(queue_t *, mblk_t *, int, |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 709 | ip_stack_t *ipst); |
jpk | 45916cd | 2006-03-24 12:29:20 -0800 | [diff] [blame] | 710 | static void ip_snmp_get2_v4(ire_t *, iproutedata_t *); |
| 711 | static void ip_snmp_get2_v6_route(ire_t *, iproutedata_t *); |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 712 | static void ip_snmp_get2_v4_media(ncec_t *, void *); |
| 713 | static void ip_snmp_get2_v6_media(ncec_t *, void *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 714 | int ip_snmp_set(queue_t *, int, int, uchar_t *, int); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 715 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 716 | static mblk_t *ip_fragment_copyhdr(uchar_t *, int, int, ip_stack_t *, |
Erik Nordmark | de8c4a1 | 2009-02-12 08:42:06 -0800 | [diff] [blame] | 717 | mblk_t *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 718 | |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 719 | static void conn_drain_init(ip_stack_t *); |
| 720 | static void conn_drain_fini(ip_stack_t *); |
meem | 3344d75 | 2010-03-27 02:33:20 -0400 | [diff] [blame] | 721 | static void conn_drain(conn_t *connp, boolean_t closing); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 722 | |
Venugopal Iyer | ae6aa22 | 2009-02-17 01:31:30 -0800 | [diff] [blame] | 723 | static void conn_walk_drain(ip_stack_t *, idl_tx_list_t *); |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 724 | static void conn_walk_sctp(pfv_t, void *, zoneid_t, netstack_t *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 725 | |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 726 | static void *ip_stack_init(netstackid_t stackid, netstack_t *ns); |
| 727 | static void ip_stack_shutdown(netstackid_t stackid, void *arg); |
| 728 | static void ip_stack_fini(netstackid_t stackid, void *arg); |
| 729 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 730 | static int ip_multirt_apply_membership(int (*fn)(conn_t *, boolean_t, |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 731 | const in6_addr_t *, ipaddr_t, uint_t, mcast_record_t, const in6_addr_t *), |
| 732 | ire_t *, conn_t *, boolean_t, const in6_addr_t *, mcast_record_t, |
| 733 | const in6_addr_t *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 734 | |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 735 | static int ip_squeue_switch(int); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 736 | |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 737 | static void *ip_kstat_init(netstackid_t, ip_stack_t *); |
| 738 | static void ip_kstat_fini(netstackid_t, kstat_t *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 739 | static int ip_kstat_update(kstat_t *kp, int rw); |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 740 | static void *icmp_kstat_init(netstackid_t); |
| 741 | static void icmp_kstat_fini(netstackid_t, kstat_t *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 742 | static int icmp_kstat_update(kstat_t *kp, int rw); |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 743 | static void *ip_kstat2_init(netstackid_t, ip_stat_t *); |
| 744 | static void ip_kstat2_fini(netstackid_t, kstat_t *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 745 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 746 | static void ipobs_init(ip_stack_t *); |
| 747 | static void ipobs_fini(ip_stack_t *); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 748 | |
Kacheong Poon | 5dd46ab | 2010-07-19 17:27:45 -0700 | [diff] [blame] | 749 | static int ip_tp_cpu_update(cpu_setup_t, int, void *); |
| 750 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 751 | ipaddr_t ip_g_all_ones = IP_HOST_MASK; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 752 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 753 | static long ip_rput_pullups; |
| 754 | int dohwcksum = 1; /* use h/w cksum if supported by the hardware */ |
| 755 | |
gt145670 | aa92d85 | 2008-01-11 12:54:58 -0800 | [diff] [blame] | 756 | vmem_t *ip_minor_arena_sa; /* for minor nos. from INET_MIN_DEV+2 thru 2^^18-1 */ |
| 757 | vmem_t *ip_minor_arena_la; /* for minor nos. from 2^^18 thru 2^^32-1 */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 758 | |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 759 | int ip_debug; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 760 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 761 | /* |
| 762 | * Multirouting/CGTP stuff |
| 763 | */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 764 | int ip_cgtp_filter_rev = CGTP_FILTER_REV; /* CGTP hooks version */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 765 | |
| 766 | /* |
Girish Moodalbail | 6e91bba | 2010-03-26 17:53:11 -0400 | [diff] [blame] | 767 | * IP tunables related declarations. Definitions are in ip_tunables.c |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 768 | */ |
Girish Moodalbail | 6e91bba | 2010-03-26 17:53:11 -0400 | [diff] [blame] | 769 | extern mod_prop_info_t ip_propinfo_tbl[]; |
| 770 | extern int ip_propinfo_count; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 771 | |
| 772 | /* |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 773 | * Table of IP ioctls encoding the various properties of the ioctl and |
| 774 | * indexed based on the last byte of the ioctl command. Occasionally there |
| 775 | * is a clash, and there is more than 1 ioctl with the same last byte. |
| 776 | * In such a case 1 ioctl is encoded in the ndx table and the remaining |
| 777 | * ioctls are encoded in the misc table. An entry in the ndx table is |
| 778 | * retrieved by indexing on the last byte of the ioctl command and comparing |
| 779 | * the ioctl command with the value in the ndx table. In the event of a |
| 780 | * mismatch the misc table is then searched sequentially for the desired |
| 781 | * ioctl command. |
| 782 | * |
| 783 | * Entry: <command> <copyin_size> <flags> <cmd_type> <function> <restart_func> |
| 784 | */ |
| 785 | ip_ioctl_cmd_t ip_ndx_ioctl_table[] = { |
| 786 | /* 000 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 787 | /* 001 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 788 | /* 002 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 789 | /* 003 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 790 | /* 004 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 791 | /* 005 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 792 | /* 006 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 793 | /* 007 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 794 | /* 008 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 795 | /* 009 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 796 | |
| 797 | /* 010 */ { SIOCADDRT, sizeof (struct rtentry), IPI_PRIV, |
| 798 | MISC_CMD, ip_siocaddrt, NULL }, |
| 799 | /* 011 */ { SIOCDELRT, sizeof (struct rtentry), IPI_PRIV, |
| 800 | MISC_CMD, ip_siocdelrt, NULL }, |
| 801 | |
| 802 | /* 012 */ { SIOCSIFADDR, sizeof (struct ifreq), IPI_PRIV | IPI_WR, |
| 803 | IF_CMD, ip_sioctl_addr, ip_sioctl_addr_restart }, |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 804 | /* 013 */ { SIOCGIFADDR, sizeof (struct ifreq), IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 805 | IF_CMD, ip_sioctl_get_addr, NULL }, |
| 806 | |
| 807 | /* 014 */ { SIOCSIFDSTADDR, sizeof (struct ifreq), IPI_PRIV | IPI_WR, |
| 808 | IF_CMD, ip_sioctl_dstaddr, ip_sioctl_dstaddr_restart }, |
| 809 | /* 015 */ { SIOCGIFDSTADDR, sizeof (struct ifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 810 | IPI_GET_CMD, IF_CMD, ip_sioctl_get_dstaddr, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 811 | |
| 812 | /* 016 */ { SIOCSIFFLAGS, sizeof (struct ifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 813 | IPI_PRIV | IPI_WR, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 814 | IF_CMD, ip_sioctl_flags, ip_sioctl_flags_restart }, |
| 815 | /* 017 */ { SIOCGIFFLAGS, sizeof (struct ifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 816 | IPI_MODOK | IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 817 | IF_CMD, ip_sioctl_get_flags, NULL }, |
| 818 | |
| 819 | /* 018 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 820 | /* 019 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 821 | |
| 822 | /* copyin size cannot be coded for SIOCGIFCONF */ |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 823 | /* 020 */ { O_SIOCGIFCONF, 0, IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 824 | MISC_CMD, ip_sioctl_get_ifconf, NULL }, |
| 825 | |
| 826 | /* 021 */ { SIOCSIFMTU, sizeof (struct ifreq), IPI_PRIV | IPI_WR, |
| 827 | IF_CMD, ip_sioctl_mtu, NULL }, |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 828 | /* 022 */ { SIOCGIFMTU, sizeof (struct ifreq), IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 829 | IF_CMD, ip_sioctl_get_mtu, NULL }, |
| 830 | /* 023 */ { SIOCGIFBRDADDR, sizeof (struct ifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 831 | IPI_GET_CMD, IF_CMD, ip_sioctl_get_brdaddr, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 832 | /* 024 */ { SIOCSIFBRDADDR, sizeof (struct ifreq), IPI_PRIV | IPI_WR, |
| 833 | IF_CMD, ip_sioctl_brdaddr, NULL }, |
| 834 | /* 025 */ { SIOCGIFNETMASK, sizeof (struct ifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 835 | IPI_GET_CMD, IF_CMD, ip_sioctl_get_netmask, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 836 | /* 026 */ { SIOCSIFNETMASK, sizeof (struct ifreq), IPI_PRIV | IPI_WR, |
| 837 | IF_CMD, ip_sioctl_netmask, ip_sioctl_netmask_restart }, |
| 838 | /* 027 */ { SIOCGIFMETRIC, sizeof (struct ifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 839 | IPI_GET_CMD, IF_CMD, ip_sioctl_get_metric, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 840 | /* 028 */ { SIOCSIFMETRIC, sizeof (struct ifreq), IPI_PRIV, |
| 841 | IF_CMD, ip_sioctl_metric, NULL }, |
| 842 | /* 029 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 843 | |
| 844 | /* See 166-168 below for extended SIOC*XARP ioctls */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 845 | /* 030 */ { SIOCSARP, sizeof (struct arpreq), IPI_PRIV | IPI_WR, |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 846 | ARP_CMD, ip_sioctl_arp, NULL }, |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 847 | /* 031 */ { SIOCGARP, sizeof (struct arpreq), IPI_GET_CMD, |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 848 | ARP_CMD, ip_sioctl_arp, NULL }, |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 849 | /* 032 */ { SIOCDARP, sizeof (struct arpreq), IPI_PRIV | IPI_WR, |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 850 | ARP_CMD, ip_sioctl_arp, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 851 | |
| 852 | /* 033 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 853 | /* 034 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 854 | /* 035 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 855 | /* 036 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 856 | /* 037 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 857 | /* 038 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 858 | /* 039 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 859 | /* 040 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 860 | /* 041 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 861 | /* 042 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 862 | /* 043 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 863 | /* 044 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 864 | /* 045 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 865 | /* 046 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 866 | /* 047 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 867 | /* 048 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 868 | /* 049 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 869 | /* 050 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 870 | /* 051 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 871 | /* 052 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 872 | /* 053 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 873 | |
| 874 | /* 054 */ { IF_UNITSEL, sizeof (int), IPI_PRIV | IPI_WR | IPI_MODOK, |
| 875 | MISC_CMD, if_unitsel, if_unitsel_restart }, |
| 876 | |
| 877 | /* 055 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 878 | /* 056 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 879 | /* 057 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 880 | /* 058 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 881 | /* 059 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 882 | /* 060 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 883 | /* 061 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 884 | /* 062 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 885 | /* 063 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 886 | /* 064 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 887 | /* 065 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 888 | /* 066 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 889 | /* 067 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 890 | /* 068 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 891 | /* 069 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 892 | /* 070 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 893 | /* 071 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 894 | /* 072 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 895 | |
| 896 | /* 073 */ { SIOCSIFNAME, sizeof (struct ifreq), |
| 897 | IPI_PRIV | IPI_WR | IPI_MODOK, |
| 898 | IF_CMD, ip_sioctl_sifname, NULL }, |
| 899 | |
| 900 | /* 074 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 901 | /* 075 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 902 | /* 076 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 903 | /* 077 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 904 | /* 078 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 905 | /* 079 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 906 | /* 080 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 907 | /* 081 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 908 | /* 082 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 909 | /* 083 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 910 | /* 084 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 911 | /* 085 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 912 | /* 086 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 913 | |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 914 | /* 087 */ { SIOCGIFNUM, sizeof (int), IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 915 | MISC_CMD, ip_sioctl_get_ifnum, NULL }, |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 916 | /* 088 */ { SIOCGIFMUXID, sizeof (struct ifreq), IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 917 | IF_CMD, ip_sioctl_get_muxid, NULL }, |
| 918 | /* 089 */ { SIOCSIFMUXID, sizeof (struct ifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 919 | IPI_PRIV | IPI_WR, IF_CMD, ip_sioctl_muxid, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 920 | |
| 921 | /* Both if and lif variants share same func */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 922 | /* 090 */ { SIOCGIFINDEX, sizeof (struct ifreq), IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 923 | IF_CMD, ip_sioctl_get_lifindex, NULL }, |
| 924 | /* Both if and lif variants share same func */ |
| 925 | /* 091 */ { SIOCSIFINDEX, sizeof (struct ifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 926 | IPI_PRIV | IPI_WR, IF_CMD, ip_sioctl_slifindex, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 927 | |
| 928 | /* copyin size cannot be coded for SIOCGIFCONF */ |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 929 | /* 092 */ { SIOCGIFCONF, 0, IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 930 | MISC_CMD, ip_sioctl_get_ifconf, NULL }, |
| 931 | /* 093 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 932 | /* 094 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 933 | /* 095 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 934 | /* 096 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 935 | /* 097 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 936 | /* 098 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 937 | /* 099 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 938 | /* 100 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 939 | /* 101 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 940 | /* 102 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 941 | /* 103 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 942 | /* 104 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 943 | /* 105 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 944 | /* 106 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 945 | /* 107 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 946 | /* 108 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 947 | /* 109 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 948 | |
| 949 | /* 110 */ { SIOCLIFREMOVEIF, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 950 | IPI_PRIV | IPI_WR, LIF_CMD, ip_sioctl_removeif, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 951 | ip_sioctl_removeif_restart }, |
| 952 | /* 111 */ { SIOCLIFADDIF, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 953 | IPI_GET_CMD | IPI_PRIV | IPI_WR, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 954 | LIF_CMD, ip_sioctl_addif, NULL }, |
| 955 | #define SIOCLIFADDR_NDX 112 |
| 956 | /* 112 */ { SIOCSLIFADDR, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| 957 | LIF_CMD, ip_sioctl_addr, ip_sioctl_addr_restart }, |
| 958 | /* 113 */ { SIOCGLIFADDR, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 959 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_addr, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 960 | /* 114 */ { SIOCSLIFDSTADDR, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| 961 | LIF_CMD, ip_sioctl_dstaddr, ip_sioctl_dstaddr_restart }, |
| 962 | /* 115 */ { SIOCGLIFDSTADDR, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 963 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_dstaddr, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 964 | /* 116 */ { SIOCSLIFFLAGS, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 965 | IPI_PRIV | IPI_WR, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 966 | LIF_CMD, ip_sioctl_flags, ip_sioctl_flags_restart }, |
| 967 | /* 117 */ { SIOCGLIFFLAGS, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 968 | IPI_GET_CMD | IPI_MODOK, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 969 | LIF_CMD, ip_sioctl_get_flags, NULL }, |
| 970 | |
| 971 | /* 118 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 972 | /* 119 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 973 | |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 974 | /* 120 */ { O_SIOCGLIFCONF, 0, IPI_GET_CMD, MISC_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 975 | ip_sioctl_get_lifconf, NULL }, |
| 976 | /* 121 */ { SIOCSLIFMTU, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| 977 | LIF_CMD, ip_sioctl_mtu, NULL }, |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 978 | /* 122 */ { SIOCGLIFMTU, sizeof (struct lifreq), IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 979 | LIF_CMD, ip_sioctl_get_mtu, NULL }, |
| 980 | /* 123 */ { SIOCGLIFBRDADDR, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 981 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_brdaddr, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 982 | /* 124 */ { SIOCSLIFBRDADDR, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| 983 | LIF_CMD, ip_sioctl_brdaddr, NULL }, |
| 984 | /* 125 */ { SIOCGLIFNETMASK, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 985 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_netmask, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 986 | /* 126 */ { SIOCSLIFNETMASK, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| 987 | LIF_CMD, ip_sioctl_netmask, ip_sioctl_netmask_restart }, |
| 988 | /* 127 */ { SIOCGLIFMETRIC, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 989 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_metric, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 990 | /* 128 */ { SIOCSLIFMETRIC, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| 991 | LIF_CMD, ip_sioctl_metric, NULL }, |
| 992 | /* 129 */ { SIOCSLIFNAME, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 993 | IPI_PRIV | IPI_WR | IPI_MODOK, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 994 | LIF_CMD, ip_sioctl_slifname, |
| 995 | ip_sioctl_slifname_restart }, |
| 996 | |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 997 | /* 130 */ { SIOCGLIFNUM, sizeof (struct lifnum), IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 998 | MISC_CMD, ip_sioctl_get_lifnum, NULL }, |
| 999 | /* 131 */ { SIOCGLIFMUXID, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1000 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_muxid, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1001 | /* 132 */ { SIOCSLIFMUXID, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1002 | IPI_PRIV | IPI_WR, LIF_CMD, ip_sioctl_muxid, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1003 | /* 133 */ { SIOCGLIFINDEX, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1004 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_lifindex, 0 }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1005 | /* 134 */ { SIOCSLIFINDEX, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1006 | IPI_PRIV | IPI_WR, LIF_CMD, ip_sioctl_slifindex, 0 }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1007 | /* 135 */ { SIOCSLIFTOKEN, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| 1008 | LIF_CMD, ip_sioctl_token, NULL }, |
| 1009 | /* 136 */ { SIOCGLIFTOKEN, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1010 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_token, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1011 | /* 137 */ { SIOCSLIFSUBNET, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| 1012 | LIF_CMD, ip_sioctl_subnet, ip_sioctl_subnet_restart }, |
| 1013 | /* 138 */ { SIOCGLIFSUBNET, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1014 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_subnet, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1015 | /* 139 */ { SIOCSLIFLNKINFO, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| 1016 | LIF_CMD, ip_sioctl_lnkinfo, NULL }, |
| 1017 | |
| 1018 | /* 140 */ { SIOCGLIFLNKINFO, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1019 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_lnkinfo, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1020 | /* 141 */ { SIOCLIFDELND, sizeof (struct lifreq), IPI_PRIV, |
| 1021 | LIF_CMD, ip_siocdelndp_v6, NULL }, |
| 1022 | /* 142 */ { SIOCLIFGETND, sizeof (struct lifreq), IPI_GET_CMD, |
| 1023 | LIF_CMD, ip_siocqueryndp_v6, NULL }, |
| 1024 | /* 143 */ { SIOCLIFSETND, sizeof (struct lifreq), IPI_PRIV, |
| 1025 | LIF_CMD, ip_siocsetndp_v6, NULL }, |
| 1026 | /* 144 */ { SIOCTMYADDR, sizeof (struct sioc_addrreq), IPI_GET_CMD, |
| 1027 | MISC_CMD, ip_sioctl_tmyaddr, NULL }, |
| 1028 | /* 145 */ { SIOCTONLINK, sizeof (struct sioc_addrreq), IPI_GET_CMD, |
| 1029 | MISC_CMD, ip_sioctl_tonlink, NULL }, |
| 1030 | /* 146 */ { SIOCTMYSITE, sizeof (struct sioc_addrreq), 0, |
| 1031 | MISC_CMD, ip_sioctl_tmysite, NULL }, |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 1032 | /* 147 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 1033 | /* 148 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
Dan McDonald | 843ea70 | 2014-01-19 11:47:59 -0500 | [diff] [blame] | 1034 | |
| 1035 | /* Old *IPSECONFIG ioctls are now deprecated, now see spdsock.c */ |
| 1036 | /* 149 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 1037 | /* 150 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 1038 | /* 151 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 1039 | /* 152 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1040 | |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1041 | /* 153 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 1042 | |
meem | c445e3e | 2009-02-05 14:37:01 -0500 | [diff] [blame] | 1043 | /* 154 */ { SIOCGLIFBINDING, sizeof (struct lifreq), IPI_GET_CMD, |
| 1044 | LIF_CMD, ip_sioctl_get_binding, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1045 | /* 155 */ { SIOCSLIFGROUPNAME, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1046 | IPI_PRIV | IPI_WR, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1047 | LIF_CMD, ip_sioctl_groupname, ip_sioctl_groupname }, |
| 1048 | /* 156 */ { SIOCGLIFGROUPNAME, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1049 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_groupname, NULL }, |
| 1050 | /* 157 */ { SIOCGLIFGROUPINFO, sizeof (lifgroupinfo_t), |
| 1051 | IPI_GET_CMD, MISC_CMD, ip_sioctl_groupinfo, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1052 | |
| 1053 | /* Leave 158-160 unused; used to be SIOC*IFARP ioctls */ |
| 1054 | /* 158 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 1055 | /* 159 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 1056 | /* 160 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 1057 | |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1058 | /* 161 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1059 | |
| 1060 | /* These are handled in ip_sioctl_copyin_setup itself */ |
| 1061 | /* 162 */ { SIOCGIP6ADDRPOLICY, 0, IPI_NULL_BCONT, |
| 1062 | MISC_CMD, NULL, NULL }, |
| 1063 | /* 163 */ { SIOCSIP6ADDRPOLICY, 0, IPI_PRIV | IPI_NULL_BCONT, |
| 1064 | MISC_CMD, NULL, NULL }, |
| 1065 | /* 164 */ { SIOCGDSTINFO, 0, IPI_GET_CMD, MISC_CMD, NULL, NULL }, |
| 1066 | |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 1067 | /* 165 */ { SIOCGLIFCONF, 0, IPI_GET_CMD, MISC_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1068 | ip_sioctl_get_lifconf, NULL }, |
| 1069 | |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1070 | /* 166 */ { SIOCSXARP, sizeof (struct xarpreq), IPI_PRIV | IPI_WR, |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 1071 | XARP_CMD, ip_sioctl_arp, NULL }, |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1072 | /* 167 */ { SIOCGXARP, sizeof (struct xarpreq), IPI_GET_CMD, |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 1073 | XARP_CMD, ip_sioctl_arp, NULL }, |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1074 | /* 168 */ { SIOCDXARP, sizeof (struct xarpreq), IPI_PRIV | IPI_WR, |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 1075 | XARP_CMD, ip_sioctl_arp, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1076 | |
| 1077 | /* SIOCPOPSOCKFS is not handled by IP */ |
| 1078 | /* 169 */ { IPI_DONTCARE /* SIOCPOPSOCKFS */, 0, 0, 0, NULL, NULL }, |
| 1079 | |
| 1080 | /* 170 */ { SIOCGLIFZONE, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1081 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_lifzone, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1082 | /* 171 */ { SIOCSLIFZONE, sizeof (struct lifreq), |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1083 | IPI_PRIV | IPI_WR, LIF_CMD, ip_sioctl_slifzone, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1084 | ip_sioctl_slifzone_restart }, |
| 1085 | /* 172-174 are SCTP ioctls and not handled by IP */ |
| 1086 | /* 172 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 1087 | /* 173 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 1088 | /* 174 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| 1089 | /* 175 */ { SIOCGLIFUSESRC, sizeof (struct lifreq), |
| 1090 | IPI_GET_CMD, LIF_CMD, |
| 1091 | ip_sioctl_get_lifusesrc, 0 }, |
| 1092 | /* 176 */ { SIOCSLIFUSESRC, sizeof (struct lifreq), |
| 1093 | IPI_PRIV | IPI_WR, |
| 1094 | LIF_CMD, ip_sioctl_slifusesrc, |
| 1095 | NULL }, |
| 1096 | /* 177 */ { SIOCGLIFSRCOF, 0, IPI_GET_CMD, MISC_CMD, |
| 1097 | ip_sioctl_get_lifsrcof, NULL }, |
| 1098 | /* 178 */ { SIOCGMSFILTER, sizeof (struct group_filter), IPI_GET_CMD, |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 1099 | MSFILT_CMD, ip_sioctl_msfilter, NULL }, |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1100 | /* 179 */ { SIOCSMSFILTER, sizeof (struct group_filter), 0, |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 1101 | MSFILT_CMD, ip_sioctl_msfilter, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1102 | /* 180 */ { SIOCGIPMSFILTER, sizeof (struct ip_msfilter), IPI_GET_CMD, |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 1103 | MSFILT_CMD, ip_sioctl_msfilter, NULL }, |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1104 | /* 181 */ { SIOCSIPMSFILTER, sizeof (struct ip_msfilter), 0, |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 1105 | MSFILT_CMD, ip_sioctl_msfilter, NULL }, |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1106 | /* 182 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
meem | e704a8f | 2007-10-30 11:15:43 -0700 | [diff] [blame] | 1107 | /* SIOCSENABLESDP is handled by SDP */ |
| 1108 | /* 183 */ { IPI_DONTCARE /* SIOCSENABLESDP */, 0, 0, 0, NULL, NULL }, |
Yu Xiangning | 0f1702c | 2008-12-11 20:04:13 -0800 | [diff] [blame] | 1109 | /* 184 */ { IPI_DONTCARE /* SIOCSQPTR */, 0, 0, 0, NULL, NULL }, |
Darren Reed | a691161 | 2010-08-12 16:05:23 -0700 | [diff] [blame] | 1110 | /* 185 */ { SIOCGIFHWADDR, sizeof (struct ifreq), IPI_GET_CMD, |
| 1111 | IF_CMD, ip_sioctl_get_ifhwaddr, NULL }, |
Sangeeta Misra | dbed73c | 2009-11-03 23:15:19 -0800 | [diff] [blame] | 1112 | /* 186 */ { IPI_DONTCARE /* SIOCGSTAMP */, 0, 0, 0, NULL, NULL }, |
| 1113 | /* 187 */ { SIOCILB, 0, IPI_PRIV | IPI_GET_CMD, MISC_CMD, |
| 1114 | ip_sioctl_ilb_cmd, NULL }, |
Girish Moodalbail | 6e91bba | 2010-03-26 17:53:11 -0400 | [diff] [blame] | 1115 | /* 188 */ { SIOCGETPROP, 0, IPI_GET_CMD, 0, NULL, NULL }, |
| 1116 | /* 189 */ { SIOCSETPROP, 0, IPI_PRIV | IPI_WR, 0, NULL, NULL}, |
| 1117 | /* 190 */ { SIOCGLIFDADSTATE, sizeof (struct lifreq), |
| 1118 | IPI_GET_CMD, LIF_CMD, ip_sioctl_get_dadstate, NULL }, |
| 1119 | /* 191 */ { SIOCSLIFPREFIX, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
Darren Reed | a691161 | 2010-08-12 16:05:23 -0700 | [diff] [blame] | 1120 | LIF_CMD, ip_sioctl_prefix, ip_sioctl_prefix_restart }, |
| 1121 | /* 192 */ { SIOCGLIFHWADDR, sizeof (struct lifreq), IPI_GET_CMD, |
| 1122 | LIF_CMD, ip_sioctl_get_lifhwaddr, NULL } |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1123 | }; |
| 1124 | |
| 1125 | int ip_ndx_ioctl_count = sizeof (ip_ndx_ioctl_table) / sizeof (ip_ioctl_cmd_t); |
| 1126 | |
| 1127 | ip_ioctl_cmd_t ip_misc_ioctl_table[] = { |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1128 | { I_LINK, 0, IPI_PRIV | IPI_WR, 0, NULL, NULL }, |
| 1129 | { I_UNLINK, 0, IPI_PRIV | IPI_WR, 0, NULL, NULL }, |
| 1130 | { I_PLINK, 0, IPI_PRIV | IPI_WR, 0, NULL, NULL }, |
| 1131 | { I_PUNLINK, 0, IPI_PRIV | IPI_WR, 0, NULL, NULL }, |
| 1132 | { ND_GET, 0, 0, 0, NULL, NULL }, |
| 1133 | { ND_SET, 0, IPI_PRIV | IPI_WR, 0, NULL, NULL }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1134 | { IP_IOCTL, 0, 0, 0, NULL, NULL }, |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1135 | { SIOCGETVIFCNT, sizeof (struct sioc_vif_req), IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1136 | MISC_CMD, mrt_ioctl}, |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1137 | { SIOCGETSGCNT, sizeof (struct sioc_sg_req), IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1138 | MISC_CMD, mrt_ioctl}, |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1139 | { SIOCGETLSGCNT, sizeof (struct sioc_lsg_req), IPI_GET_CMD, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1140 | MISC_CMD, mrt_ioctl} |
| 1141 | }; |
| 1142 | |
| 1143 | int ip_misc_ioctl_count = |
| 1144 | sizeof (ip_misc_ioctl_table) / sizeof (ip_ioctl_cmd_t); |
| 1145 | |
/*
 * Number of drainers required.
 * Settable in /etc/system.
 */
int	conn_drain_nthreads;
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1148 | /* Defined in ip_ire.c */ |
| 1149 | extern uint32_t ip_ire_max_bucket_cnt, ip6_ire_max_bucket_cnt; |
| 1150 | extern uint32_t ip_ire_min_bucket_cnt, ip6_ire_min_bucket_cnt; |
| 1151 | extern uint32_t ip_ire_mem_ratio, ip_ire_cpu_ratio; |
| 1152 | |
| 1153 | static nv_t ire_nv_arr[] = { |
| 1154 | { IRE_BROADCAST, "BROADCAST" }, |
| 1155 | { IRE_LOCAL, "LOCAL" }, |
| 1156 | { IRE_LOOPBACK, "LOOPBACK" }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1157 | { IRE_DEFAULT, "DEFAULT" }, |
| 1158 | { IRE_PREFIX, "PREFIX" }, |
| 1159 | { IRE_IF_NORESOLVER, "IF_NORESOL" }, |
| 1160 | { IRE_IF_RESOLVER, "IF_RESOLV" }, |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1161 | { IRE_IF_CLONE, "IF_CLONE" }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1162 | { IRE_HOST, "HOST" }, |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1163 | { IRE_MULTICAST, "MULTICAST" }, |
| 1164 | { IRE_NOROUTE, "NOROUTE" }, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1165 | { 0 } |
| 1166 | }; |
| 1167 | |
| 1168 | nv_t *ire_nv_tbl = ire_nv_arr; |
| 1169 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1170 | /* Simple ICMP IP Header Template */ |
| 1171 | static ipha_t icmp_ipha = { |
| 1172 | IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP |
| 1173 | }; |
| 1174 | |
| 1175 | struct module_info ip_mod_info = { |
Yu Xiangning | 0f1702c | 2008-12-11 20:04:13 -0800 | [diff] [blame] | 1176 | IP_MOD_ID, IP_MOD_NAME, IP_MOD_MINPSZ, IP_MOD_MAXPSZ, IP_MOD_HIWAT, |
| 1177 | IP_MOD_LOWAT |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1178 | }; |
| 1179 | |
carlsonj | 69bb4bb | 2006-08-14 14:10:48 -0700 | [diff] [blame] | 1180 | /* |
| 1181 | * Duplicate static symbols within a module confuse mdb, so we avoid the |
| 1182 | * problem by making the symbols here distinct from those in udp.c. |
| 1183 | */ |
| 1184 | |
nordmark | fc80c0d | 2007-10-11 22:57:36 -0700 | [diff] [blame] | 1185 | /* |
| 1186 | * Entry points for IP as a device and as a module. |
nordmark | fc80c0d | 2007-10-11 22:57:36 -0700 | [diff] [blame] | 1187 | * We have separate open functions for the /dev/ip and /dev/ip6 devices. |
| 1188 | */ |
| 1189 | static struct qinit iprinitv4 = { |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1190 | ip_rput, NULL, ip_openv4, ip_close, NULL, &ip_mod_info |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1191 | }; |
| 1192 | |
nordmark | fc80c0d | 2007-10-11 22:57:36 -0700 | [diff] [blame] | 1193 | struct qinit iprinitv6 = { |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1194 | ip_rput_v6, NULL, ip_openv6, ip_close, NULL, &ip_mod_info |
nordmark | fc80c0d | 2007-10-11 22:57:36 -0700 | [diff] [blame] | 1195 | }; |
| 1196 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1197 | static struct qinit ipwinit = { |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1198 | ip_wput_nondata, ip_wsrv, NULL, NULL, NULL, &ip_mod_info |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1199 | }; |
| 1200 | |
carlsonj | 69bb4bb | 2006-08-14 14:10:48 -0700 | [diff] [blame] | 1201 | static struct qinit iplrinit = { |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1202 | ip_lrput, NULL, ip_openv4, ip_close, NULL, &ip_mod_info |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1203 | }; |
| 1204 | |
carlsonj | 69bb4bb | 2006-08-14 14:10:48 -0700 | [diff] [blame] | 1205 | static struct qinit iplwinit = { |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1206 | ip_lwput, NULL, NULL, NULL, NULL, &ip_mod_info |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1207 | }; |
| 1208 | |
nordmark | fc80c0d | 2007-10-11 22:57:36 -0700 | [diff] [blame] | 1209 | /* For AF_INET aka /dev/ip */ |
| 1210 | struct streamtab ipinfov4 = { |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1211 | &iprinitv4, &ipwinit, &iplrinit, &iplwinit |
nordmark | fc80c0d | 2007-10-11 22:57:36 -0700 | [diff] [blame] | 1212 | }; |
| 1213 | |
| 1214 | /* For AF_INET6 aka /dev/ip6 */ |
| 1215 | struct streamtab ipinfov6 = { |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1216 | &iprinitv6, &ipwinit, &iplrinit, &iplwinit |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1217 | }; |
| 1218 | |
#if defined(DEBUG)
/*
 * Only present in DEBUG builds; off by default.
 * NOTE(review): presumably allows SCTP checksum handling to be skipped --
 * confirm against the places that read it.
 */
boolean_t	skip_sctp_cksum = B_FALSE;
#endif	/* DEBUG */
nordmark | 5597b60 | 2006-09-14 18:05:27 -0700 | [diff] [blame] | 1222 | |
| 1223 | /* |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1224 | * Generate an ICMP fragmentation needed message. |
| 1225 | * When called from ip_output side a minimal ip_recv_attr_t needs to be |
| 1226 | * constructed by the caller. |
nordmark | 5597b60 | 2006-09-14 18:05:27 -0700 | [diff] [blame] | 1227 | */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1228 | void |
| 1229 | icmp_frag_needed(mblk_t *mp, int mtu, ip_recv_attr_t *ira) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1230 | { |
| 1231 | icmph_t icmph; |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1232 | ip_stack_t *ipst = ira->ira_ill->ill_ipst; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1233 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1234 | mp = icmp_pkt_err_ok(mp, ira); |
| 1235 | if (mp == NULL) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1236 | return; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1237 | |
| 1238 | bzero(&icmph, sizeof (icmph_t)); |
| 1239 | icmph.icmph_type = ICMP_DEST_UNREACHABLE; |
| 1240 | icmph.icmph_code = ICMP_FRAGMENTATION_NEEDED; |
| 1241 | icmph.icmph_du_mtu = htons((uint16_t)mtu); |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 1242 | BUMP_MIB(&ipst->ips_icmp_mib, icmpOutFragNeeded); |
| 1243 | BUMP_MIB(&ipst->ips_icmp_mib, icmpOutDestUnreachs); |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1244 | |
| 1245 | icmp_pkt(mp, &icmph, sizeof (icmph_t), ira); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1246 | } |
| 1247 | |
| 1248 | /* |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1249 | * icmp_inbound_v4 deals with ICMP messages that are handled by IP. |
| 1250 | * If the ICMP message is consumed by IP, i.e., it should not be delivered |
| 1251 | * to any IPPROTO_ICMP raw sockets, then it returns NULL. |
| 1252 | * Likewise, if the ICMP error is malformed (too short, etc), then it |
| 1253 | * returns NULL. The caller uses this to determine whether or not to send |
| 1254 | * to raw sockets. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1255 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1256 | * All error messages are passed to the matching transport stream. |
| 1257 | * |
| 1258 | * The following cases are handled by icmp_inbound_v4: |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1259 | * 1) It needs to send a reply back and possibly deliver it |
| 1260 | * to the "interested" upper clients. |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1261 | * 2) Return the mblk so that the caller can pass it to the RAW socket clients. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1262 | * 3) It needs to change some values in IP only. |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1263 | * 4) It needs to change some values in IP and upper layers e.g TCP |
| 1264 | * by delivering an error to the upper layers. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1265 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1266 | * We handle the above four cases in the context of IPsec in the |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1267 | * following way : |
| 1268 | * |
| 1269 | * 1) Send the reply back in the same way as the request came in. |
| 1270 | * If it came in encrypted, it goes out encrypted. If it came in |
| 1271 | * clear, it goes out in clear. Thus, this will prevent chosen |
| 1272 | * plain text attack. |
| 1273 | * 2) The client may or may not expect things to come in secure. |
| 1274 | * If it comes in secure, the policy constraints are checked |
| 1275 | * before delivering it to the upper layers. If it comes in |
| 1276 | * clear, ipsec_inbound_accept_clear will decide whether to |
| 1277 | * accept this in clear or not. In both the cases, if the returned |
| 1278 | * message (IP header + 8 bytes) that caused the icmp message has |
| 1279 | * AH/ESP headers, it is sent up to AH/ESP for validation before |
| 1280 | * sending up. If there are only 8 bytes of returned message, then |
| 1281 | * upper client will not be notified. |
| 1282 | * 3) Check with global policy to see whether it matches the constraints. |
| 1283 | * But this will be done only if icmp_accept_messages_in_clear is |
| 1284 | * zero. |
| 1285 | * 4) If we need to change both in IP and ULP, then the decision taken |
| 1286 | * while affecting the values in IP and while delivering up to TCP |
| 1287 | * should be the same. |
| 1288 | * |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1289 | * There are two cases. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1290 | * |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1291 | * a) If we reject data at the IP layer (ipsec_check_global_policy() |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1292 | * failed), we will not deliver it to the ULP, even though they |
| 1293 | * are *willing* to accept in *clear*. This is fine as our global |
| 1294 | * disposition to icmp messages asks us to reject the datagram. |
| 1295 | * |
| 1296 | * b) If we accept data at the IP layer (ipsec_check_global_policy() |
| 1297 | * succeeded or icmp_accept_messages_in_clear is 1), and not able |
| 1298 | * to deliver it to ULP (policy failed), it can lead to |
| 1299 | * consistency problems. The cases known at this time are |
| 1300 | * ICMP_DESTINATION_UNREACHABLE messages with following code |
| 1301 | * values : |
| 1302 | * |
| 1303 | * - ICMP_FRAGMENTATION_NEEDED : IP adapts to the new value |
| 1304 | * and Upper layer rejects. Then the communication will |
| 1305 | * come to a stop. This is solved by making similar decisions |
| 1306 | * at both levels. Currently, when we are unable to deliver |
| 1307 | * to the Upper Layer (due to policy failures) while IP has |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1308 | * adjusted dce_pmtu, the next outbound datagram would |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1309 | * generate a local ICMP_FRAGMENTATION_NEEDED message - which |
| 1310 | * will be with the right level of protection. Thus the right |
| 1311 | * value will be communicated even if we are not able to |
| 1312 | * communicate when we get from the wire initially. But this |
| 1313 | * assumes there would be at least one outbound datagram after |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1314 | * IP has adjusted its dce_pmtu value. To make things |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1315 | * simpler, we accept in clear after the validation of |
| 1316 | * AH/ESP headers. |
| 1317 | * |
| 1318 | * - Other ICMP ERRORS : We may not be able to deliver it to the |
| 1319 | * upper layer depending on the level of protection the upper |
| 1320 | * layer expects and the disposition in ipsec_inbound_accept_clear(). |
| 1321 | * ipsec_inbound_accept_clear() decides whether a given ICMP error |
| 1322 | * should be accepted in clear when the Upper layer expects secure. |
| 1323 | * Thus the communication may get aborted by some bad ICMP |
| 1324 | * packets. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1325 | */ |
mblk_t *
icmp_inbound_v4(mblk_t *mp, ip_recv_attr_t *ira)
{
	/*
	 * Inbound ICMPv4 processing.  "mp" is the received message, whose
	 * b_rptr points at the outer IP header; "ira" carries the receive
	 * attributes used here (ill, zoneid, IP header length, packet length
	 * and flags).
	 *
	 * Return value: a message that the caller is expected to deliver to
	 * raw ICMP sockets, or NULL when there is nothing left to deliver.
	 * In every path "mp" itself is either freed, handed to another
	 * routine, or returned.
	 *
	 * Three kinds of handling happen below:
	 *  - Echo, timestamp and address mask requests are answered by
	 *    rewriting the request in place and calling icmp_send_reply_v4();
	 *    these paths always return NULL.
	 *  - Messages IP does not act on ("interested" stays B_FALSE) are
	 *    returned unchanged if a raw ICMP client exists, else freed.
	 *  - Errors and redirects ("interested" is B_TRUE) are verified and
	 *    handed to icmp_redirect_v4() or icmp_inbound_error_fanout_v4();
	 *    a copy for raw sockets, if one was made, is returned in mp_ret.
	 */
	icmph_t		*icmph;
	ipha_t		*ipha;		/* Outer header */
	int		ip_hdr_length;	/* Outer header length */
	boolean_t	interested;
	ipif_t		*ipif;
	uint32_t	ts;
	uint32_t	*tsp;
	timestruc_t	now;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	zoneid_t	zoneid = ira->ira_zoneid;
	int		len_needed;
	mblk_t		*mp_ret = NULL;	/* Copy destined for raw sockets */

	ipha = (ipha_t *)mp->b_rptr;

	BUMP_MIB(&ipst->ips_icmp_mib, icmpInMsgs);

	/*
	 * Make sure the first mblk holds the IP header plus the fixed ICMP
	 * header.  If the whole packet is shorter than that it is truncated;
	 * otherwise try to pull the missing bytes up into the first mblk.
	 */
	ip_hdr_length = ira->ira_ip_hdr_length;
	if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMPH_SIZE)) {
		if (ira->ira_pktlen < (ip_hdr_length + ICMPH_SIZE)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
			freemsg(mp);
			return (NULL);
		}
		/* Last chance to get real. */
		ipha = ip_pullup(mp, ip_hdr_length + ICMPH_SIZE, ira);
		if (ipha == NULL) {
			BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors);
			freemsg(mp);
			return (NULL);
		}
	}

	/* The IP header will always be a multiple of four bytes */
	icmph = (icmph_t *)&mp->b_rptr[ip_hdr_length];
	ip2dbg(("icmp_inbound_v4: type %d code %d\n", icmph->icmph_type,
	    icmph->icmph_code));

	/*
	 * We will set "interested" to "true" if we should pass a copy to
	 * the transport or if we handle the packet locally.
	 */
	interested = B_FALSE;
	switch (icmph->icmph_type) {
	case ICMP_ECHO_REPLY:
		BUMP_MIB(&ipst->ips_icmp_mib, icmpInEchoReps);
		break;
	case ICMP_DEST_UNREACHABLE:
		/* Frag-needed is counted in addition to dest-unreachable. */
		if (icmph->icmph_code == ICMP_FRAGMENTATION_NEEDED)
			BUMP_MIB(&ipst->ips_icmp_mib, icmpInFragNeeded);
		interested = B_TRUE;	/* Pass up to transport */
		BUMP_MIB(&ipst->ips_icmp_mib, icmpInDestUnreachs);
		break;
	case ICMP_SOURCE_QUENCH:
		interested = B_TRUE;	/* Pass up to transport */
		BUMP_MIB(&ipst->ips_icmp_mib, icmpInSrcQuenchs);
		break;
	case ICMP_REDIRECT:
		/* Redirects are counted even when the tunable ignores them. */
		if (!ipst->ips_ip_ignore_redirect)
			interested = B_TRUE;
		BUMP_MIB(&ipst->ips_icmp_mib, icmpInRedirects);
		break;
	case ICMP_ECHO_REQUEST:
		/*
		 * Whether to respond to echo requests that come in as IP
		 * broadcasts or as IP multicast is subject to debate
		 * (what isn't?).  We aim to please, you pick it.
		 * Default is do it.
		 */
		if (ira->ira_flags & IRAF_MULTICAST) {
			/* multicast: respond based on tunable */
			interested = ipst->ips_ip_g_resp_to_echo_mcast;
		} else if (ira->ira_flags & IRAF_BROADCAST) {
			/* broadcast: respond based on tunable */
			interested = ipst->ips_ip_g_resp_to_echo_bcast;
		} else {
			/* unicast: always respond */
			interested = B_TRUE;
		}
		BUMP_MIB(&ipst->ips_icmp_mib, icmpInEchos);
		if (!interested) {
			/* We never pass these to RAW sockets */
			freemsg(mp);
			return (NULL);
		}

		/* Check db_ref to make sure we can modify the packet. */
		if (mp->b_datap->db_ref > 1) {
			mblk_t	*mp1;

			/* Data block is shared: work on a private copy. */
			mp1 = copymsg(mp);
			freemsg(mp);
			if (!mp1) {
				BUMP_MIB(&ipst->ips_icmp_mib, icmpOutDrops);
				return (NULL);
			}
			mp = mp1;
			/* Header pointers must follow the new message. */
			ipha = (ipha_t *)mp->b_rptr;
			icmph = (icmph_t *)&mp->b_rptr[ip_hdr_length];
		}
		/* The request itself is turned into the reply and sent. */
		icmph->icmph_type = ICMP_ECHO_REPLY;
		BUMP_MIB(&ipst->ips_icmp_mib, icmpOutEchoReps);
		icmp_send_reply_v4(mp, ipha, icmph, ira);
		return (NULL);

	case ICMP_ROUTER_ADVERTISEMENT:
	case ICMP_ROUTER_SOLICITATION:
		/* Not handled here; left for raw sockets, if any. */
		break;
	case ICMP_TIME_EXCEEDED:
		interested = B_TRUE;	/* Pass up to transport */
		BUMP_MIB(&ipst->ips_icmp_mib, icmpInTimeExcds);
		break;
	case ICMP_PARAM_PROBLEM:
		interested = B_TRUE;	/* Pass up to transport */
		BUMP_MIB(&ipst->ips_icmp_mib, icmpInParmProbs);
		break;
	case ICMP_TIME_STAMP_REQUEST:
		/* Response to Time Stamp Requests is local policy. */
		if (ipst->ips_ip_g_resp_to_timestamp) {
			if (ira->ira_flags & IRAF_MULTIBROADCAST)
				interested =
				    ipst->ips_ip_g_resp_to_timestamp_bcast;
			else
				interested = B_TRUE;
		}
		if (!interested) {
			/* We never pass these to RAW sockets */
			freemsg(mp);
			return (NULL);
		}

		/*
		 * Make sure we have enough of the packet: the three 32-bit
		 * fields following the ICMP header are the originate,
		 * receive and send times.
		 */
		len_needed = ip_hdr_length + ICMPH_SIZE +
		    3 * sizeof (uint32_t);

		if (mp->b_wptr - mp->b_rptr < len_needed) {
			ipha = ip_pullup(mp, len_needed, ira);
			if (ipha == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards - ip_pullup",
				    mp, ill);
				freemsg(mp);
				return (NULL);
			}
			/* Refresh following the pullup. */
			icmph = (icmph_t *)&mp->b_rptr[ip_hdr_length];
		}
		BUMP_MIB(&ipst->ips_icmp_mib, icmpInTimestamps);
		/* Check db_ref to make sure we can modify the packet. */
		if (mp->b_datap->db_ref > 1) {
			mblk_t	*mp1;

			/* Data block is shared: work on a private copy. */
			mp1 = copymsg(mp);
			freemsg(mp);
			if (!mp1) {
				BUMP_MIB(&ipst->ips_icmp_mib, icmpOutDrops);
				return (NULL);
			}
			mp = mp1;
			/* Header pointers must follow the new message. */
			ipha = (ipha_t *)mp->b_rptr;
			icmph = (icmph_t *)&mp->b_rptr[ip_hdr_length];
		}
		icmph->icmph_type = ICMP_TIME_STAMP_REPLY;
		tsp = (uint32_t *)&icmph[1];
		tsp++;		/* Skip past 'originate time' */
		/* Compute # of milliseconds since midnight */
		gethrestime(&now);
		ts = (now.tv_sec % (24 * 60 * 60)) * 1000 +
		    NSEC2MSEC(now.tv_nsec);
		/* The same value is used for both receive and send time. */
		*tsp++ = htonl(ts);	/* Lay in 'receive time' */
		*tsp++ = htonl(ts);	/* Lay in 'send time' */
		BUMP_MIB(&ipst->ips_icmp_mib, icmpOutTimestampReps);
		icmp_send_reply_v4(mp, ipha, icmph, ira);
		return (NULL);

	case ICMP_TIME_STAMP_REPLY:
		BUMP_MIB(&ipst->ips_icmp_mib, icmpInTimestampReps);
		break;
	case ICMP_INFO_REQUEST:
		/* Per RFC 1122 3.2.2.7, ignore this. */
	case ICMP_INFO_REPLY:
		break;
	case ICMP_ADDRESS_MASK_REQUEST:
		/* Broadcast/multicast requests are answered per tunable. */
		if (ira->ira_flags & IRAF_MULTIBROADCAST) {
			interested =
			    ipst->ips_ip_respond_to_address_mask_broadcast;
		} else {
			interested = B_TRUE;
		}
		if (!interested) {
			/* We never pass these to RAW sockets */
			freemsg(mp);
			return (NULL);
		}
		/* Need room for the address mask after the ICMP header. */
		len_needed = ip_hdr_length + ICMPH_SIZE + IP_ADDR_LEN;
		if (mp->b_wptr - mp->b_rptr < len_needed) {
			ipha = ip_pullup(mp, len_needed, ira);
			if (ipha == NULL) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInTruncatedPkts);
				ip_drop_input("ipIfStatsInTruncatedPkts", mp,
				    ill);
				freemsg(mp);
				return (NULL);
			}
			/* Refresh following the pullup. */
			icmph = (icmph_t *)&mp->b_rptr[ip_hdr_length];
		}
		BUMP_MIB(&ipst->ips_icmp_mib, icmpInAddrMasks);
		/* Check db_ref to make sure we can modify the packet. */
		if (mp->b_datap->db_ref > 1) {
			mblk_t	*mp1;

			/* Data block is shared: work on a private copy. */
			mp1 = copymsg(mp);
			freemsg(mp);
			if (!mp1) {
				BUMP_MIB(&ipst->ips_icmp_mib, icmpOutDrops);
				return (NULL);
			}
			mp = mp1;
			/* Header pointers must follow the new message. */
			ipha = (ipha_t *)mp->b_rptr;
			icmph = (icmph_t *)&mp->b_rptr[ip_hdr_length];
		}
		/*
		 * Need the ipif with the mask be the same as the source
		 * address of the mask reply. For unicast we have a specific
		 * ipif. For multicast/broadcast we only handle onlink
		 * senders, and use the source address to pick an ipif.
		 */
		ipif = ipif_lookup_addr(ipha->ipha_dst, ill, zoneid, ipst);
		if (ipif == NULL) {
			/* Broadcast or multicast */
			ipif = ipif_lookup_remote(ill, ipha->ipha_src, zoneid);
			if (ipif == NULL) {
				/* No usable ipif: drop without replying. */
				freemsg(mp);
				return (NULL);
			}
		}
		icmph->icmph_type = ICMP_ADDRESS_MASK_REPLY;
		bcopy(&ipif->ipif_net_mask, &icmph[1], IP_ADDR_LEN);
		/* Done with the ipif once its mask has been copied out. */
		ipif_refrele(ipif);
		BUMP_MIB(&ipst->ips_icmp_mib, icmpOutAddrMaskReps);
		icmp_send_reply_v4(mp, ipha, icmph, ira);
		return (NULL);

	case ICMP_ADDRESS_MASK_REPLY:
		BUMP_MIB(&ipst->ips_icmp_mib, icmpInAddrMaskReps);
		break;
	default:
		interested = B_TRUE;	/* Pass up to transport */
		BUMP_MIB(&ipst->ips_icmp_mib, icmpInUnknowns);
		break;
	}
	/*
	 * See if there is an ICMP client to avoid an extra copymsg/freemsg
	 * if there isn't one.
	 */
	if (ipst->ips_ipcl_proto_fanout_v4[IPPROTO_ICMP].connf_head != NULL) {
		/* If there is an ICMP client and we want one too, copy it. */

		if (!interested) {
			/* Caller will deliver to RAW sockets */
			return (mp);
		}
		/*
		 * If the copy fails only the raw socket delivery is lost:
		 * the drop is recorded, mp_ret stays NULL, and "mp" is
		 * still processed below.
		 */
		mp_ret = copymsg(mp);
		if (mp_ret == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
		}
	} else if (!interested) {
		/* Neither we nor raw sockets are interested. Drop packet now */
		freemsg(mp);
		return (NULL);
	}

	/*
	 * ICMP error or redirect packet. Make sure we have enough of
	 * the header and that db_ref == 1 since we might end up modifying
	 * the packet.
	 *
	 * From here on every failure path frees "mp" but still returns
	 * mp_ret, so a copy already made for raw sockets is not lost.
	 */
	if (mp->b_cont != NULL) {
		/* Message spans several mblks: pull all of it up. */
		if (ip_pullup(mp, -1, ira) == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - ip_pullup",
			    mp, ill);
			freemsg(mp);
			return (mp_ret);
		}
	}

	if (mp->b_datap->db_ref > 1) {
		mblk_t	*mp1;

		/* Data block is shared: replace mp with a private copy. */
		mp1 = copymsg(mp);
		if (mp1 == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
			freemsg(mp);
			return (mp_ret);
		}
		freemsg(mp);
		mp = mp1;
	}

	/*
	 * In case mp has changed, verify the message before any further
	 * processes.
	 */
	ipha = (ipha_t *)mp->b_rptr;
	icmph = (icmph_t *)&mp->b_rptr[ip_hdr_length];
	if (!icmp_inbound_verify_v4(mp, icmph, ira)) {
		freemsg(mp);
		return (mp_ret);
	}

	/* "mp" is handed off to the redirect or error fanout routine. */
	switch (icmph->icmph_type) {
	case ICMP_REDIRECT:
		icmp_redirect_v4(mp, ipha, icmph, ira);
		break;
	case ICMP_DEST_UNREACHABLE:
		if (icmph->icmph_code == ICMP_FRAGMENTATION_NEEDED) {
			/* Update DCE and adjust MTU in icmp header if needed */
			icmp_inbound_too_big_v4(icmph, ira);
		}
		/* FALLTHROUGH */
	default:
		icmp_inbound_error_fanout_v4(mp, icmph, ira);
		break;
	}
	return (mp_ret);
}
| 1662 | |
| 1663 | /* |
| 1664 | * Send an ICMP echo, timestamp or address mask reply. |
| 1665 | * The caller has already updated the payload part of the packet. |
| 1666 | * We handle the ICMP checksum, IP source address selection and feed |
| 1667 | * the packet into ip_output_simple. |
| 1668 | */ |
| 1669 | static void |
| 1670 | icmp_send_reply_v4(mblk_t *mp, ipha_t *ipha, icmph_t *icmph, |
| 1671 | ip_recv_attr_t *ira) |
| 1672 | { |
| 1673 | uint_t ip_hdr_length = ira->ira_ip_hdr_length; |
| 1674 | ill_t *ill = ira->ira_ill; |
| 1675 | ip_stack_t *ipst = ill->ill_ipst; |
| 1676 | ip_xmit_attr_t ixas; |
| 1677 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1678 | /* Send out an ICMP packet */ |
| 1679 | icmph->icmph_checksum = 0; |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1680 | icmph->icmph_checksum = IP_CSUM(mp, ip_hdr_length, 0); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1681 | /* Reset time to live. */ |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 1682 | ipha->ipha_ttl = ipst->ips_ip_def_ttl; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1683 | { |
| 1684 | /* Swap source and destination addresses */ |
| 1685 | ipaddr_t tmp; |
| 1686 | |
| 1687 | tmp = ipha->ipha_src; |
| 1688 | ipha->ipha_src = ipha->ipha_dst; |
| 1689 | ipha->ipha_dst = tmp; |
| 1690 | } |
| 1691 | ipha->ipha_ident = 0; |
| 1692 | if (!IS_SIMPLE_IPH(ipha)) |
| 1693 | icmp_options_update(ipha); |
| 1694 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1695 | bzero(&ixas, sizeof (ixas)); |
| 1696 | ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4; |
| 1697 | ixas.ixa_zoneid = ira->ira_zoneid; |
| 1698 | ixas.ixa_cred = kcred; |
| 1699 | ixas.ixa_cpid = NOPID; |
| 1700 | ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ |
| 1701 | ixas.ixa_ifindex = 0; |
| 1702 | ixas.ixa_ipst = ipst; |
| 1703 | ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; |
| 1704 | |
| 1705 | if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1706 | /* |
| 1707 | * This packet should go out the same way as it |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1708 | * came in i.e in clear, independent of the IPsec policy |
| 1709 | * for transmitting packets. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1710 | */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1711 | ixas.ixa_flags |= IXAF_NO_IPSEC; |
| 1712 | } else { |
| 1713 | if (!ipsec_in_to_out(ira, &ixas, mp, ipha, NULL)) { |
apersson | 3173664 | 2006-12-19 17:33:00 -0800 | [diff] [blame] | 1714 | BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1715 | /* Note: mp already consumed and ip_drop_packet done */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1716 | return; |
| 1717 | } |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1718 | } |
| 1719 | if (ira->ira_flags & IRAF_MULTIBROADCAST) { |
| 1720 | /* |
| 1721 | * Not one or our addresses (IRE_LOCALs), thus we let |
| 1722 | * ip_output_simple pick the source. |
| 1723 | */ |
| 1724 | ipha->ipha_src = INADDR_ANY; |
| 1725 | ixas.ixa_flags |= IXAF_SET_SOURCE; |
| 1726 | } |
| 1727 | /* Should we send with DF and use dce_pmtu? */ |
| 1728 | if (ipst->ips_ipv4_icmp_return_pmtu) { |
| 1729 | ixas.ixa_flags |= IXAF_PMTU_DISCOVERY; |
| 1730 | ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS; |
| 1731 | } |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1732 | |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 1733 | BUMP_MIB(&ipst->ips_icmp_mib, icmpOutMsgs); |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1734 | |
| 1735 | (void) ip_output_simple(mp, &ixas); |
| 1736 | ixa_cleanup(&ixas); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1737 | } |
| 1738 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1739 | /* |
| 1740 | * Verify the ICMP messages for either for ICMP error or redirect packet. |
| 1741 | * The caller should have fully pulled up the message. If it's a redirect |
| 1742 | * packet, only basic checks on IP header will be done; otherwise, verify |
| 1743 | * the packet by looking at the included ULP header. |
| 1744 | * |
| 1745 | * Called before icmp_inbound_error_fanout_v4 is called. |
| 1746 | */ |
| 1747 | static boolean_t |
| 1748 | icmp_inbound_verify_v4(mblk_t *mp, icmph_t *icmph, ip_recv_attr_t *ira) |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 1749 | { |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1750 | ill_t *ill = ira->ira_ill; |
| 1751 | int hdr_length; |
| 1752 | ip_stack_t *ipst = ira->ira_ill->ill_ipst; |
| 1753 | conn_t *connp; |
| 1754 | ipha_t *ipha; /* Inner IP header */ |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 1755 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1756 | ipha = (ipha_t *)&icmph[1]; |
| 1757 | if ((uchar_t *)ipha + IP_SIMPLE_HDR_LENGTH > mp->b_wptr) |
| 1758 | goto truncated; |
| 1759 | |
| 1760 | hdr_length = IPH_HDR_LENGTH(ipha); |
| 1761 | |
| 1762 | if ((IPH_HDR_VERSION(ipha) != IPV4_VERSION)) |
| 1763 | goto discard_pkt; |
| 1764 | |
| 1765 | if (hdr_length < sizeof (ipha_t)) |
| 1766 | goto truncated; |
| 1767 | |
| 1768 | if ((uchar_t *)ipha + hdr_length > mp->b_wptr) |
| 1769 | goto truncated; |
| 1770 | |
| 1771 | /* |
| 1772 | * Stop here for ICMP_REDIRECT. |
| 1773 | */ |
| 1774 | if (icmph->icmph_type == ICMP_REDIRECT) |
| 1775 | return (B_TRUE); |
| 1776 | |
| 1777 | /* |
| 1778 | * ICMP errors only. |
| 1779 | */ |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 1780 | switch (ipha->ipha_protocol) { |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1781 | case IPPROTO_UDP: |
| 1782 | /* |
| 1783 | * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of |
| 1784 | * transport header. |
| 1785 | */ |
| 1786 | if ((uchar_t *)ipha + hdr_length + ICMP_MIN_TP_HDR_LEN > |
| 1787 | mp->b_wptr) |
| 1788 | goto truncated; |
| 1789 | break; |
| 1790 | case IPPROTO_TCP: { |
| 1791 | tcpha_t *tcpha; |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 1792 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1793 | /* |
| 1794 | * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of |
| 1795 | * transport header. |
| 1796 | */ |
| 1797 | if ((uchar_t *)ipha + hdr_length + ICMP_MIN_TP_HDR_LEN > |
| 1798 | mp->b_wptr) |
| 1799 | goto truncated; |
| 1800 | |
| 1801 | tcpha = (tcpha_t *)((uchar_t *)ipha + hdr_length); |
| 1802 | connp = ipcl_tcp_lookup_reversed_ipv4(ipha, tcpha, TCPS_LISTEN, |
| 1803 | ipst); |
| 1804 | if (connp == NULL) |
| 1805 | goto discard_pkt; |
| 1806 | |
| 1807 | if ((connp->conn_verifyicmp != NULL) && |
| 1808 | !connp->conn_verifyicmp(connp, tcpha, icmph, NULL, ira)) { |
| 1809 | CONN_DEC_REF(connp); |
| 1810 | goto discard_pkt; |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 1811 | } |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 1812 | CONN_DEC_REF(connp); |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1813 | break; |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 1814 | } |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1815 | case IPPROTO_SCTP: |
| 1816 | /* |
| 1817 | * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of |
| 1818 | * transport header. |
| 1819 | */ |
| 1820 | if ((uchar_t *)ipha + hdr_length + ICMP_MIN_TP_HDR_LEN > |
| 1821 | mp->b_wptr) |
| 1822 | goto truncated; |
| 1823 | break; |
| 1824 | case IPPROTO_ESP: |
| 1825 | case IPPROTO_AH: |
| 1826 | break; |
| 1827 | case IPPROTO_ENCAP: |
| 1828 | if ((uchar_t *)ipha + hdr_length + sizeof (ipha_t) > |
| 1829 | mp->b_wptr) |
| 1830 | goto truncated; |
| 1831 | break; |
| 1832 | default: |
| 1833 | break; |
| 1834 | } |
| 1835 | |
| 1836 | return (B_TRUE); |
| 1837 | |
| 1838 | discard_pkt: |
| 1839 | /* Bogus ICMP error. */ |
| 1840 | BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| 1841 | return (B_FALSE); |
| 1842 | |
| 1843 | truncated: |
| 1844 | /* We pulled up everthing already. Must be truncated */ |
| 1845 | BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); |
| 1846 | ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); |
| 1847 | return (B_FALSE); |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 1848 | } |
| 1849 | |
/*
 * Path MTU "plateau" guesses, largest first, taken from the table in
 * RFC 1191.
 *
 * NOTE(review): RFC 1191 Table 7-1 appears to list 1492 where this table
 * has 1496 -- confirm whether the difference is intentional.
 */
static int icmp_frag_size_table[] = {
	32000,
	17914,
	8166,
	4352,
	2002,
	1496,
	1006,
	508,
	296,
	68
};
| 1853 | |
| 1854 | /* |
| 1855 | * Process received ICMP Packet too big. |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1856 | * Just handles the DCE create/update, including using the above table of |
| 1857 | * PMTU guesses. The caller is responsible for validating the packet before |
| 1858 | * passing it in and also to fanout the ICMP error to any matching transport |
| 1859 | * conns. Assumes the message has been fully pulled up and verified. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1860 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1861 | * Before getting here, the caller has called icmp_inbound_verify_v4() |
| 1862 | * that should have verified with ULP to prevent undoing the changes we're |
| 1863 | * going to make to DCE. For example, TCP might have verified that the packet |
| 1864 | * which generated error is in the send window. |
| 1865 | * |
| 1866 | * In some cases this modifies the MTU in the ICMP header of the packet; the |
| 1867 | * caller should pass the packet to the matching ULP after this returns. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1868 | */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1869 | static void |
| 1870 | icmp_inbound_too_big_v4(icmph_t *icmph, ip_recv_attr_t *ira) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1871 | { |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1872 | dce_t *dce; |
| 1873 | int old_mtu; |
| 1874 | int mtu, orig_mtu; |
| 1875 | ipaddr_t dst; |
| 1876 | boolean_t disable_pmtud; |
| 1877 | ill_t *ill = ira->ira_ill; |
| 1878 | ip_stack_t *ipst = ill->ill_ipst; |
| 1879 | uint_t hdr_length; |
| 1880 | ipha_t *ipha; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1881 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1882 | /* Caller already pulled up everything. */ |
| 1883 | ipha = (ipha_t *)&icmph[1]; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1884 | ASSERT(icmph->icmph_type == ICMP_DEST_UNREACHABLE && |
| 1885 | icmph->icmph_code == ICMP_FRAGMENTATION_NEEDED); |
apersson | 3173664 | 2006-12-19 17:33:00 -0800 | [diff] [blame] | 1886 | ASSERT(ill != NULL); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1887 | |
| 1888 | hdr_length = IPH_HDR_LENGTH(ipha); |
| 1889 | |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 1890 | /* |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1891 | * We handle path MTU for source routed packets since the DCE |
| 1892 | * is looked up using the final destination. |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 1893 | */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1894 | dst = ip_get_dst(ipha); |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1895 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1896 | dce = dce_lookup_and_add_v4(dst, ipst); |
| 1897 | if (dce == NULL) { |
| 1898 | /* Couldn't add a unique one - ENOMEM */ |
| 1899 | ip1dbg(("icmp_inbound_too_big_v4: no dce for 0x%x\n", |
| 1900 | ntohl(dst))); |
| 1901 | return; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1902 | } |
Kacheong Poon | 16c9d76 | 2008-11-13 10:19:37 -0800 | [diff] [blame] | 1903 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1904 | /* Check for MTU discovery advice as described in RFC 1191 */ |
| 1905 | mtu = ntohs(icmph->icmph_du_mtu); |
Kacheong Poon | 16c9d76 | 2008-11-13 10:19:37 -0800 | [diff] [blame] | 1906 | orig_mtu = mtu; |
| 1907 | disable_pmtud = B_FALSE; |
| 1908 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1909 | mutex_enter(&dce->dce_lock); |
| 1910 | if (dce->dce_flags & DCEF_PMTU) |
| 1911 | old_mtu = dce->dce_pmtu; |
| 1912 | else |
| 1913 | old_mtu = ill->ill_mtu; |
| 1914 | |
| 1915 | if (icmph->icmph_du_zero != 0 || mtu < ipst->ips_ip_pmtu_min) { |
| 1916 | uint32_t length; |
| 1917 | int i; |
| 1918 | |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 1919 | /* |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1920 | * Use the table from RFC 1191 to figure out |
| 1921 | * the next "plateau" based on the length in |
| 1922 | * the original IP packet. |
priyanka | be54702 | 2006-06-21 12:18:43 -0700 | [diff] [blame] | 1923 | */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1924 | length = ntohs(ipha->ipha_length); |
| 1925 | DTRACE_PROBE2(ip4__pmtu__guess, dce_t *, dce, |
| 1926 | uint32_t, length); |
| 1927 | if (old_mtu <= length && |
|