stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1 | /* |
| 2 | * CDDL HEADER START |
| 3 | * |
| 4 | * The contents of this file are subject to the terms of the |
kcpoon | 5dddb8b | 2006-01-06 00:24:46 -0800 | [diff] [blame] | 5 | * Common Development and Distribution License (the "License"). |
| 6 | * You may not use this file except in compliance with the License. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 7 | * |
| 8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| 9 | * or http://www.opensolaris.org/os/licensing. |
| 10 | * See the License for the specific language governing permissions |
| 11 | * and limitations under the License. |
| 12 | * |
| 13 | * When distributing Covered Code, include this CDDL HEADER in each |
| 14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| 15 | * If applicable, add the following below this CDDL HEADER, with the |
| 16 | * fields enclosed by brackets "[]" replaced with your own identifying |
| 17 | * information: Portions Copyright [yyyy] [name of copyright owner] |
| 18 | * |
| 19 | * CDDL HEADER END |
| 20 | */ |
kcpoon | 5dddb8b | 2006-01-06 00:24:46 -0800 | [diff] [blame] | 21 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 22 | /* |
Yuri Pankov | 59927d3 | 2014-01-08 18:32:42 +0400 | [diff] [blame] | 23 | * Copyright (c) 1990 Mentat Inc. |
meem | 1f19738 | 2010-04-03 14:24:23 -0400 | [diff] [blame] | 24 | * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. |
Yuri Pankov | 0b905b4 | 2017-07-07 18:55:34 +0300 | [diff] [blame] | 25 | * Copyright 2017 Nexenta Systems, Inc. |
Dan McDonald | 7199b8e | 2017-02-01 14:55:57 -0500 | [diff] [blame] | 26 | * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved. |
Dan McDonald | 42c5ef0 | 2019-02-22 14:42:52 -0500 | [diff] [blame] | 27 | * Copyright 2019, Joyent, Inc. |
Ryan Goodfellow | 2514b11 | 2022-06-09 07:52:45 -0700 | [diff] [blame] | 28 | * Copyright 2022 Oxide Computer Company |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 29 | */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 30 | |
| 31 | #ifndef _INET_IP_H |
| 32 | #define _INET_IP_H |
| 33 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 34 | #ifdef __cplusplus |
| 35 | extern "C" { |
| 36 | #endif |
| 37 | |
| 38 | #include <sys/isa_defs.h> |
| 39 | #include <sys/types.h> |
| 40 | #include <inet/mib2.h> |
| 41 | #include <inet/nd.h> |
| 42 | #include <sys/atomic.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 43 | #include <net/if_dl.h> |
| 44 | #include <net/if.h> |
| 45 | #include <netinet/ip.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 46 | #include <netinet/igmp.h> |
dr146992 | 381a2a9 | 2006-10-20 16:37:58 -0700 | [diff] [blame] | 47 | #include <sys/neti.h> |
| 48 | #include <sys/hook.h> |
| 49 | #include <sys/hook_event.h> |
| 50 | #include <sys/hook_impl.h> |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 51 | #include <inet/ip_stack.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 52 | |
| 53 | #ifdef _KERNEL |
| 54 | #include <netinet/ip6.h> |
| 55 | #include <sys/avl.h> |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 56 | #include <sys/list.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 57 | #include <sys/vmem.h> |
| 58 | #include <sys/squeue.h> |
jpk | 45916cd | 2006-03-24 12:29:20 -0800 | [diff] [blame] | 59 | #include <net/route.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 60 | #include <sys/systm.h> |
sangeeta | c793af9 | 2006-08-11 05:59:29 -0700 | [diff] [blame] | 61 | #include <net/radix.h> |
carlsonj | 6a8288c | 2007-09-11 04:26:06 -0700 | [diff] [blame] | 62 | #include <sys/modhash.h> |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 63 | |
| 64 | #ifdef DEBUG |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 65 | #define CONN_DEBUG |
| 66 | #endif |
| 67 | |
| 68 | #define IP_DEBUG |
| 69 | /* |
| 70 | * The mt-streams(9F) flags for the IP module; put here so that other |
| 71 | * "drivers" that are actually IP (e.g., ICMP, UDP) can use the same set |
| 72 | * of flags. |
| 73 | */ |
| 74 | #define IP_DEVMTFLAGS D_MP |
masputra | ff550d0 | 2005-10-22 22:50:14 -0700 | [diff] [blame] | 75 | #endif /* _KERNEL */ |
| 76 | |
| 77 | #define IP_MOD_NAME "ip" |
| 78 | #define IP_DEV_NAME "/dev/ip" |
| 79 | #define IP6_DEV_NAME "/dev/ip6" |
| 80 | |
| 81 | #define UDP_MOD_NAME "udp" |
| 82 | #define UDP_DEV_NAME "/dev/udp" |
| 83 | #define UDP6_DEV_NAME "/dev/udp6" |
| 84 | |
| 85 | #define TCP_MOD_NAME "tcp" |
| 86 | #define TCP_DEV_NAME "/dev/tcp" |
| 87 | #define TCP6_DEV_NAME "/dev/tcp6" |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 88 | |
kcpoon | 77c67f2 | 2006-04-03 08:39:23 -0700 | [diff] [blame] | 89 | #define SCTP_MOD_NAME "sctp" |
| 90 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 91 | #ifndef _IPADDR_T |
| 92 | #define _IPADDR_T |
| 93 | typedef uint32_t ipaddr_t; |
| 94 | #endif |
| 95 | |
| 96 | /* Number of bits in an address */ |
| 97 | #define IP_ABITS 32 |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 98 | #define IPV4_ABITS IP_ABITS |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 99 | #define IPV6_ABITS 128 |
Ravi Chandra Nallan | 7f125a5 | 2010-07-13 18:17:30 +0530 | [diff] [blame] | 100 | #define IP_MAX_HW_LEN 40 |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 101 | |
/*
 * All-ones 32-bit IPv4 address mask.  The whole expansion is parenthesized
 * so that the cast cannot be split from its operand by the surrounding
 * expression (e.g. `sizeof IP_HOST_MASK').
 */
#define	IP_HOST_MASK		((ipaddr_t)0xffffffffU)
| 103 | |
/*
 * Checksum helpers layered on ip_cksum() and bcksum().
 *
 * IP_CSUM yields the bitwise complement of ip_cksum()'s result, truncated to
 * the low 16 bits.  The *_PARTIAL forms return the helper's result unmodified.
 */
#define	IP_CSUM(mp, off, sum)		(~ip_cksum(mp, off, sum) & 0xFFFF)
#define	IP_CSUM_PARTIAL(mp, off, sum)	ip_cksum(mp, off, sum)
#define	IP_BCSUM_PARTIAL(bp, len, sum)	bcksum(bp, len, sum)
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 107 | |
| 108 | #define ILL_FRAG_HASH_TBL_COUNT ((unsigned int)64) |
| 109 | #define ILL_FRAG_HASH_TBL_SIZE (ILL_FRAG_HASH_TBL_COUNT * sizeof (ipfb_t)) |
| 110 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 111 | #define IPV4_ADDR_LEN 4 |
| 112 | #define IP_ADDR_LEN IPV4_ADDR_LEN |
| 113 | #define IP_ARP_PROTO_TYPE 0x0800 |
| 114 | |
| 115 | #define IPV4_VERSION 4 |
| 116 | #define IP_VERSION IPV4_VERSION |
| 117 | #define IP_SIMPLE_HDR_LENGTH_IN_WORDS 5 |
| 118 | #define IP_SIMPLE_HDR_LENGTH 20 |
| 119 | #define IP_MAX_HDR_LENGTH 60 |
| 120 | |
jpk | 45916cd | 2006-03-24 12:29:20 -0800 | [diff] [blame] | 121 | #define IP_MAX_OPT_LENGTH (IP_MAX_HDR_LENGTH-IP_SIMPLE_HDR_LENGTH) |
| 122 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 123 | #define IP_MIN_MTU (IP_MAX_HDR_LENGTH + 8) /* 68 bytes */ |
| 124 | |
| 125 | /* |
| 126 | * XXX IP_MAXPACKET is defined in <netinet/ip.h> as well. At some point the |
| 127 | * 2 files should be cleaned up to remove all redundant definitions. |
| 128 | */ |
| 129 | #define IP_MAXPACKET 65535 |
| 130 | #define IP_SIMPLE_HDR_VERSION \ |
| 131 | ((IP_VERSION << 4) | IP_SIMPLE_HDR_LENGTH_IN_WORDS) |
| 132 | |
jpk | 45916cd | 2006-03-24 12:29:20 -0800 | [diff] [blame] | 133 | #define UDPH_SIZE 8 |
| 134 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 135 | /* |
| 136 | * Constants and type definitions to support IP IOCTL commands |
| 137 | */ |
| 138 | #define IP_IOCTL (('i'<<8)|'p') |
| 139 | #define IP_IOC_IRE_DELETE 4 |
| 140 | #define IP_IOC_IRE_DELETE_NO_REPLY 5 |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 141 | #define IP_IOC_RTS_REQUEST 7 |
| 142 | |
/*
 * Common definitions used by IP IOCTL data structures.
 *
 * NOTE(review): ipllc_name_offset/ipllc_name_length presumably locate a name
 * within the ioctl payload -- confirm against the consumers of this struct.
 */
typedef struct ipllcmd_s {
	uint_t	ipllc_cmd;		/* command selector */
	uint_t	ipllc_name_offset;
	uint_t	ipllc_name_length;
} ipllc_t;
| 149 | |
/*
 * IP IRE Delete Command Structure.
 *
 * The structure starts with the common ipllc_t header; ipid_cmd is a
 * shorthand for that header's command field.
 */
typedef struct ipid_s {
	ipllc_t	ipid_ipllc;		/* common IOCTL header, must be first */
	uint_t	ipid_ire_type;
	uint_t	ipid_addr_offset;
	uint_t	ipid_addr_length;
	uint_t	ipid_mask_offset;
	uint_t	ipid_mask_length;
} ipid_t;

#define	ipid_cmd		ipid_ipllc.ipllc_cmd
| 161 | |
| 162 | #ifdef _KERNEL |
/*
 * Temporary state for ip options parser.
 * The three pointers all refer into the same options buffer.
 */
typedef struct ipoptp_s
{
	uint8_t		*ipoptp_next;	/* next option to look at */
	uint8_t		*ipoptp_end;	/* end of options */
	uint8_t		*ipoptp_cur;	/* start of current option */
	uint8_t		ipoptp_len;	/* length of current option */
	uint32_t	ipoptp_flags;	/* IPOPTP_* flag bits */
} ipoptp_t;
| 174 | |
| 175 | /* |
| 176 | * Flag(s) for ipoptp_flags |
| 177 | */ |
| 178 | #define IPOPTP_ERROR 0x00000001 |
| 179 | #endif /* _KERNEL */ |
| 180 | |
Richard Lowe | bbf2155 | 2022-02-26 16:40:47 -0600 | [diff] [blame] | 181 | /* Controls forwarding of IP packets, set via ipadm(8)/ndd(8) */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 182 | #define IP_FORWARD_NEVER 0 |
| 183 | #define IP_FORWARD_ALWAYS 1 |
| 184 | |
/*
 * True when the IP stack instance `ipst' has ips_ip_forwarding set to
 * IP_FORWARD_ALWAYS.
 */
#define	WE_ARE_FORWARDING(ipst)	((ipst)->ips_ip_forwarding == IP_FORWARD_ALWAYS)
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 186 | |
/*
 * Decode the first byte of an IPv4 header.  `ipha' may be any pointer
 * expression; it is parenthesized before being cast to ipha_t * so that
 * arguments such as `p + off' address the intended byte.
 *
 * IPH_HDR_LENGTH:  header length in bytes (low nibble holds the length in
 *		    32-bit words, hence the shift by two).
 * IPH_HDR_VERSION: IP version number (high nibble).
 */
#define	IPH_HDR_LENGTH(ipha)						\
	((int)(((ipha_t *)(ipha))->ipha_version_and_hdr_length & 0xF) << 2)

#define	IPH_HDR_VERSION(ipha)						\
	((int)(((ipha_t *)(ipha))->ipha_version_and_hdr_length) >> 4)
| 192 | |
| 193 | #ifdef _KERNEL |
/*
 * IP reassembly macros.  We hide starting and ending offsets in b_next and
 * b_prev of messages on the reassembly queue.  The messages are chained using
 * b_cont.  These macros are used in ip_reassemble() so we don't have to see
 * the ugly casts and assignments.
 * Note that the offsets are <= 64k i.e. a uint_t is sufficient to represent
 * them.
 *
 * While a message is on the reassembly queue its b_next/b_prev therefore do
 * not hold valid mblk pointers: b_next carries the start offset and b_prev
 * the end offset, each stored as an integer cast through uintptr_t.
 */
/* Start offset: stored in b_next. */
#define	IP_REASS_START(mp)		((uint_t)(uintptr_t)((mp)->b_next))
#define	IP_REASS_SET_START(mp, u)	\
	((mp)->b_next = (mblk_t *)(uintptr_t)(u))
/* End offset: stored in b_prev. */
#define	IP_REASS_END(mp)		((uint_t)(uintptr_t)((mp)->b_prev))
#define	IP_REASS_SET_END(mp, u)		\
	((mp)->b_prev = (mblk_t *)(uintptr_t)(u))
| 208 | |
| 209 | #define IP_REASS_COMPLETE 0x1 |
| 210 | #define IP_REASS_PARTIAL 0x2 |
| 211 | #define IP_REASS_FAILED 0x4 |
| 212 | |
/*
 * Test to determine whether this is a module instance of IP or a
 * driver instance of IP.
 *
 * CONN_Q(q) is true when the write side of `q' has no q_next.
 */
#define	CONN_Q(q)	(WR(q)->q_next == NULL)

/*
 * Queue -> conn_t conversions.  Q_TO_CONN treats q_ptr as a conn_t *; the
 * remaining macros pick the protocol-specific member out of that conn_t.
 * None of them check CONN_Q(q) or guard against a NULL q_ptr.
 */
#define	Q_TO_CONN(q)	((conn_t *)(q)->q_ptr)
#define	Q_TO_TCP(q)	(Q_TO_CONN((q))->conn_tcp)
#define	Q_TO_UDP(q)	(Q_TO_CONN((q))->conn_udp)
#define	Q_TO_ICMP(q)	(Q_TO_CONN((q))->conn_icmp)
#define	Q_TO_RTS(q)	(Q_TO_CONN((q))->conn_rts)

/* conn_t -> write/read queue. */
#define	CONNP_TO_WQ(connp)	((connp)->conn_wq)
#define	CONNP_TO_RQ(connp)	((connp)->conn_rq)
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 227 | |
/*
 * Take (GRAB) or drop (RELEASE) conn_lock of the conn_t attached to queue
 * `q'.  Both are no-ops when `q' is NULL or CONN_Q(q) is false, so a
 * GRAB/RELEASE pair must be given the same queue.
 *
 * `q' is parenthesized in the NULL test so that any pointer expression can
 * be passed.  The expansion is a brace block, not a single statement; do not
 * use it as the unbraced body of an if/else.
 */
#define	GRAB_CONN_LOCK(q)	{				\
	if ((q) != NULL && CONN_Q(q))				\
		mutex_enter(&(Q_TO_CONN(q))->conn_lock);	\
}

#define	RELEASE_CONN_LOCK(q)	{				\
	if ((q) != NULL && CONN_Q(q))				\
		mutex_exit(&(Q_TO_CONN(q))->conn_lock);		\
}
| 237 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 238 | /* |
Brian Ruthven | 838a4ff | 2009-12-22 00:58:06 +0000 | [diff] [blame] | 239 | * Ref counter macros for ioctls. This provides a guard for TCP to stop |
| 240 | * tcp_close from removing the rq/wq whilst an ioctl is still in flight on the |
| 241 | * stream. The ioctl could have been queued on e.g. an ipsq. tcp_close will wait |
| 242 | * until the ioctlref count is zero before proceeding. |
| 243 | * Ideally conn_oper_pending_ill would be used for this purpose. However, in the |
| 244 | * case where an ioctl is aborted or interrupted, it can be cleared prematurely. |
| 245 | * There are also some race possibilities between ip and the stream head which |
| 246 | * can also end up with conn_oper_pending_ill being cleared prematurely. So, to |
| 247 | * avoid these situations, we use a dedicated ref counter for ioctls which is |
| 248 | * used in addition to and in parallel with the normal conn_ref count. |
| 249 | */ |
/*
 * CONN_INC_IOCTLREF_LOCKED: the caller must already hold conn_lock (this is
 * asserted).  The macro bumps conn_ioctlref and then DROPS conn_lock, so
 * despite the name the lock is no longer held once the macro completes.
 */
#define	CONN_INC_IOCTLREF_LOCKED(connp)	{			\
	ASSERT(MUTEX_HELD(&(connp)->conn_lock));		\
	DTRACE_PROBE1(conn__inc__ioctlref, conn_t *, (connp));	\
	(connp)->conn_ioctlref++;				\
	mutex_exit(&(connp)->conn_lock);			\
}

/*
 * CONN_INC_IOCTLREF: for callers that do not hold conn_lock.  Acquires the
 * lock and relies on CONN_INC_IOCTLREF_LOCKED to release it.
 */
#define	CONN_INC_IOCTLREF(connp)	{			\
	mutex_enter(&(connp)->conn_lock);			\
	CONN_INC_IOCTLREF_LOCKED(connp);			\
}
| 261 | |
/*
 * Drop one ioctl reference.  Acquires and releases conn_lock itself, so the
 * caller must not hold it.  When the count reaches zero and CONN_CLOSING is
 * set in conn_state_flags, waiters on conn_cv are woken.
 */
#define	CONN_DEC_IOCTLREF(connp)	{			\
	mutex_enter(&(connp)->conn_lock);			\
	DTRACE_PROBE1(conn__dec__ioctlref, conn_t *, (connp));	\
	/* Make sure conn_ioctlref will not underflow. */	\
	ASSERT((connp)->conn_ioctlref != 0);			\
	if ((--(connp)->conn_ioctlref == 0) &&			\
	    ((connp)->conn_state_flags & CONN_CLOSING)) {	\
		cv_broadcast(&(connp)->conn_cv);		\
	}							\
	mutex_exit(&(connp)->conn_lock);			\
}
| 273 | |
| 274 | |
/*
 * Complete the pending operation. Usually an ioctl. Can also
 * be a bind or option management request that got enqueued
 * in an ipsq_t. Called on completion of the operation.
 *
 * Under conn_lock, clears conn_oper_pending_ill and wakes waiters on
 * conn_refcv.  After the lock is released, one conn reference is dropped
 * with CONN_DEC_REF().  The caller must not hold conn_lock.
 */
#define	CONN_OPER_PENDING_DONE(connp)	{			\
	mutex_enter(&(connp)->conn_lock);			\
	(connp)->conn_oper_pending_ill = NULL;			\
	cv_broadcast(&(connp)->conn_refcv);			\
	mutex_exit(&(connp)->conn_lock);			\
	CONN_DEC_REF(connp);					\
}
| 287 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 288 | /* |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 289 | * Values for squeue switch: |
| 290 | */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 291 | #define IP_SQUEUE_ENTER_NODRAIN 1 |
| 292 | #define IP_SQUEUE_ENTER 2 |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 293 | #define IP_SQUEUE_FILL 3 |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 294 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 295 | extern int ip_squeue_flag; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 296 | |
/*
 * IP Fragmentation Reassembly Header
 *
 * Addresses are kept in in6_addr_t form for both IP versions; the ipf_src
 * and ipf_dst macros expose the IPv4 part through V4_PART_OF_V6().
 */
typedef struct ipf_s {
	struct ipf_s	*ipf_hash_next;	/* Next entry on the hash chain. */
	struct ipf_s	**ipf_ptphn;	/* Pointer to previous hash next. */
	uint32_t	ipf_ident;	/* Ident to match. */
	uint8_t		ipf_protocol;	/* Protocol to match. */
	uchar_t		ipf_last_frag_seen : 1;	/* Last fragment seen ? */
	time_t		ipf_timestamp;	/* Reassembly start time. */
	mblk_t		*ipf_mp;	/* mblk we live in. */
	mblk_t		*ipf_tail_mp;	/* Frag queue tail pointer. */
	int		ipf_hole_cnt;	/* Number of holes (hard-case). */
	int		ipf_end;	/* Tail end offset (0 -> hard-case). */
	uint_t		ipf_gen;	/* Frag queue generation */
	size_t		ipf_count;	/* Count of bytes used by frag */
	uint_t		ipf_nf_hdr_len; /* Length of nonfragmented header */
	in6_addr_t	ipf_v6src;	/* IPv6 source address */
	in6_addr_t	ipf_v6dst;	/* IPv6 dest address */
	uint_t		ipf_prev_nexthdr_offset; /* Offset for nexthdr value */
	uint8_t		ipf_ecn;	/* ECN info for the fragments */
	uint8_t		ipf_num_dups;	/* Number of times dup frags recvd */
	uint16_t	ipf_checksum_flags; /* Hardware checksum flags */
	uint32_t	ipf_checksum;	/* Partial checksum of fragment data */
} ipf_t;
| 320 | |
/*
 * IPv4 Fragments
 *
 * True when the fragment word of an IPv4 header (as stored in the header,
 * i.e. network byte order) has a non-zero fragment offset or has the
 * More Fragments bit set.  Don't Fragment alone does not make a fragment.
 *
 * Both conditions are checked with one mask so that the argument appears
 * only once in the expansion.
 */
#define	IS_V4_FRAGMENT(ipha_fragment_offset_and_flags)			\
	((ntohs(ipha_fragment_offset_and_flags) & (IPH_OFFSET | IPH_MF)) != 0)
| 327 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 328 | #define ipf_src V4_PART_OF_V6(ipf_v6src) |
| 329 | #define ipf_dst V4_PART_OF_V6(ipf_v6dst) |
| 330 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 331 | #endif /* _KERNEL */ |
| 332 | |
| 333 | /* ICMP types */ |
| 334 | #define ICMP_ECHO_REPLY 0 |
| 335 | #define ICMP_DEST_UNREACHABLE 3 |
| 336 | #define ICMP_SOURCE_QUENCH 4 |
| 337 | #define ICMP_REDIRECT 5 |
| 338 | #define ICMP_ECHO_REQUEST 8 |
| 339 | #define ICMP_ROUTER_ADVERTISEMENT 9 |
| 340 | #define ICMP_ROUTER_SOLICITATION 10 |
| 341 | #define ICMP_TIME_EXCEEDED 11 |
| 342 | #define ICMP_PARAM_PROBLEM 12 |
| 343 | #define ICMP_TIME_STAMP_REQUEST 13 |
| 344 | #define ICMP_TIME_STAMP_REPLY 14 |
| 345 | #define ICMP_INFO_REQUEST 15 |
| 346 | #define ICMP_INFO_REPLY 16 |
| 347 | #define ICMP_ADDRESS_MASK_REQUEST 17 |
| 348 | #define ICMP_ADDRESS_MASK_REPLY 18 |
| 349 | |
/*
 * Evaluates to true if the ICMP type is an ICMP error, i.e. one of
 * destination unreachable, source quench, time exceeded or parameter
 * problem.  ICMP_REDIRECT is not treated as an error here.
 * `type' may be evaluated up to four times; pass a side-effect-free value.
 */
#define	ICMP_IS_ERROR(type)	(		\
	(type) == ICMP_DEST_UNREACHABLE ||	\
	(type) == ICMP_SOURCE_QUENCH ||		\
	(type) == ICMP_TIME_EXCEEDED ||		\
	(type) == ICMP_PARAM_PROBLEM)
| 356 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 357 | /* ICMP_TIME_EXCEEDED codes */ |
| 358 | #define ICMP_TTL_EXCEEDED 0 |
| 359 | #define ICMP_REASSEMBLY_TIME_EXCEEDED 1 |
| 360 | |
| 361 | /* ICMP_DEST_UNREACHABLE codes */ |
| 362 | #define ICMP_NET_UNREACHABLE 0 |
| 363 | #define ICMP_HOST_UNREACHABLE 1 |
| 364 | #define ICMP_PROTOCOL_UNREACHABLE 2 |
| 365 | #define ICMP_PORT_UNREACHABLE 3 |
| 366 | #define ICMP_FRAGMENTATION_NEEDED 4 |
| 367 | #define ICMP_SOURCE_ROUTE_FAILED 5 |
| 368 | #define ICMP_DEST_NET_UNKNOWN 6 |
| 369 | #define ICMP_DEST_HOST_UNKNOWN 7 |
| 370 | #define ICMP_SRC_HOST_ISOLATED 8 |
| 371 | #define ICMP_DEST_NET_UNREACH_ADMIN 9 |
| 372 | #define ICMP_DEST_HOST_UNREACH_ADMIN 10 |
| 373 | #define ICMP_DEST_NET_UNREACH_TOS 11 |
| 374 | #define ICMP_DEST_HOST_UNREACH_TOS 12 |
| 375 | |
/*
 * ICMP Header Structure
 *
 * A fixed type/code/checksum prefix followed by a four-byte union whose
 * interpretation depends on the message type.  The icmph_* macros below give
 * direct access to the union members.
 */
typedef struct icmph_s {
	uint8_t		icmph_type;	/* one of the ICMP_* types */
	uint8_t		icmph_code;	/* type-specific code */
	uint16_t	icmph_checksum;
	union {
		struct { /* ECHO request/response structure */
			uint16_t	u_echo_ident;
			uint16_t	u_echo_seqnum;
		} u_echo;
		struct { /* Destination unreachable structure */
			uint16_t	u_du_zero;
			uint16_t	u_du_mtu;
		} u_du;
		struct { /* Parameter problem structure */
			uint8_t		u_pp_ptr;
			uint8_t		u_pp_rsvd[3];
		} u_pp;
		struct { /* Redirect structure */
			ipaddr_t	u_rd_gateway;
		} u_rd;
	} icmph_u;
} icmph_t;

/* Shorthand accessors for the members of icmph_u. */
#define	icmph_echo_ident	icmph_u.u_echo.u_echo_ident
#define	icmph_echo_seqnum	icmph_u.u_echo.u_echo_seqnum
#define	icmph_du_zero		icmph_u.u_du.u_du_zero
#define	icmph_du_mtu		icmph_u.u_du.u_du_mtu
#define	icmph_pp_ptr		icmph_u.u_pp.u_pp_ptr
#define	icmph_rd_gateway	icmph_u.u_rd.u_rd_gateway
| 406 | |
| 407 | #define ICMPH_SIZE 8 |
| 408 | |
| 409 | /* |
| 410 | * Minimum length of transport layer header included in an ICMP error |
| 411 | * message for it to be considered valid. |
| 412 | */ |
| 413 | #define ICMP_MIN_TP_HDR_LEN 8 |
| 414 | |
/*
 * Aligned IP header
 *
 * IPv4 header without options (IP_SIMPLE_HDR_LENGTH bytes).
 */
typedef struct ipha_s {
	/* High nibble: version; low nibble: header length in 32-bit words. */
	uint8_t		ipha_version_and_hdr_length;
	uint8_t		ipha_type_of_service;
	uint16_t	ipha_length;
	uint16_t	ipha_ident;
	/* IPH_DF / IPH_MF flag bits plus the IPH_OFFSET fragment offset. */
	uint16_t	ipha_fragment_offset_and_flags;
	uint8_t		ipha_ttl;
	uint8_t		ipha_protocol;
	uint16_t	ipha_hdr_checksum;
	ipaddr_t	ipha_src;
	ipaddr_t	ipha_dst;
} ipha_t;
| 428 | |
brendan | 10e6dad | 2008-06-13 19:06:55 -0700 | [diff] [blame] | 429 | /* |
| 430 | * IP Flags |
| 431 | * |
| 432 | * Some of these constant names are copied for the DTrace IP provider in |
| 433 | * usr/src/lib/libdtrace/common/{ip.d.in, ip.sed.in}, which should be kept |
| 434 | * in sync. |
| 435 | */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 436 | #define IPH_DF 0x4000 /* Don't fragment */ |
| 437 | #define IPH_MF 0x2000 /* More fragments to come */ |
| 438 | #define IPH_OFFSET 0x1FFF /* Where the offset lives */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 439 | |
| 440 | /* Byte-order specific values */ |
| 441 | #ifdef _BIG_ENDIAN |
| 442 | #define IPH_DF_HTONS 0x4000 /* Don't fragment */ |
| 443 | #define IPH_MF_HTONS 0x2000 /* More fragments to come */ |
| 444 | #define IPH_OFFSET_HTONS 0x1FFF /* Where the offset lives */ |
| 445 | #else |
| 446 | #define IPH_DF_HTONS 0x0040 /* Don't fragment */ |
| 447 | #define IPH_MF_HTONS 0x0020 /* More fragments to come */ |
| 448 | #define IPH_OFFSET_HTONS 0xFF1F /* Where the offset lives */ |
| 449 | #endif |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 450 | |
| 451 | /* ECN code points for IPv4 TOS byte and IPv6 traffic class octet. */ |
meem | 7924222 | 2008-07-29 18:39:05 -0700 | [diff] [blame] | 452 | #define IPH_ECN_NECT 0x0 /* Not ECN-Capable Transport */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 453 | #define IPH_ECN_ECT1 0x1 /* ECN-Capable Transport, ECT(1) */ |
| 454 | #define IPH_ECN_ECT0 0x2 /* ECN-Capable Transport, ECT(0) */ |
| 455 | #define IPH_ECN_CE 0x3 /* ECN-Congestion Experienced (CE) */ |
| 456 | |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 457 | struct ill_s; |
| 458 | |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 459 | typedef void ip_v6intfid_func_t(struct ill_s *, in6_addr_t *); |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 460 | typedef void ip_v6mapinfo_func_t(struct ill_s *, uchar_t *, uchar_t *); |
| 461 | typedef void ip_v4mapinfo_func_t(struct ill_s *, uchar_t *, uchar_t *); |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 462 | |
/*
 * IP Mac info structure
 *
 * Describes one link-layer media type together with the media-specific
 * callbacks used by IP.
 * NOTE(review): the two sap members presumably hold the SAP values used for
 * IPv4 and IPv6 on this media -- confirm against the table that populates
 * this structure.
 */
typedef struct ip_m_s {
	t_uscalar_t		ip_m_mac_type;	/* From <sys/dlpi.h> */
	int			ip_m_type;	/* From <net/if_types.h> */
	t_uscalar_t		ip_m_ipv4sap;
	t_uscalar_t		ip_m_ipv6sap;
	ip_v4mapinfo_func_t	*ip_m_v4mapping;
	ip_v6mapinfo_func_t	*ip_m_v6mapping;
	ip_v6intfid_func_t	*ip_m_v6intfid;	/* via MEDIA_V6INTFID() */
	ip_v6intfid_func_t	*ip_m_v6destintfid; /* via MEDIA_V6DESTINTFID() */
} ip_m_t;
| 474 | |
| 475 | /* |
| 476 | * The following functions attempt to reduce the link layer dependency |
| 477 | * of the IP stack. The current set of link specific operations are: |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 478 | * a. map from IPv4 class D (224.0/4) multicast address range or the |
| 479 | * IPv6 multicast address range (ff00::/8) to the link layer multicast |
| 480 | * address. |
| 481 | * b. derive the default IPv6 interface identifier from the interface. |
| 482 | * c. derive the default IPv6 destination interface identifier from |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 483 | * the interface (point-to-point only). |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 484 | */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 485 | extern void ip_mcast_mapping(struct ill_s *, uchar_t *, uchar_t *); |
/*
 * ip_m_v6*intfid return void and are never NULL
 *
 * Both macros call through the corresponding function pointer in the
 * ip_m_t `ip_m', passing `ill' and `v6ptr' unchanged.  No NULL check is
 * made on the function pointer.
 */
#define	MEDIA_V6INTFID(ip_m, ill, v6ptr) (ip_m)->ip_m_v6intfid(ill, v6ptr)
#define	MEDIA_V6DESTINTFID(ip_m, ill, v6ptr) \
	(ip_m)->ip_m_v6destintfid(ill, v6ptr)
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 490 | |
| 491 | /* Router entry types */ |
| 492 | #define IRE_BROADCAST 0x0001 /* Route entry for broadcast address */ |
| 493 | #define IRE_DEFAULT 0x0002 /* Route entry for default gateway */ |
| 494 | #define IRE_LOCAL 0x0004 /* Route entry for local address */ |
| 495 | #define IRE_LOOPBACK 0x0008 /* Route entry for loopback address */ |
| 496 | #define IRE_PREFIX 0x0010 /* Route entry for prefix routes */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 497 | #ifndef _KERNEL |
| 498 | /* Keep so user-level still compiles */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 499 | #define IRE_CACHE 0x0020 /* Cached Route entry */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 500 | #endif |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 501 | #define IRE_IF_NORESOLVER 0x0040 /* Route entry for local interface */ |
| 502 | /* net without any address mapping. */ |
| 503 | #define IRE_IF_RESOLVER 0x0080 /* Route entry for local interface */ |
| 504 | /* net with resolver. */ |
| 505 | #define IRE_HOST 0x0100 /* Host route entry */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 506 | /* Keep so user-level still compiles */ |
dd193516 | 6bdb8e6 | 2006-10-27 15:48:26 -0700 | [diff] [blame] | 507 | #define IRE_HOST_REDIRECT 0x0200 /* only used for T_SVR4_OPTMGMT_REQ */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 508 | #define IRE_IF_CLONE 0x0400 /* Per host clone of IRE_IF */ |
| 509 | #define IRE_MULTICAST 0x0800 /* Special - not in table */ |
| 510 | #define IRE_NOROUTE 0x1000 /* Special - not in table */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 511 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 512 | #define IRE_INTERFACE (IRE_IF_NORESOLVER | IRE_IF_RESOLVER) |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 513 | |
| 514 | #define IRE_IF_ALL (IRE_IF_NORESOLVER | IRE_IF_RESOLVER | \ |
| 515 | IRE_IF_CLONE) |
dd193516 | 6bdb8e6 | 2006-10-27 15:48:26 -0700 | [diff] [blame] | 516 | #define IRE_OFFSUBNET (IRE_DEFAULT | IRE_PREFIX | IRE_HOST) |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 517 | #define IRE_OFFLINK IRE_OFFSUBNET |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 518 | /* |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 519 | * Note that we view IRE_NOROUTE as ONLINK since we can "send" to them without |
| 520 | * going through a router; the result of sending will be an error/icmp error. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 521 | */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 522 | #define IRE_ONLINK (IRE_IF_ALL|IRE_LOCAL|IRE_LOOPBACK| \ |
| 523 | IRE_BROADCAST|IRE_MULTICAST|IRE_NOROUTE) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 524 | |
| 525 | /* Arguments to ire_flush_cache() */ |
| 526 | #define IRE_FLUSH_DELETE 0 |
| 527 | #define IRE_FLUSH_ADD 1 |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 528 | #define IRE_FLUSH_GWCHANGE 2 |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 529 | |
| 530 | /* |
Erik Nordmark | 9e3469d | 2010-01-08 08:42:20 -0800 | [diff] [blame] | 531 | * Flags to ire_route_recursive |
| 532 | */ |
| 533 | #define IRR_NONE 0 |
| 534 | #define IRR_ALLOCATE 1 /* OK to allocate IRE_IF_CLONE */ |
| 535 | #define IRR_INCOMPLETE 2 /* OK to return incomplete chain */ |
| 536 | |
| 537 | /* |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 538 | * Open/close synchronization flags. |
| 539 | * These are kept in a separate field in the conn and the synchronization |
| 540 | * depends on the atomic 32 bit access to that field. |
| 541 | */ |
| 542 | #define CONN_CLOSING 0x01 /* ip_close waiting for ip_wsrv */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 543 | #define CONN_CONDEMNED 0x02 /* conn is closing, no more refs */ |
| 544 | #define CONN_INCIPIENT 0x04 /* conn not yet visible, no refs */ |
| 545 | #define CONN_QUIESCED 0x08 /* conn is now quiescent */ |
| 546 | #define CONN_UPDATE_ILL 0x10 /* conn_update_ill in progress */ |
georges | 325b806 | 2007-02-06 07:01:31 -0800 | [diff] [blame] | 547 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 548 | /* |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 549 | * Flags for dce_flags field. Specifies which information has been set. |
| 550 | * dce_ident is always present, but the other ones are identified by the flags. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 551 | */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 552 | #define DCEF_DEFAULT 0x0001 /* Default DCE - no pmtu or uinfo */ |
| 553 | #define DCEF_PMTU 0x0002 /* Different than interface MTU */ |
| 554 | #define DCEF_UINFO 0x0004 /* dce_uinfo set */ |
Dan McDonald | 7199b8e | 2017-02-01 14:55:57 -0500 | [diff] [blame] | 555 | #define DCEF_TOO_SMALL_PMTU 0x0008 /* Smaller than IPv4 MIN */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 556 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 557 | #ifdef _KERNEL |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 558 | /* |
| 559 | * Extra structures needed for per-src-addr filtering (IGMPv3/MLDv2) |
| 560 | */ |
| 561 | #define MAX_FILTER_SIZE 64 |
| 562 | |
/*
 * Source address list with a fixed capacity of MAX_FILTER_SIZE entries.
 * NOTE(review): sl_numsrc presumably counts the entries of sl_addr that are
 * in use -- confirm against the list manipulation routines.
 */
typedef struct slist_s {
	int		sl_numsrc;
	in6_addr_t	sl_addr[MAX_FILTER_SIZE];
} slist_t;
| 567 | |
/*
 * Following struct is used to maintain retransmission state for
 * a multicast group.  One rtx_state_t struct is an in-line field
 * of the ilm_t struct; the slist_ts in the rtx_state_t struct are
 * alloc'd as needed.
 */
typedef struct rtx_state_s {
	uint_t		rtx_timer;	/* retrans timer */
	int		rtx_cnt;	/* retrans count */
	int		rtx_fmode_cnt;	/* retrans count for fmode change */
	slist_t		*rtx_allow;	/* alloc'd as needed */
	slist_t		*rtx_block;	/* alloc'd as needed */
} rtx_state_t;
| 581 | |
| 582 | /* |
| 583 | * Used to construct list of multicast address records that will be |
| 584 | * sent in a single listener report. |
| 585 | */ |
| 586 | typedef struct mrec_s { |
| 587 | struct mrec_s *mrec_next; |
| 588 | uint8_t mrec_type; |
| 589 | uint8_t mrec_auxlen; /* currently unused */ |
| 590 | in6_addr_t mrec_group; |
| 591 | slist_t mrec_srcs; |
| 592 | } mrec_t; |
| 593 | |
| 594 | /* Group membership list per upper conn */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 595 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 596 | /* |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 597 | * We record the multicast information from the socket option in |
| 598 | * ilg_ifaddr/ilg_ifindex. This allows rejoining the group in the case when |
| 599 | * the ifaddr (or ifindex) disappears and later reappears, potentially on |
| 600 | * a different ill. The IPv6 multicast socket options and ioctls all specify |
| 601 | * the interface using an ifindex. For IPv4 some socket options/ioctls use |
| 602 | * the interface address and others use the index. We record here the method |
| 603 | * that was actually used (and leave the other of ilg_ifaddr or ilg_ifindex) |
| 604 | * at zero so that we can rejoin the way the application intended. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 605 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 606 | * We track the ill on which we will or already have joined an ilm using |
| 607 | * ilg_ill. When we have succeeded joining the ilm and have a refhold on it |
| 608 | * then we set ilg_ilm. Thus intentionally there is a window where ilg_ill is |
| 609 | * set and ilg_ilm is not set. This allows clearing ilg_ill as a signal that |
| 610 | * the ill is being unplumbed and the ilm should be discarded. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 611 | * |
| 612 | * ilg records the state of multicast memberships of a socket end point. |
| 613 | * ilm records the state of multicast memberships with the driver and is |
| 614 | * maintained per interface. |
| 615 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 616 | * The ilg state is protected by conn_ilg_lock. |
| 617 | * The ilg will not be freed until ilg_refcnt drops to zero. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 618 | */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 619 | typedef struct ilg_s { |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 620 | struct ilg_s *ilg_next; |
| 621 | struct ilg_s **ilg_ptpn; |
| 622 | struct conn_s *ilg_connp; /* Back pointer to get lock */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 623 | in6_addr_t ilg_v6group; |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 624 | ipaddr_t ilg_ifaddr; /* For some IPv4 cases */ |
| 625 | uint_t ilg_ifindex; /* IPv6 and some other IPv4 cases */ |
| 626 | struct ill_s *ilg_ill; /* Where ilm is joined. No refhold */ |
| 627 | struct ilm_s *ilg_ilm; /* With ilm_refhold */ |
| 628 | uint_t ilg_refcnt; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 629 | mcast_record_t ilg_fmode; /* MODE_IS_INCLUDE/MODE_IS_EXCLUDE */ |
| 630 | slist_t *ilg_filter; |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 631 | boolean_t ilg_condemned; /* Conceptually deleted */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 632 | } ilg_t; |
| 633 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 634 | /* |
meem | 7924222 | 2008-07-29 18:39:05 -0700 | [diff] [blame] | 635 | * Multicast address list entry for ill. |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 636 | * ilm_ill is used by IPv4 and IPv6 |
| 637 | * |
| 638 | * The ilm state (and other multicast state on the ill) is protected by |
| 639 | * ill_mcast_lock. Operations that change state on both an ilg and ilm |
| 640 | * in addition use ill_mcast_serializer to ensure that we can't have |
| 641 | * interleaving between e.g., add and delete operations for the same conn_t, |
Sowmini Varadhan | f1c454b | 2010-01-11 10:29:23 -0500 | [diff] [blame] | 642 | * group, and ill. The ill_mcast_serializer is also used to ensure that |
| 643 | * multicast group joins do not occur on an interface that is in the process |
| 644 | * of joining an IPMP group. |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 645 | * |
| 646 | * The comment below (and for other netstack_t references) refers |
| 647 | * to the fact that we only do netstack_hold in particular cases, |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 648 | * such as the references from open endpoints (ill_t and conn_t's |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 649 | * pointers). Internally within IP we rely on IP's ability to cleanup e.g. |
| 650 | * ire_t's when an ill goes away. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 651 | */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 652 | typedef struct ilm_s { |
| 653 | in6_addr_t ilm_v6addr; |
| 654 | int ilm_refcnt; |
| 655 | uint_t ilm_timer; /* IGMP/MLD query resp timer, in msec */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 656 | struct ilm_s *ilm_next; /* Linked list for each ill */ |
| 657 | uint_t ilm_state; /* state of the membership */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 658 | struct ill_s *ilm_ill; /* Back pointer to ill - ill_ilm_cnt */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 659 | zoneid_t ilm_zoneid; |
| 660 | int ilm_no_ilg_cnt; /* number of joins w/ no ilg */ |
| 661 | mcast_record_t ilm_fmode; /* MODE_IS_INCLUDE/MODE_IS_EXCLUDE */ |
| 662 | slist_t *ilm_filter; /* source filter list */ |
| 663 | slist_t *ilm_pendsrcs; /* relevant src addrs for pending req */ |
| 664 | rtx_state_t ilm_rtx; /* SCR retransmission state */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 665 | ipaddr_t ilm_ifaddr; /* For IPv4 netstat */ |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 666 | ip_stack_t *ilm_ipst; /* Does not have a netstack_hold */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 667 | } ilm_t; |
| 668 | |
| 669 | #define ilm_addr V4_PART_OF_V6(ilm_v6addr) |
| 670 | |
/*
 * Soft reference to an IPsec SA.
 *
 * In relative terms conn's can be persistent (they live as long as the
 * processes that create them), while SA's are ephemeral (they die when
 * their time-based or byte-based lifetimes are reached).
 *
 * Holding a hard reference to an SA from an ipsec_latch_t would be
 * possible, but would let expired SA's linger for a potentially unbounded
 * time.
 *
 * So, besides the pointer, the hash bucket and the bucket generation are
 * remembered as well.  The bucket generation is incremented on each
 * deletion.
 */
struct ipsa_ref_s {
	struct ipsa_s	*ipsr_sa;
	struct isaf_s	*ipsr_bucket;
	uint64_t	ipsr_gen;
};
typedef struct ipsa_ref_s ipsa_ref_t;
| 692 | |
| 693 | /* |
| 694 | * IPsec "latching" state. |
| 695 | * |
| 696 | * In the presence of IPsec policy, fully-bound conn's bind a connection |
| 697 | * to more than just the 5-tuple, but also a specific IPsec action and |
| 698 | * identity-pair. |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 699 | * The identity pair is accessed from both the receive and transmit side |
| 700 | * hence it is maintained in the ipsec_latch_t structure. conn_latch and |
| 701 | * ixa_ipsec_latch points to it. |
| 702 | * The policy and actions are stored in conn_latch_in_policy and |
| 703 | * conn_latch_in_action for the inbound side, and in ixa_ipsec_policy and |
| 704 | * ixa_ipsec_action for the transmit side. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 705 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 706 | * As an optimization, we also cache soft references to IPsec SA's in |
| 707 | * ip_xmit_attr_t so that we can fast-path around most of the work needed for |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 708 | * outbound IPsec SA selection. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 709 | */ |
| 710 | typedef struct ipsec_latch_s |
| 711 | { |
| 712 | kmutex_t ipl_lock; |
| 713 | uint32_t ipl_refcnt; |
| 714 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 715 | struct ipsid_s *ipl_local_cid; |
| 716 | struct ipsid_s *ipl_remote_cid; |
| 717 | unsigned int |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 718 | ipl_ids_latched : 1, |
| 719 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 720 | ipl_pad_to_bit_31 : 31; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 721 | } ipsec_latch_t; |
| 722 | |
/*
 * Reference counting on ipl_refcnt.  Note that both macros expand to a
 * bare brace-enclosed block, not to a single statement.
 */

/* Take a reference, then ASSERT that the count is non-zero. */
#define	IPLATCH_REFHOLD(ipl) {				\
	atomic_inc_32(&(ipl)->ipl_refcnt);		\
	ASSERT((ipl)->ipl_refcnt != 0);			\
}

/*
 * Drop a reference.  iplatch_free() is called when the decrement brings
 * the count down to zero.
 */
#define	IPLATCH_REFRELE(ipl) {					\
	ASSERT((ipl)->ipl_refcnt != 0);				\
	membar_exit();						\
	if (atomic_dec_32_nv(&(ipl)->ipl_refcnt) == 0) {	\
		iplatch_free(ipl);				\
	}							\
}
| 734 | |
| 735 | /* |
| 736 | * peer identity structure. |
| 737 | */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 738 | typedef struct conn_s conn_t; |
| 739 | |
| 740 | /* |
meem | 7924222 | 2008-07-29 18:39:05 -0700 | [diff] [blame] | 741 | * This is used to match an inbound/outbound datagram with policy. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 742 | */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 743 | typedef struct ipsec_selector { |
| 744 | in6_addr_t ips_local_addr_v6; |
| 745 | in6_addr_t ips_remote_addr_v6; |
| 746 | uint16_t ips_local_port; |
| 747 | uint16_t ips_remote_port; |
| 748 | uint8_t ips_icmp_type; |
| 749 | uint8_t ips_icmp_code; |
| 750 | uint8_t ips_protocol; |
| 751 | uint8_t ips_isv4 : 1, |
| 752 | ips_is_icmp_inv_acq: 1; |
| 753 | } ipsec_selector_t; |
| 754 | |
| 755 | /* |
| 756 | * Note that we put v4 addresses in the *first* 32-bit word of the |
| 757 | * selector rather than the last to simplify the prefix match/mask code |
| 758 | * in spd.c |
| 759 | */ |
| 760 | #define ips_local_addr_v4 ips_local_addr_v6.s6_addr32[0] |
| 761 | #define ips_remote_addr_v4 ips_remote_addr_v6.s6_addr32[0] |
| 762 | |
/* Direction values used in IP by the IPsec code. */
#define	IPSEC_OUTBOUND	B_TRUE
#define	IPSEC_INBOUND	B_FALSE

/*
 * Policy failures come in two variants: the packet arrived secured when
 * that was not needed (IPSEC_POLICY_???_NOT_NEEDED), or it does not carry
 * the desired level of protection (IPSEC_POLICY_MISMATCH).
 */
#define	IPSEC_POLICY_NOT_NEEDED		0
#define	IPSEC_POLICY_MISMATCH		1
#define	IPSEC_POLICY_AUTH_NOT_NEEDED	2
#define	IPSEC_POLICY_ENCR_NOT_NEEDED	3
#define	IPSEC_POLICY_SE_NOT_NEEDED	4
#define	IPSEC_POLICY_MAX		5	/* Always max + 1. */
| 778 | |
/*
 * IPsec policy needs to be consulted for a conn when either
 *
 * 1) per-socket policy is present - indicated by conn_in_enforce_policy
 *    (inbound) or conn_out_enforce_policy (outbound), or
 * 2) no policy has been cached on the conn and the global policy for the
 *    given direction and address family is non-empty.
 *
 * The plain macros test the IPv4 global policy flag, the _V6 variants the
 * IPv6 one.
 */
#define	CONN_INBOUND_POLICY_PRESENT(connp, ipss)		\
	((connp)->conn_in_enforce_policy ||			\
	(!((connp)->conn_policy_cached) &&			\
	((ipss)->ipsec_inbound_v4_policy_present)))

#define	CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)		\
	((connp)->conn_in_enforce_policy ||			\
	(!((connp)->conn_policy_cached) &&			\
	((ipss)->ipsec_inbound_v6_policy_present)))

#define	CONN_OUTBOUND_POLICY_PRESENT(connp, ipss)		\
	((connp)->conn_out_enforce_policy ||			\
	(!((connp)->conn_policy_cached) &&			\
	((ipss)->ipsec_outbound_v4_policy_present)))

#define	CONN_OUTBOUND_POLICY_PRESENT_V6(connp, ipss)		\
	((connp)->conn_out_enforce_policy ||			\
	(!((connp)->conn_policy_cached) &&			\
	((ipss)->ipsec_outbound_v6_policy_present)))
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 805 | |
| 806 | /* |
| 807 | * Information cached in IRE for upper layer protocol (ULP). |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 808 | */ |
| 809 | typedef struct iulp_s { |
| 810 | boolean_t iulp_set; /* Is any metric set? */ |
| 811 | uint32_t iulp_ssthresh; /* Slow start threshold (TCP). */ |
| 812 | clock_t iulp_rtt; /* Guestimate in millisecs. */ |
| 813 | clock_t iulp_rtt_sd; /* Cached value of RTT variance. */ |
| 814 | uint32_t iulp_spipe; /* Send pipe size. */ |
| 815 | uint32_t iulp_rpipe; /* Receive pipe size. */ |
| 816 | uint32_t iulp_rtomax; /* Max round trip timeout. */ |
| 817 | uint32_t iulp_sack; /* Use SACK option (TCP)? */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 818 | uint32_t iulp_mtu; /* Setable with routing sockets */ |
| 819 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 820 | uint32_t |
| 821 | iulp_tstamp_ok : 1, /* Use timestamp option (TCP)? */ |
| 822 | iulp_wscale_ok : 1, /* Use window scale option (TCP)? */ |
| 823 | iulp_ecn_ok : 1, /* Enable ECN (for TCP)? */ |
| 824 | iulp_pmtud_ok : 1, /* Enable PMTUd? */ |
| 825 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 826 | /* These three are passed out by ip_set_destination */ |
| 827 | iulp_localnet: 1, /* IRE_ONLINK */ |
| 828 | iulp_loopback: 1, /* IRE_LOOPBACK */ |
| 829 | iulp_local: 1, /* IRE_LOCAL */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 830 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 831 | iulp_not_used : 25; |
| 832 | } iulp_t; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 833 | |
| 834 | /* |
meem | 3344d75 | 2010-03-27 02:33:20 -0400 | [diff] [blame] | 835 | * The conn drain list structure (idl_t), protected by idl_lock. Each conn_t |
| 836 | * inserted in the list points back at this idl_t using conn_idl, and is |
| 837 | * chained by conn_drain_next and conn_drain_prev, which are also protected by |
| 838 | * idl_lock. When flow control is relieved, either ip_wsrv() (STREAMS) or |
| 839 | * ill_flow_enable() (non-STREAMS) will call conn_drain(). |
Venugopal Iyer | ae6aa22 | 2009-02-17 01:31:30 -0800 | [diff] [blame] | 840 | * |
| 841 | * The conn drain list, idl_t, itself is part of tx cookie list structure. |
| 842 | * A tx cookie list points to a blocked Tx ring and contains the list of |
| 843 | * all conn's that are blocked due to the flow-controlled Tx ring (via |
| 844 | * the idl drain list). Note that a link can have multiple Tx rings. The |
| 845 | * drain list will store the conn's blocked due to Tx ring being flow |
| 846 | * controlled. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 847 | */ |
Venugopal Iyer | ae6aa22 | 2009-02-17 01:31:30 -0800 | [diff] [blame] | 848 | |
| 849 | typedef uintptr_t ip_mac_tx_cookie_t; |
| 850 | typedef struct idl_s idl_t; |
| 851 | typedef struct idl_tx_list_s idl_tx_list_t; |
| 852 | |
| 853 | struct idl_tx_list_s { |
| 854 | ip_mac_tx_cookie_t txl_cookie; |
| 855 | kmutex_t txl_lock; /* Lock for this list */ |
| 856 | idl_t *txl_drain_list; |
| 857 | int txl_drain_index; |
| 858 | }; |
| 859 | |
| 860 | struct idl_s { |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 861 | conn_t *idl_conn; /* Head of drain list */ |
| 862 | kmutex_t idl_lock; /* Lock for this list */ |
Venugopal Iyer | ae6aa22 | 2009-02-17 01:31:30 -0800 | [diff] [blame] | 863 | idl_tx_list_t *idl_itl; |
| 864 | }; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 865 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 866 | /* |
| 867 | * Interface route structure which holds the necessary information to recreate |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 868 | * routes that are tied to an interface i.e. have ire_ill set. |
| 869 | * |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 870 | * These routes which were initially created via a routing socket or via the |
| 871 | * SIOCADDRT ioctl may be gateway routes (RTF_GATEWAY being set) or may be |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 872 | * traditional interface routes. When an ill comes back up after being |
| 873 | * down, this information will be used to recreate the routes. These |
| 874 | * are part of an mblk_t chain that hangs off of the ILL (ill_saved_ire_mp). |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 875 | */ |
| 876 | typedef struct ifrt_s { |
| 877 | ushort_t ifrt_type; /* Type of IRE */ |
| 878 | in6_addr_t ifrt_v6addr; /* Address IRE represents. */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 879 | in6_addr_t ifrt_v6gateway_addr; /* Gateway if IRE_OFFLINK */ |
| 880 | in6_addr_t ifrt_v6setsrc_addr; /* Src addr if RTF_SETSRC */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 881 | in6_addr_t ifrt_v6mask; /* Mask for matching IRE. */ |
| 882 | uint32_t ifrt_flags; /* flags related to route */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 883 | iulp_t ifrt_metrics; /* Routing socket metrics */ |
| 884 | zoneid_t ifrt_zoneid; /* zoneid for route */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 885 | } ifrt_t; |
| 886 | |
| 887 | #define ifrt_addr V4_PART_OF_V6(ifrt_v6addr) |
| 888 | #define ifrt_gateway_addr V4_PART_OF_V6(ifrt_v6gateway_addr) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 889 | #define ifrt_mask V4_PART_OF_V6(ifrt_v6mask) |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 890 | #define ifrt_setsrc_addr V4_PART_OF_V6(ifrt_v6setsrc_addr) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 891 | |
/* How many IP addresses a physical interface can host. */
#define	MAX_ADDRS_PER_IF	8192

/*
 * How many source addresses are considered during source address
 * selection.  Used by ipif_select_source_v4/v6.
 */
#define	MAX_IPIF_SELECT_SOURCE	50
| 899 | |
#if defined(IP_DEBUG)
/*
 * Structures used to trace refholds and refreles for debugging.
 */
#define	TR_STACK_DEPTH	14	/* entries in tr_buf_t's tr_stack */
#define	TR_BUF_MAX	38	/* entries in th_trace_t's th_trbuf */

struct tr_buf_s {
	int	tr_depth;
	clock_t	tr_time;
	pc_t	tr_stack[TR_STACK_DEPTH];
};
typedef struct tr_buf_s tr_buf_t;

struct th_trace_s {
	int		th_refcnt;
	uint_t		th_trace_lastref;
	kthread_t	*th_id;
	tr_buf_t	th_trbuf[TR_BUF_MAX];
};
typedef struct th_trace_s th_trace_t;

struct th_hash_s {
	list_node_t	thh_link;
	mod_hash_t	*thh_hash;
	ip_stack_t	*thh_ipst;
};
typedef struct th_hash_s th_hash_t;
#endif	/* IP_DEBUG */
| 925 | |
/* Values for ipif_state_flags */
#define	IPIF_CONDEMNED		0x1	/* The ipif is being removed */
#define	IPIF_CHANGING		0x2	/* A critical ipif field is changing */
#define	IPIF_SET_LINKLOCAL	0x10	/* Transient flag during bringup */
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 930 | |
| 931 | /* IP interface structure, one per local address */ |
| 932 | typedef struct ipif_s { |
| 933 | struct ipif_s *ipif_next; |
| 934 | struct ill_s *ipif_ill; /* Back pointer to our ill */ |
| 935 | int ipif_id; /* Logical unit number */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 936 | in6_addr_t ipif_v6lcl_addr; /* Local IP address for this if. */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 937 | in6_addr_t ipif_v6subnet; /* Subnet prefix for this if. */ |
| 938 | in6_addr_t ipif_v6net_mask; /* Net mask for this interface. */ |
| 939 | in6_addr_t ipif_v6brd_addr; /* Broadcast addr for this interface. */ |
| 940 | in6_addr_t ipif_v6pp_dst_addr; /* Point-to-point dest address. */ |
| 941 | uint64_t ipif_flags; /* Interface flags. */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 942 | uint_t ipif_ire_type; /* IRE_LOCAL or IRE_LOOPBACK */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 943 | |
| 944 | /* |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 945 | * The packet count in the ipif contain the sum of the |
| 946 | * packet counts in dead IRE_LOCAL/LOOPBACK for this ipif. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 947 | */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 948 | uint_t ipif_ib_pkt_count; /* Inbound packets for our dead IREs */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 949 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 950 | /* Exclusive bit fields, protected by ipsq_t */ |
| 951 | unsigned int |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 952 | ipif_was_up : 1, /* ipif was up before */ |
carlsonj | 69bb4bb | 2006-08-14 14:10:48 -0700 | [diff] [blame] | 953 | ipif_addr_ready : 1, /* DAD is done */ |
| 954 | ipif_was_dup : 1, /* DAD had failed */ |
Sowmini Varadhan | 3efde6d | 2009-05-07 20:59:19 -0400 | [diff] [blame] | 955 | ipif_added_nce : 1, /* nce added for local address */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 956 | |
| 957 | ipif_pad_to_31 : 28; |
| 958 | |
| 959 | ilm_t *ipif_allhosts_ilm; /* For all-nodes join */ |
| 960 | ilm_t *ipif_solmulti_ilm; /* For IPv6 solicited multicast join */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 961 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 962 | uint_t ipif_seqid; /* unique index across all ills */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 963 | uint_t ipif_state_flags; /* See IPIF_* flag defs above */ |
| 964 | uint_t ipif_refcnt; /* active consistent reader cnt */ |
sowmini | 968d2fd | 2008-03-21 06:08:04 -0700 | [diff] [blame] | 965 | |
sowmini | 968d2fd | 2008-03-21 06:08:04 -0700 | [diff] [blame] | 966 | zoneid_t ipif_zoneid; /* zone ID number */ |
carlsonj | 69bb4bb | 2006-08-14 14:10:48 -0700 | [diff] [blame] | 967 | timeout_id_t ipif_recovery_id; /* Timer for DAD recovery */ |
carlsonj | 6a8288c | 2007-09-11 04:26:06 -0700 | [diff] [blame] | 968 | boolean_t ipif_trace_disable; /* True when alloc fails */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 969 | /* |
| 970 | * For an IPMP interface, ipif_bound_ill tracks the ill whose hardware |
| 971 | * information this ipif is associated with via ARP/NDP. We can use |
| 972 | * an ill pointer (rather than an index) because only ills that are |
| 973 | * part of a group will be pointed to, and an ill cannot disappear |
| 974 | * while it's in a group. |
| 975 | */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 976 | struct ill_s *ipif_bound_ill; |
| 977 | struct ipif_s *ipif_bound_next; /* bound ipif chain */ |
| 978 | boolean_t ipif_bound; /* B_TRUE if we successfully bound */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 979 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 980 | struct ire_s *ipif_ire_local; /* Our IRE_LOCAL or LOOPBACK */ |
Erik Nordmark | 0e0e37a | 2009-11-17 11:42:22 -0800 | [diff] [blame] | 981 | struct ire_s *ipif_ire_if; /* Our IRE_INTERFACE */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 982 | } ipif_t; |
sowmini | 968d2fd | 2008-03-21 06:08:04 -0700 | [diff] [blame] | 983 | |
/*
 * The following table lists the protection levels of the various members
 * of the ipif_t. The following notation is used.
 *
 * Write once - Written to only once at the time of bringing up
 * the interface and can be safely read after the bringup without any lock.
 *
 * ipsq - Need to execute in the ipsq to perform the indicated access.
 *
 * ill_lock - Need to hold this mutex to perform the indicated access.
 *
 * ill_g_lock - Need to hold this rw lock as reader/writer for read access or
 * write access respectively.
 *
 * down ill - Written to only when the ill is down (i.e. all ipifs are down)
 * up ill - Read only when the ill is up (i.e. at least 1 ipif is up)
 *
 *               Table of ipif_t members and their protection
 *
 * ipif_next            ipsq + ill_lock +       ipsq OR ill_lock OR
 *                      ill_g_lock              ill_g_lock
 * ipif_ill             ipsq + down ipif        write once
 * ipif_id              ipsq + down ipif        write once
 * ipif_v6lcl_addr      ipsq + down ipif        up ipif
 * ipif_v6subnet        ipsq + down ipif        up ipif
 * ipif_v6net_mask      ipsq + down ipif        up ipif
 *
 * ipif_v6brd_addr
 * ipif_v6pp_dst_addr
 * ipif_flags           ill_lock                ill_lock
 * ipif_ire_type        ipsq + down ill         up ill
 *
 * ipif_ib_pkt_count    Approx
 *
 * bit fields           ill_lock                ill_lock
 *
 * ipif_allhosts_ilm    ipsq                    ipsq
 * ipif_solmulti_ilm    ipsq                    ipsq
 *
 * ipif_seqid           ipsq                    Write once
 *
 * ipif_state_flags     ill_lock                ill_lock
 * ipif_refcnt          ill_lock                ill_lock
 * ipif_bound_ill       ipsq + ipmp_lock        ipsq OR ipmp_lock
 * ipif_bound_next      ipsq                    ipsq
 * ipif_bound           ipsq                    ipsq
 *
 * ipif_ire_local       ipsq + ips_ill_g_lock   ipsq OR ips_ill_g_lock
 * ipif_ire_if          ipsq + ips_ill_g_lock   ipsq OR ips_ill_g_lock
 */
| 1034 | |
/*
 * Values returned by ip_laddr_verify_{v4,v6}.
 */
typedef enum {
	IPVL_UNICAST_UP,
	IPVL_UNICAST_DOWN,
	IPVL_MCAST,
	IPVL_BCAST,
	IPVL_BAD
} ip_laddr_t;
| 1040 | |
| 1041 | |
/*
 * Reduce tid to a hash index: the value is converted to uintptr_t, shifted
 * right by 6 bits and masked with (IP_TR_HASH_MAX - 1).
 *
 * The argument is parenthesized under the cast so that the conversion
 * applies to the whole expression passed in.  Without that, an argument
 * such as "ptr + n" would be expanded as "(uintptr_t)ptr + n", turning
 * scaled pointer arithmetic into a plain integer addition.
 *
 * NOTE(review): the mask presumes IP_TR_HASH_MAX is a power of two; it is
 * defined outside this part of the file - confirm.
 */
#define	IP_TR_HASH(tid)	((((uintptr_t)(tid)) >> 6) & (IP_TR_HASH_MAX - 1))
| 1043 | |
/*
 * Reference tracing hooks.  On DEBUG builds they call the ipif/ill
 * trace and untrace functions; otherwise they expand to nothing.
 */
#if defined(DEBUG)
#define	IPIF_TRACE_REF(ipif)	ipif_trace_ref(ipif)
#define	ILL_TRACE_REF(ill)	ill_trace_ref(ill)
#define	IPIF_UNTRACE_REF(ipif)	ipif_untrace_ref(ipif)
#define	ILL_UNTRACE_REF(ill)	ill_untrace_ref(ill)
#else	/* !DEBUG */
#define	IPIF_TRACE_REF(ipif)
#define	ILL_TRACE_REF(ill)
#define	IPIF_UNTRACE_REF(ipif)
#define	ILL_UNTRACE_REF(ill)
#endif	/* DEBUG */
| 1055 | |
/* IPv4 compatibility aliases for the IPv6-sized ipif address fields. */
#define	ipif_lcl_addr		V4_PART_OF_V6(ipif_v6lcl_addr)
#define	ipif_subnet		V4_PART_OF_V6(ipif_v6subnet)
#define	ipif_net_mask		V4_PART_OF_V6(ipif_v6net_mask)
#define	ipif_brd_addr		V4_PART_OF_V6(ipif_v6brd_addr)
#define	ipif_pp_dst_addr	V4_PART_OF_V6(ipif_v6pp_dst_addr)

/* Shorthand for reaching back to a field of the ill. */
#define	ipif_isv6	ipif_ill->ill_isv6
| 1065 | |
| 1066 | #define SIOCLIFADDR_NDX 112 /* ndx of SIOCLIFADDR in the ndx ioctl table */ |
| 1067 | |
| 1068 | /* |
| 1069 | * mode value for ip_ioctl_finish for finishing an ioctl |
| 1070 | */ |
| 1071 | #define CONN_CLOSE 1 /* No mi_copy */ |
| 1072 | #define COPYOUT 2 /* do an mi_copyout if needed */ |
| 1073 | #define NO_COPYOUT 3 /* do an mi_copy_done */ |
meem | b051ecf | 2006-12-27 21:32:46 -0800 | [diff] [blame] | 1074 | #define IPI2MODE(ipi) ((ipi)->ipi_flags & IPI_GET_CMD ? COPYOUT : NO_COPYOUT) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1075 | |
| 1076 | /* |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1077 | * The IP-MT design revolves around the serialization objects ipsq_t (IPSQ) |
| 1078 | * and ipxop_t (exclusive operation or "xop"). Becoming "writer" on an IPSQ |
| 1079 | * ensures that no other threads can become "writer" on any IPSQs sharing that |
| 1080 | * IPSQ's xop until the writer thread is done. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1081 | * |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1082 | * Each phyint points to one IPSQ that remains fixed over the phyint's life. |
| 1083 | * Each IPSQ points to one xop that can change over the IPSQ's life. If a |
| 1084 | * phyint is *not* in an IPMP group, then its IPSQ will refer to the IPSQ's |
| 1085 | * "own" xop (ipsq_ownxop). If a phyint *is* part of an IPMP group, then its |
| 1086 | * IPSQ will refer to the "group" xop, which is shorthand for the xop of the |
| 1087 | * IPSQ of the IPMP meta-interface's phyint. Thus, all phyints that are part |
| 1088 | * of the same IPMP group will have their IPSQ's point to the group xop, and |
| 1089 | * thus becoming "writer" on any phyint in the group will prevent any other |
| 1090 | * writer on any other phyint in the group. All IPSQs sharing the same xop |
| 1091 | * are chained together through ipsq_next (in the degenerate common case, |
| 1092 | * ipsq_next simply refers to itself). Note that the group xop is guaranteed |
| 1093 | * to exist at least as long as there are members in the group, since the IPMP |
| 1094 | * meta-interface can only be destroyed if the group is empty. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1095 | * |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1096 | * Incoming exclusive operation requests are enqueued on the IPSQ they arrived |
| 1097 | * on rather than the xop. This makes switching xop's (as would happen when a |
| 1098 | * phyint leaves an IPMP group) simple, because after the phyint leaves the |
| 1099 | * group, any operations enqueued on its IPSQ can be safely processed with |
| 1100 | * respect to its new xop, and any operations enqueued on the IPSQs of its |
| 1101 | * former group can be processed with respect to their existing group xop. |
| 1102 | * Even so, switching xops is a subtle dance; see ipsq_dq() for details. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1103 | * |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1104 | * An IPSQ's "own" xop is embedded within the IPSQ itself since they have |
| 1105 | * identical lifetimes, and because doing so simplifies pointer management. |
| 1106 | * While each phyint and IPSQ point to each other, it is not possible to free |
| 1107 | * the IPSQ when the phyint is freed, since we may still be *inside* the IPSQ |
| 1108 | * when the phyint is being freed. Thus, ipsq_phyint is set to NULL when the |
| 1109 | * phyint is freed, and the IPSQ free is later done in ipsq_exit(). |
| 1110 | * |
| 1111 | * ipsq_t synchronization: read write |
| 1112 | * |
| 1113 | * ipsq_xopq_mphead ipx_lock ipx_lock |
| 1114 | * ipsq_xopq_mptail ipx_lock ipx_lock |
| 1115 | * ipsq_switch_mp ipsq_lock ipsq_lock |
| 1116 | * ipsq_phyint write once write once |
| 1117 | * ipsq_next RW_READER ill_g_lock RW_WRITER ill_g_lock |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1118 | * ipsq_xop ipsq_lock or ipsq ipsq_lock + ipsq |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1119 | * ipsq_swxop ipsq ipsq |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1120 | * ipsq_ownxop see ipxop_t see ipxop_t |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1121 | * ipsq_ipst write once write once |
| 1122 | * |
| 1123 | * ipxop_t synchronization: read write |
| 1124 | * |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1125 | * ipx_writer ipx_lock ipx_lock |
| 1126 | * ipx_xop_queued ipx_lock ipx_lock |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1127 | * ipx_mphead ipx_lock ipx_lock |
| 1128 | * ipx_mptail ipx_lock ipx_lock |
| 1129 | * ipx_ipsq write once write once |
| 1130 | * ipx_ipsq_queued ipx_lock ipx_lock |
| 1131 | * ipx_waitfor ipsq or ipx_lock ipsq + ipx_lock |
| 1132 | * ipx_reentry_cnt ipsq or ipx_lock ipsq + ipx_lock |
| 1133 | * ipx_current_done ipsq ipsq |
| 1134 | * ipx_current_ioctl ipsq ipsq |
| 1135 | * ipx_current_ipif ipsq or ipx_lock ipsq + ipx_lock |
| 1136 | * ipx_pending_ipif ipsq or ipx_lock ipsq + ipx_lock |
| 1137 | * ipx_pending_mp ipsq or ipx_lock ipsq + ipx_lock |
| 1138 | * ipx_forced ipsq ipsq |
| 1139 | * ipx_depth ipsq ipsq |
| 1140 | * ipx_stack ipsq ipsq |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1141 | */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1142 | typedef struct ipxop_s { |
| 1143 | kmutex_t ipx_lock; /* see above */ |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1144 | kthread_t *ipx_writer; /* current owner */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1145 | mblk_t *ipx_mphead; /* messages tied to this op */ |
| 1146 | mblk_t *ipx_mptail; |
| 1147 | struct ipsq_s *ipx_ipsq; /* associated ipsq */ |
| 1148 | boolean_t ipx_ipsq_queued; /* ipsq using xop has queued op */ |
| 1149 | int ipx_waitfor; /* waiting; values encoded below */ |
| 1150 | int ipx_reentry_cnt; |
| 1151 | boolean_t ipx_current_done; /* is the current operation done? */ |
| 1152 | int ipx_current_ioctl; /* current ioctl, or 0 if no ioctl */ |
| 1153 | ipif_t *ipx_current_ipif; /* ipif for current op */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1154 | ipif_t *ipx_pending_ipif; /* ipif for ipx_pending_mp */ |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1155 | mblk_t *ipx_pending_mp; /* current ioctl mp while waiting */ |
| 1156 | boolean_t ipx_forced; /* debugging aid */ |
carlsonj | 6a8288c | 2007-09-11 04:26:06 -0700 | [diff] [blame] | 1157 | #ifdef DEBUG |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1158 | int ipx_depth; /* debugging aid */ |
| 1159 | #define IPX_STACK_DEPTH 15 |
| 1160 | pc_t ipx_stack[IPX_STACK_DEPTH]; /* debugging aid */ |
carlsonj | 6a8288c | 2007-09-11 04:26:06 -0700 | [diff] [blame] | 1161 | #endif |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1162 | } ipxop_t; |
| 1163 | |
| 1164 | typedef struct ipsq_s { |
| 1165 | kmutex_t ipsq_lock; /* see above */ |
| 1166 | mblk_t *ipsq_switch_mp; /* op to handle right after switch */ |
| 1167 | mblk_t *ipsq_xopq_mphead; /* list of excl ops (mostly ioctls) */ |
| 1168 | mblk_t *ipsq_xopq_mptail; |
| 1169 | struct phyint *ipsq_phyint; /* associated phyint */ |
| 1170 | struct ipsq_s *ipsq_next; /* next ipsq sharing ipsq_xop */ |
| 1171 | struct ipxop_s *ipsq_xop; /* current xop synchronization info */ |
| 1172 | struct ipxop_s *ipsq_swxop; /* switch xop to on ipsq_exit() */ |
| 1173 | struct ipxop_s ipsq_ownxop; /* our own xop (may not be in-use) */ |
| 1174 | ip_stack_t *ipsq_ipst; /* does not have a netstack_hold */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1175 | } ipsq_t; |
| 1176 | |
/*
 * Values stored in ipx_waitfor.  Each one names the routine that is waiting
 * for reference counts to drop.  Numbering starts at 1.
 */
enum {
	IPIF_DOWN = 1,	/* ipif_down() is waiting for refcnts to drop */
	ILL_DOWN = 2,	/* ill_down() is waiting for refcnts to drop */
	IPIF_FREE = 3,	/* ipif_free() is waiting for refcnts to drop */
	ILL_FREE = 4	/* ill unplumb is waiting for refcnts to drop */
};
| 1186 | |
| 1187 | /* Operation types for ipsq_try_enter() */ |
| 1188 | #define CUR_OP 0 /* request writer within current operation */ |
| 1189 | #define NEW_OP 1 /* request writer for a new operation */ |
| 1190 | #define SWITCH_OP 2 /* request writer once IPSQ XOP switches */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1191 | |
/*
 * Indices of the kstats tracked on each IPMP meta-interface.  The order of
 * these enumerators must match ipmp_kstats[] in ip/ipmp.c.
 */
enum {
	IPMP_KSTAT_OBYTES,
	IPMP_KSTAT_OBYTES64,
	IPMP_KSTAT_RBYTES,
	IPMP_KSTAT_RBYTES64,
	IPMP_KSTAT_OPACKETS,
	IPMP_KSTAT_OPACKETS64,
	IPMP_KSTAT_OERRORS,
	IPMP_KSTAT_IPACKETS,
	IPMP_KSTAT_IPACKETS64,
	IPMP_KSTAT_IERRORS,
	IPMP_KSTAT_MULTIRCV,
	IPMP_KSTAT_MULTIXMT,
	IPMP_KSTAT_BRDCSTRCV,
	IPMP_KSTAT_BRDCSTXMT,
	IPMP_KSTAT_LINK_UP,
	IPMP_KSTAT_MAX		/* number of kstats; keep last */
};
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1204 | |
| 1205 | /* |
| 1206 | * phyint represents state that is common to both IPv4 and IPv6 interfaces. |
| 1207 | * There is a separate ill_t representing IPv4 and IPv6 which has a |
meem | 7924222 | 2008-07-29 18:39:05 -0700 | [diff] [blame] | 1208 | * backpointer to the phyint structure for accessing common state. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1209 | */ |
| 1210 | typedef struct phyint { |
| 1211 | struct ill_s *phyint_illv4; |
| 1212 | struct ill_s *phyint_illv6; |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1213 | uint_t phyint_ifindex; /* SIOCSLIFINDEX */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1214 | uint64_t phyint_flags; |
| 1215 | avl_node_t phyint_avl_by_index; /* avl tree by index */ |
| 1216 | avl_node_t phyint_avl_by_name; /* avl tree by name */ |
| 1217 | kmutex_t phyint_lock; |
| 1218 | struct ipsq_s *phyint_ipsq; /* back pointer to ipsq */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1219 | struct ipmp_grp_s *phyint_grp; /* associated IPMP group */ |
| 1220 | char phyint_name[LIFNAMSIZ]; /* physical interface name */ |
| 1221 | uint64_t phyint_kstats0[IPMP_KSTAT_MAX]; /* baseline kstats */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1222 | } phyint_t; |
| 1223 | |
| 1224 | #define CACHE_ALIGN_SIZE 64 |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1225 | #define CACHE_ALIGN(align_struct) P2ROUNDUP(sizeof (struct align_struct),\ |
| 1226 | CACHE_ALIGN_SIZE) |
| 1227 | struct _phyint_list_s_ { |
| 1228 | avl_tree_t phyint_list_avl_by_index; /* avl tree by index */ |
| 1229 | avl_tree_t phyint_list_avl_by_name; /* avl tree by name */ |
| 1230 | }; |
| 1231 | |
| 1232 | typedef union phyint_list_u { |
| 1233 | struct _phyint_list_s_ phyint_list_s; |
| 1234 | char phyint_list_filler[CACHE_ALIGN(_phyint_list_s_)]; |
| 1235 | } phyint_list_t; |
| 1236 | |
| 1237 | #define phyint_list_avl_by_index phyint_list_s.phyint_list_avl_by_index |
| 1238 | #define phyint_list_avl_by_name phyint_list_s.phyint_list_avl_by_name |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1239 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1240 | /* |
| 1241 | * Fragmentation hash bucket |
| 1242 | */ |
| 1243 | typedef struct ipfb_s { |
| 1244 | struct ipf_s *ipfb_ipf; /* List of ... */ |
| 1245 | size_t ipfb_count; /* Count of bytes used by frag(s) */ |
| 1246 | kmutex_t ipfb_lock; /* Protect all ipf in list */ |
| 1247 | uint_t ipfb_frag_pkts; /* num of distinct fragmented pkts */ |
| 1248 | } ipfb_t; |
| 1249 | |
| 1250 | /* |
| 1251 | * IRE bucket structure. Usually there is an array of such structures, |
| 1252 | * each pointing to a linked list of ires. irb_refcnt counts the number |
| 1253 | * of walkers of a given hash bucket. Usually the reference count is |
| 1254 | * bumped up if the walker wants no IRES to be DELETED while walking the |
| 1255 | * list. Bumping up does not PREVENT ADDITION. This allows walking a given |
| 1256 | * hash bucket without stumbling upon a freed pointer. |
sangeeta | c793af9 | 2006-08-11 05:59:29 -0700 | [diff] [blame] | 1257 | * |
| 1258 | * irb_t structures in ip_ftable are dynamically allocated and freed. |
apersson | 3173664 | 2006-12-19 17:33:00 -0800 | [diff] [blame] | 1259 | * In order to identify the irb_t structures that can be safely kmem_free'd |
sangeeta | c793af9 | 2006-08-11 05:59:29 -0700 | [diff] [blame] | 1260 | * we need to ensure that |
| 1261 | * - the irb_refcnt is quiescent, indicating no other walkers, |
| 1262 | * - no other threads or ire's are holding references to the irb, |
| 1263 | * i.e., irb_nire == 0, |
| 1264 | * - there are no active ire's in the bucket, i.e., irb_ire_cnt == 0 |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1265 | */ |
| 1266 | typedef struct irb { |
| 1267 | struct ire_s *irb_ire; /* First ire in this bucket */ |
| 1268 | /* Should be first in this struct */ |
| 1269 | krwlock_t irb_lock; /* Protect this bucket */ |
| 1270 | uint_t irb_refcnt; /* Protected by irb_lock */ |
| 1271 | uchar_t irb_marks; /* CONDEMNED ires in this bucket ? */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1272 | #define IRB_MARK_CONDEMNED 0x0001 /* Contains some IRE_IS_CONDEMNED */ |
| 1273 | #define IRB_MARK_DYNAMIC 0x0002 /* Dynamically allocated */ |
| 1274 | /* Once IPv6 uses radix then IRB_MARK_DYNAMIC will be always be set */ |
sangeeta | c793af9 | 2006-08-11 05:59:29 -0700 | [diff] [blame] | 1275 | uint_t irb_ire_cnt; /* Num of active IRE in this bucket */ |
sangeeta | c793af9 | 2006-08-11 05:59:29 -0700 | [diff] [blame] | 1276 | int irb_nire; /* Num of ftable ire's that ref irb */ |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 1277 | ip_stack_t *irb_ipst; /* Does not have a netstack_hold */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1278 | } irb_t; |
| 1279 | |
Ravi Chandra Nallan | 7f125a5 | 2010-07-13 18:17:30 +0530 | [diff] [blame] | 1280 | /* |
| 1281 | * This is the structure used to store the multicast physical addresses |
| 1282 | * that an interface has joined. |
| 1283 | * The refcnt keeps track of the number of multicast IP addresses mapping |
| 1284 | * to a physical multicast address. |
| 1285 | */ |
| 1286 | typedef struct multiphysaddr_s { |
| 1287 | struct multiphysaddr_s *mpa_next; |
| 1288 | char mpa_addr[IP_MAX_HW_LEN]; |
| 1289 | int mpa_refcnt; |
| 1290 | } multiphysaddr_t; |
| 1291 | |
/*
 * Recover the enclosing rt_t from a pointer to its embedded rt_irb member by
 * subtracting the member's offset.  The whole expansion is parenthesized so
 * the result can be used directly in postfix expressions such as
 * IRB2RT(irb)->member; without the outer parentheses the "->" would bind to
 * the inner expression before the (rt_t *) cast is applied.
 */
#define	IRB2RT(irb)	((rt_t *)((caddr_t)(irb) - offsetof(rt_t, rt_irb)))
| 1293 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1294 | /* Forward declarations */ |
| 1295 | struct dce_s; |
| 1296 | typedef struct dce_s dce_t; |
| 1297 | struct ire_s; |
| 1298 | typedef struct ire_s ire_t; |
| 1299 | struct ncec_s; |
| 1300 | typedef struct ncec_s ncec_t; |
| 1301 | struct nce_s; |
| 1302 | typedef struct nce_s nce_t; |
| 1303 | struct ip_recv_attr_s; |
| 1304 | typedef struct ip_recv_attr_s ip_recv_attr_t; |
| 1305 | struct ip_xmit_attr_s; |
| 1306 | typedef struct ip_xmit_attr_s ip_xmit_attr_t; |
| 1307 | |
| 1308 | struct tsol_ire_gw_secattr_s; |
| 1309 | typedef struct tsol_ire_gw_secattr_s tsol_ire_gw_secattr_t; |
| 1310 | |
| 1311 | /* |
| 1312 | * This is a structure for a one-element route cache that is passed |
| 1313 | * by reference between ip_input and ill_inputfn. |
| 1314 | */ |
| 1315 | typedef struct { |
| 1316 | ire_t *rtc_ire; |
| 1317 | ipaddr_t rtc_ipaddr; |
| 1318 | in6_addr_t rtc_ip6addr; |
| 1319 | } rtc_t; |
| 1320 | |
| 1321 | /* |
| 1322 | * Note: Temporarily use 64 bits, and will probably go back to 32 bits after |
| 1323 | * more cleanup work is done. |
| 1324 | */ |
| 1325 | typedef uint64_t iaflags_t; |
| 1326 | |
| 1327 | /* The ill input function pointer type */ |
| 1328 | typedef void (*pfillinput_t)(mblk_t *, void *, void *, ip_recv_attr_t *, |
| 1329 | rtc_t *); |
| 1330 | |
| 1331 | /* The ire receive function pointer type */ |
| 1332 | typedef void (*pfirerecv_t)(ire_t *, mblk_t *, void *, ip_recv_attr_t *); |
| 1333 | |
| 1334 | /* The ire send and postfrag function pointer types */ |
| 1335 | typedef int (*pfiresend_t)(ire_t *, mblk_t *, void *, |
| 1336 | ip_xmit_attr_t *, uint32_t *); |
| 1337 | typedef int (*pfirepostfrag_t)(mblk_t *, nce_t *, iaflags_t, uint_t, uint32_t, |
| 1338 | zoneid_t, zoneid_t, uintptr_t *); |
| 1339 | |
sangeeta | c793af9 | 2006-08-11 05:59:29 -0700 | [diff] [blame] | 1340 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1341 | #define IP_V4_G_HEAD 0 |
| 1342 | #define IP_V6_G_HEAD 1 |
| 1343 | |
| 1344 | #define MAX_G_HEADS 2 |
| 1345 | |
| 1346 | /* |
| 1347 | * unpadded ill_if structure |
| 1348 | */ |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1349 | struct _ill_if_s_ { |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1350 | union ill_if_u *illif_next; |
| 1351 | union ill_if_u *illif_prev; |
| 1352 | avl_tree_t illif_avl_by_ppa; /* AVL tree sorted on ppa */ |
| 1353 | vmem_t *illif_ppa_arena; /* ppa index space */ |
| 1354 | uint16_t illif_mcast_v1; /* hints for */ |
| 1355 | uint16_t illif_mcast_v2; /* [igmp|mld]_slowtimo */ |
| 1356 | int illif_name_len; /* name length */ |
| 1357 | char illif_name[LIFNAMSIZ]; /* name of interface type */ |
| 1358 | }; |
| 1359 | |
| 1360 | /* cache aligned ill_if structure */ |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1361 | typedef union ill_if_u { |
| 1362 | struct _ill_if_s_ ill_if_s; |
| 1363 | char illif_filler[CACHE_ALIGN(_ill_if_s_)]; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1364 | } ill_if_t; |
| 1365 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1366 | #define illif_next ill_if_s.illif_next |
| 1367 | #define illif_prev ill_if_s.illif_prev |
| 1368 | #define illif_avl_by_ppa ill_if_s.illif_avl_by_ppa |
| 1369 | #define illif_ppa_arena ill_if_s.illif_ppa_arena |
| 1370 | #define illif_mcast_v1 ill_if_s.illif_mcast_v1 |
| 1371 | #define illif_mcast_v2 ill_if_s.illif_mcast_v2 |
| 1372 | #define illif_name ill_if_s.illif_name |
| 1373 | #define illif_name_len ill_if_s.illif_name_len |
| 1374 | |
/* Cursor state for a walk over the ill lists. */
typedef struct ill_walk_context_s {
	int	ctx_current_list;	/* list currently being searched */
	int	ctx_last_list;		/* final list to search */
} ill_walk_context_t;
| 1379 | |
| 1380 | /* |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 1381 | * ill_g_heads structure, one for IPV4 and one for IPV6 |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1382 | */ |
| 1383 | struct _ill_g_head_s_ { |
| 1384 | ill_if_t *ill_g_list_head; |
| 1385 | ill_if_t *ill_g_list_tail; |
| 1386 | }; |
| 1387 | |
| 1388 | typedef union ill_g_head_u { |
| 1389 | struct _ill_g_head_s_ ill_g_head_s; |
| 1390 | char ill_g_head_filler[CACHE_ALIGN(_ill_g_head_s_)]; |
| 1391 | } ill_g_head_t; |
| 1392 | |
| 1393 | #define ill_g_list_head ill_g_head_s.ill_g_list_head |
| 1394 | #define ill_g_list_tail ill_g_head_s.ill_g_list_tail |
| 1395 | |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 1396 | #define IP_V4_ILL_G_LIST(ipst) \ |
| 1397 | (ipst)->ips_ill_g_heads[IP_V4_G_HEAD].ill_g_list_head |
| 1398 | #define IP_V6_ILL_G_LIST(ipst) \ |
| 1399 | (ipst)->ips_ill_g_heads[IP_V6_G_HEAD].ill_g_list_head |
| 1400 | #define IP_VX_ILL_G_LIST(i, ipst) \ |
| 1401 | (ipst)->ips_ill_g_heads[i].ill_g_list_head |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1402 | |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 1403 | #define ILL_START_WALK_V4(ctx_ptr, ipst) \ |
| 1404 | ill_first(IP_V4_G_HEAD, IP_V4_G_HEAD, ctx_ptr, ipst) |
| 1405 | #define ILL_START_WALK_V6(ctx_ptr, ipst) \ |
| 1406 | ill_first(IP_V6_G_HEAD, IP_V6_G_HEAD, ctx_ptr, ipst) |
| 1407 | #define ILL_START_WALK_ALL(ctx_ptr, ipst) \ |
| 1408 | ill_first(MAX_G_HEADS, MAX_G_HEADS, ctx_ptr, ipst) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1409 | |
| 1410 | /* |
| 1411 | * Capabilities, possible flags for ill_capabilities. |
| 1412 | */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1413 | #define ILL_CAPAB_LSO 0x04 /* Large Send Offload */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1414 | #define ILL_CAPAB_HCKSUM 0x08 /* Hardware checksumming */ |
| 1415 | #define ILL_CAPAB_ZEROCOPY 0x10 /* Zero-copy */ |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 1416 | #define ILL_CAPAB_DLD 0x20 /* DLD capabilities */ |
| 1417 | #define ILL_CAPAB_DLD_POLL 0x40 /* Polling */ |
| 1418 | #define ILL_CAPAB_DLD_DIRECT 0x80 /* Direct function call */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1419 | |
| 1420 | /* |
| 1421 | * Per-ill Hardware Checksumming capabilities. |
| 1422 | */ |
| 1423 | typedef struct ill_hcksum_capab_s ill_hcksum_capab_t; |
| 1424 | |
| 1425 | /* |
| 1426 | * Per-ill Zero-copy capabilities. |
| 1427 | */ |
| 1428 | typedef struct ill_zerocopy_capab_s ill_zerocopy_capab_t; |
| 1429 | |
| 1430 | /* |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 1431 | * DLD capabilities. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1432 | */ |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 1433 | typedef struct ill_dld_capab_s ill_dld_capab_t; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1434 | |
| 1435 | /* |
| 1436 | * Per-ill polling resource map. |
| 1437 | */ |
| 1438 | typedef struct ill_rx_ring ill_rx_ring_t; |
| 1439 | |
yl150051 | 8347601 | 2006-11-13 20:44:19 -0800 | [diff] [blame] | 1440 | /* |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1441 | * Per-ill Large Send Offload capabilities. |
yl150051 | 8347601 | 2006-11-13 20:44:19 -0800 | [diff] [blame] | 1442 | */ |
| 1443 | typedef struct ill_lso_capab_s ill_lso_capab_t; |
| 1444 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1445 | /* The following are ill_state_flags */ |
| 1446 | #define ILL_LL_SUBNET_PENDING 0x01 /* Waiting for DL_INFO_ACK from drv */ |
| 1447 | #define ILL_CONDEMNED 0x02 /* No more new ref's to the ILL */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1448 | #define ILL_DL_UNBIND_IN_PROGRESS 0x04 /* UNBIND_REQ is sent */ |
Sowmini Varadhan | 2ea22bf | 2010-06-07 10:10:19 -0400 | [diff] [blame] | 1449 | /* |
| 1450 | * ILL_DOWN_IN_PROGRESS is set to ensure the following: |
| 1451 | * - no packets are sent to the driver after the DL_UNBIND_REQ is sent, |
| 1452 | * - no longstanding references will be acquired on objects that are being |
| 1453 | * brought down. |
| 1454 | */ |
| 1455 | #define ILL_DOWN_IN_PROGRESS 0x08 |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1456 | |
| 1457 | /* Is this an ILL whose source address is used by other ILL's ? */ |
| 1458 | #define IS_USESRC_ILL(ill) \ |
| 1459 | (((ill)->ill_usesrc_ifindex == 0) && \ |
kcpoon | 5dddb8b | 2006-01-06 00:24:46 -0800 | [diff] [blame] | 1460 | ((ill)->ill_usesrc_grp_next != NULL)) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1461 | |
| 1462 | /* Is this a client/consumer of the usesrc ILL ? */ |
| 1463 | #define IS_USESRC_CLI_ILL(ill) \ |
| 1464 | (((ill)->ill_usesrc_ifindex != 0) && \ |
kcpoon | 5dddb8b | 2006-01-06 00:24:46 -0800 | [diff] [blame] | 1465 | ((ill)->ill_usesrc_grp_next != NULL)) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1466 | |
| 1467 | /* Is this a virtual network interface (vni) ILL ? */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1468 | #define IS_VNI(ill) \ |
sangeeta | c793af9 | 2006-08-11 05:59:29 -0700 | [diff] [blame] | 1469 | (((ill)->ill_phyint->phyint_flags & (PHYI_LOOPBACK|PHYI_VIRTUAL)) == \ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1470 | PHYI_VIRTUAL) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1471 | |
/*
 * Is this a loopback ILL?
 * Evaluates to 1 when PHYI_LOOPBACK is set in the phyint flags and to 0
 * otherwise, so the result is a proper truth value rather than the raw
 * masked flag bits.
 */
#define	IS_LOOPBACK(ill) \
	(((ill)->ill_phyint->phyint_flags & PHYI_LOOPBACK) != 0)
| 1475 | |
/*
 * Is this an IPMP meta-interface ILL?
 * Evaluates to 1 when PHYI_IPMP is set in the phyint flags and to 0
 * otherwise.  phyint_flags is 64 bits wide, so returning the raw masked
 * value would lose a flag bit above bit 31 when the result is stored in an
 * int-sized variable (e.g. a boolean_t); comparing against zero avoids that.
 */
#define	IS_IPMP(ill) \
	(((ill)->ill_phyint->phyint_flags & PHYI_IPMP) != 0)
| 1479 | |
| 1480 | /* Is this ILL under an IPMP meta-interface? (aka "in a group?") */ |
| 1481 | #define IS_UNDER_IPMP(ill) \ |
| 1482 | ((ill)->ill_grp != NULL && !IS_IPMP(ill)) |
| 1483 | |
| 1484 | /* Is ill1 in the same illgrp as ill2? */ |
| 1485 | #define IS_IN_SAME_ILLGRP(ill1, ill2) \ |
| 1486 | ((ill1)->ill_grp != NULL && ((ill1)->ill_grp == (ill2)->ill_grp)) |
| 1487 | |
| 1488 | /* Is ill1 on the same LAN as ill2? */ |
| 1489 | #define IS_ON_SAME_LAN(ill1, ill2) \ |
| 1490 | ((ill1) == (ill2) || IS_IN_SAME_ILLGRP(ill1, ill2)) |
| 1491 | |
| 1492 | #define ILL_OTHER(ill) \ |
| 1493 | ((ill)->ill_isv6 ? (ill)->ill_phyint->phyint_illv4 : \ |
| 1494 | (ill)->ill_phyint->phyint_illv6) |
| 1495 | |
| 1496 | /* |
| 1497 | * IPMP group ILL state structure -- up to two per IPMP group (V4 and V6). |
| 1498 | * Created when the V4 and/or V6 IPMP meta-interface is I_PLINK'd. It is |
| 1499 | * guaranteed to persist while there are interfaces of that type in the group. |
| 1500 | * In general, most fields are accessed outside of the IPSQ (e.g., in the |
| 1501 | * datapath), and thus use locks in addition to the IPSQ for protection. |
| 1502 | * |
| 1503 | * synchronization: read write |
| 1504 | * |
| 1505 | * ig_if ipsq or ill_g_lock ipsq and ill_g_lock |
| 1506 | * ig_actif ipsq or ipmp_lock ipsq and ipmp_lock |
| 1507 | * ig_nactif ipsq or ipmp_lock ipsq and ipmp_lock |
| 1508 | * ig_next_ill ipsq or ipmp_lock ipsq and ipmp_lock |
| 1509 | * ig_ipmp_ill write once write once |
| 1510 | * ig_cast_ill ipsq or ipmp_lock ipsq and ipmp_lock |
| 1511 | * ig_arpent ipsq ipsq |
| 1512 | * ig_mtu ipsq ipsq |
Erik Nordmark | 1eee170 | 2010-08-16 15:30:54 -0700 | [diff] [blame] | 1513 | * ig_mc_mtu ipsq ipsq |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1514 | */ |
| 1515 | typedef struct ipmp_illgrp_s { |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1516 | list_t ig_if; /* list of all interfaces */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1517 | list_t ig_actif; /* list of active interfaces */ |
| 1518 | uint_t ig_nactif; /* number of active interfaces */ |
| 1519 | struct ill_s *ig_next_ill; /* next active interface to use */ |
| 1520 | struct ill_s *ig_ipmp_ill; /* backpointer to IPMP meta-interface */ |
| 1521 | struct ill_s *ig_cast_ill; /* nominated ill for multi/broadcast */ |
| 1522 | list_t ig_arpent; /* list of ARP entries */ |
Erik Nordmark | 1eee170 | 2010-08-16 15:30:54 -0700 | [diff] [blame] | 1523 | uint_t ig_mtu; /* ig_ipmp_ill->ill_mtu */ |
| 1524 | uint_t ig_mc_mtu; /* ig_ipmp_ill->ill_mc_mtu */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1525 | } ipmp_illgrp_t; |
| 1526 | |
| 1527 | /* |
| 1528 | * IPMP group state structure -- one per IPMP group. Created when the |
| 1529 | * IPMP meta-interface is plumbed; it is guaranteed to persist while there |
| 1530 | * are interfaces in it. |
| 1531 | * |
| 1532 | * ipmp_grp_t synchronization: read write |
| 1533 | * |
| 1534 | * gr_name ipmp_lock ipmp_lock |
| 1535 | * gr_ifname write once write once |
| 1536 | * gr_mactype ipmp_lock ipmp_lock |
| 1537 | * gr_phyint write once write once |
| 1538 | * gr_nif ipmp_lock ipmp_lock |
| 1539 | * gr_nactif ipsq ipsq |
| 1540 | * gr_v4 ipmp_lock ipmp_lock |
| 1541 | * gr_v6 ipmp_lock ipmp_lock |
| 1542 | * gr_nv4 ipmp_lock ipmp_lock |
| 1543 | * gr_nv6 ipmp_lock ipmp_lock |
| 1544 | * gr_pendv4 ipmp_lock ipmp_lock |
| 1545 | * gr_pendv6 ipmp_lock ipmp_lock |
| 1546 | * gr_linkdownmp ipsq ipsq |
| 1547 | * gr_ksp ipmp_lock ipmp_lock |
| 1548 | * gr_kstats0 atomic atomic |
| 1549 | */ |
| 1550 | typedef struct ipmp_grp_s { |
| 1551 | char gr_name[LIFGRNAMSIZ]; /* group name */ |
| 1552 | char gr_ifname[LIFNAMSIZ]; /* interface name */ |
| 1553 | t_uscalar_t gr_mactype; /* DLPI mactype of group */ |
| 1554 | phyint_t *gr_phyint; /* IPMP group phyint */ |
| 1555 | uint_t gr_nif; /* number of interfaces in group */ |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1556 | uint_t gr_nactif; /* number of active interfaces */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1557 | ipmp_illgrp_t *gr_v4; /* V4 group information */ |
| 1558 | ipmp_illgrp_t *gr_v6; /* V6 group information */ |
| 1559 | uint_t gr_nv4; /* number of ills in V4 group */ |
| 1560 | uint_t gr_nv6; /* number of ills in V6 group */ |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1561 | uint_t gr_pendv4; /* number of pending ills in V4 group */ |
| 1562 | uint_t gr_pendv6; /* number of pending ills in V6 group */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1563 | mblk_t *gr_linkdownmp; /* message used to bring link down */ |
| 1564 | kstat_t *gr_ksp; /* group kstat pointer */ |
| 1565 | uint64_t gr_kstats0[IPMP_KSTAT_MAX]; /* baseline group kstats */ |
| 1566 | } ipmp_grp_t; |
| 1567 | |
| 1568 | /* |
| 1569 | * IPMP ARP entry -- one per SIOCS*ARP entry tied to the group. Used to keep |
| 1570 | * ARP up-to-date as the active set of interfaces in the group changes. |
| 1571 | */ |
| 1572 | typedef struct ipmp_arpent_s { |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1573 | ipaddr_t ia_ipaddr; /* IP address for this entry */ |
| 1574 | boolean_t ia_proxyarp; /* proxy ARP entry? */ |
| 1575 | boolean_t ia_notified; /* ARP notified about this entry? */ |
| 1576 | list_node_t ia_node; /* next ARP entry in list */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1577 | uint16_t ia_flags; /* nce_flags for the address */ |
| 1578 | size_t ia_lladdr_len; |
| 1579 | uchar_t *ia_lladdr; |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1580 | } ipmp_arpent_t; |
| 1581 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1582 | struct arl_s; |
| 1583 | |
| 1584 | /* |
| 1585 | * Per-ill capabilities. |
| 1586 | */ |
| 1587 | struct ill_hcksum_capab_s { |
| 1588 | uint_t ill_hcksum_version; /* H/W checksum interface version */ |
| 1589 | uint_t ill_hcksum_txflags; /* H/W checksum capabilities on transmit */ |
| 1590 | }; |
| 1591 | |
| 1592 | struct ill_zerocopy_capab_s { |
| 1593 | uint_t ill_zerocopy_version; /* zero-copy interface version */ |
| 1594 | uint_t ill_zerocopy_flags; /* zero-copy capability flags */ |
| 1595 | }; |
| 1596 | |
| 1597 | struct ill_lso_capab_s { |
| 1598 | uint_t ill_lso_flags; /* Large Segment Offload capability flags */ |
Robert Mustacchi | 62366fb | 2020-04-01 15:30:20 +0000 | [diff] [blame] | 1599 | uint_t ill_lso_max_tcpv4; /* maximum size of payload (presumably TCP/IPv4; confirm) */ |
| 1600 | uint_t ill_lso_max_tcpv6; /* maximum size of payload (presumably TCP/IPv6; confirm) */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1601 | }; |
| 1602 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1603 | /* |
| 1604 | * IP Lower level Structure. |
| 1605 | * Instance data structure in ip_open when there is a device below us. |
| 1606 | */ |
| 1607 | typedef struct ill_s { |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1608 | pfillinput_t ill_inputfn; /* Fast input function selector */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1609 | ill_if_t *ill_ifptr; /* pointer to interface type */ |
| 1610 | queue_t *ill_rq; /* Read queue. */ |
| 1611 | queue_t *ill_wq; /* Write queue. */ |
| 1612 | |
| 1613 | int ill_error; /* Error value sent up by device. */ |
| 1614 | |
| 1615 | ipif_t *ill_ipif; /* Interface chain for this ILL. */ |
| 1616 | |
| 1617 | uint_t ill_ipif_up_count; /* Number of IPIFs currently up. */ |
| 1618 | uint_t ill_max_frag; /* Max IDU from DLPI. */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1619 | uint_t ill_current_frag; /* Current IDU from DLPI. */ |
| 1620 | uint_t ill_mtu; /* User-specified MTU; SIOCSLIFMTU */ |
Erik Nordmark | 1eee170 | 2010-08-16 15:30:54 -0700 | [diff] [blame] | 1621 | uint_t ill_mc_mtu; /* MTU for multi/broadcast */ |
Girish Moodalbail | 6e91bba | 2010-03-26 17:53:11 -0400 | [diff] [blame] | 1622 | uint_t ill_metric; /* BSD if metric, for compatibility. */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1623 | char *ill_name; /* Our name. */ |
carlsonj | 69bb4bb | 2006-08-14 14:10:48 -0700 | [diff] [blame] | 1624 | uint_t ill_ipif_dup_count; /* Number of duplicate addresses. */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1625 | uint_t ill_name_length; /* Name length, incl. terminator. */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1626 | uint_t ill_net_type; /* IRE_IF_RESOLVER/IRE_IF_NORESOLVER. */ |
| 1627 | /* |
| 1628 | * Physical Point of Attachment num. If DLPI style 1 provider |
| 1629 | * then this is derived from the devname. |
| 1630 | */ |
| 1631 | uint_t ill_ppa; |
| 1632 | t_uscalar_t ill_sap; /* written under ipsq with the ill down; see table below */ |
| 1633 | t_scalar_t ill_sap_length; /* Including sign (for position) */ |
| 1634 | uint_t ill_phys_addr_length; /* Excluding the sap. */ |
| 1635 | uint_t ill_bcast_addr_length; /* Only set when the DL provider */ |
| 1636 | /* supports broadcast. */ |
| 1637 | t_uscalar_t ill_mactype; /* ipsq-protected; see table below */ |
| 1638 | uint8_t *ill_frag_ptr; /* Reassembly state. */ |
| 1639 | timeout_id_t ill_frag_timer_id; /* timeout id for the frag timer */ |
| 1640 | ipfb_t *ill_frag_hash_tbl; /* Fragment hash list head. */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1641 | |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1642 | krwlock_t ill_mcast_lock; /* Protects multicast state */ |
| 1643 | kmutex_t ill_mcast_serializer; /* Serialize across ilg and ilm state */ |
meem | 7924222 | 2008-07-29 18:39:05 -0700 | [diff] [blame] | 1644 | ilm_t *ill_ilm; /* Multicast membership for ill */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1645 | uint_t ill_global_timer; /* for IGMPv3/MLDv2 general queries */ |
| 1646 | int ill_mcast_type; /* type of router which is querier */ |
| 1647 | /* on this interface */ |
| 1648 | uint16_t ill_mcast_v1_time; /* # slow timeouts since last v1 qry */ |
| 1649 | uint16_t ill_mcast_v2_time; /* # slow timeouts since last v2 qry */ |
| 1650 | uint8_t ill_mcast_v1_tset; /* 1 => timer is set; 0 => not set */ |
| 1651 | uint8_t ill_mcast_v2_tset; /* 1 => timer is set; 0 => not set */ |
| 1652 | |
| 1653 | uint8_t ill_mcast_rv; /* IGMPv3/MLDv2 robustness variable */ |
| 1654 | int ill_mcast_qi; /* IGMPv3/MLDv2 query interval var */ |
| 1655 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1656 | /* |
| 1657 | * All non-NULL cells between 'ill_first_mp_to_free' and |
| 1658 | * 'ill_last_mp_to_free' are freed in ill_delete. |
| 1659 | */ |
| 1660 | #define ill_first_mp_to_free ill_bcast_mp |
| 1661 | mblk_t *ill_bcast_mp; /* DLPI header for broadcasts. */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1662 | mblk_t *ill_unbind_mp; /* unbind mp from ill_dl_up() */ |
Philip Kirk | b127ac4 | 2008-11-06 06:47:54 -0500 | [diff] [blame] | 1663 | mblk_t *ill_promiscoff_mp; /* for ill_leave_allmulti() */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1664 | mblk_t *ill_dlpi_deferred; /* b_next chain of control messages */ |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 1665 | mblk_t *ill_dest_addr_mp; /* mblk which holds ill_dest_addr */ |
Cathy Zhou | 5d460ea | 2009-03-17 20:14:50 -0700 | [diff] [blame] | 1666 | mblk_t *ill_replumb_mp; /* replumb mp from ill_replumb() */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1667 | mblk_t *ill_phys_addr_mp; /* mblk which holds ill_phys_addr */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1668 | mblk_t *ill_mcast_deferred; /* b_next chain of IGMP/MLD packets */ |
| 1669 | #define ill_last_mp_to_free ill_mcast_deferred |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1670 | |
| 1671 | cred_t *ill_credp; /* opener's credentials */ |
| 1672 | uint8_t *ill_phys_addr; /* ill_phys_addr_mp->b_rptr + off */ |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 1673 | uint8_t *ill_dest_addr; /* ill_dest_addr_mp->b_rptr + off */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1674 | |
| 1675 | uint_t ill_state_flags; /* see ILL_* flags above */ |
| 1676 | |
| 1677 | /* Following bit fields protected by ipsq_t */ |
| 1678 | uint_t |
| 1679 | ill_needs_attach : 1, |
| 1680 | ill_reserved : 1, |
| 1681 | ill_isv6 : 1, |
| 1682 | ill_dlpi_style_set : 1, |
| 1683 | |
| 1684 | ill_ifname_pending : 1, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1685 | ill_logical_down : 1, |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1686 | ill_dl_up : 1, |
| 1687 | ill_up_ipifs : 1, |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1688 | |
carlsonj | 69bb4bb | 2006-08-14 14:10:48 -0700 | [diff] [blame] | 1689 | ill_note_link : 1, /* supports link-up notification */ |
yz147064 | 8fb46f2 | 2007-09-21 07:56:36 -0700 | [diff] [blame] | 1690 | ill_capab_reneg : 1, /* capability renegotiation to be done */ |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 1691 | ill_dld_capab_inprog : 1, /* direct dld capab call in prog */ |
Philip Kirk | b127ac4 | 2008-11-06 06:47:54 -0500 | [diff] [blame] | 1692 | ill_need_recover_multicast : 1, |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1693 | |
| 1694 | ill_replumbing : 1, |
| 1695 | ill_arl_dlpi_pending : 1, |
Sowmini Varadhan | f1c454b | 2010-01-11 10:29:23 -0500 | [diff] [blame] | 1696 | ill_grp_pending : 1, |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1697 | |
Sowmini Varadhan | f1c454b | 2010-01-11 10:29:23 -0500 | [diff] [blame] | 1698 | ill_pad_to_bit_31 : 17; /* 15 one-bit flags above + 17 = 32 bits */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1699 | |
| 1700 | /* Following bit fields protected by ill_lock */ |
| 1701 | uint_t |
| 1702 | ill_fragtimer_executing : 1, |
| 1703 | ill_fragtimer_needrestart : 1, |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 1704 | ill_manual_token : 1, /* system won't override ill_token */ |
Girish Moodalbail | 6e91bba | 2010-03-26 17:53:11 -0400 | [diff] [blame] | 1705 | /* |
| 1706 | * ill_manual_linklocal : system will not change the |
| 1707 | * linklocal whenever ill_token changes. |
| 1708 | */ |
| 1709 | ill_manual_linklocal : 1, |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 1710 | |
Sebastien Roy | 792bd77 | 2009-12-21 15:22:35 -0500 | [diff] [blame] | 1711 | ill_manual_dst_linklocal : 1, /* same for pt-pt dst linklocal */ |
| 1712 | |
Dan McDonald | 42c5ef0 | 2019-02-22 14:42:52 -0500 | [diff] [blame] | 1713 | ill_mcast_ncec_cleanup : 1, /* Reaping mcast ncecs. */ |
| 1714 | ill_pad_bit_31 : 26; /* 6 one-bit flags above + 26 = 32 bits */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1715 | |
| 1716 | /* |
| 1717 | * Used in SIOCSIFMUXID and SIOCGIFMUXID for 'ifconfig unplumb'. |
| 1718 | */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1719 | int ill_muxid; /* muxid returned from plink */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1720 | |
georges | 0a5d959 | 2008-05-30 10:00:54 -0700 | [diff] [blame] | 1721 | /* Used for IP frag reassembly throttling on a per ILL basis. */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1722 | uint_t ill_ipf_gen; /* Generation of next fragment queue */ |
georges | 0a5d959 | 2008-05-30 10:00:54 -0700 | [diff] [blame] | 1723 | uint_t ill_frag_count; /* Count of all reassembly mblk bytes */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1724 | uint_t ill_frag_free_num_pkts; /* num of fragmented packets to free */ |
| 1725 | clock_t ill_last_frag_clean_time; /* time when frag's were pruned */ |
| 1726 | int ill_type; /* From <net/if_types.h> */ |
ja97890 | 4d87631 | 2006-10-10 06:05:36 -0700 | [diff] [blame] | 1727 | uint_t ill_dlpi_multicast_state; /* See below IDS_* */ |
| 1728 | uint_t ill_dlpi_fastpath_state; /* See below IDS_* */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1729 | |
| 1730 | /* |
| 1731 | * Capabilities related fields. |
| 1732 | */ |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 1733 | uint_t ill_dlpi_capab_state; /* State of capability query, IDCS_* */ |
| 1734 | uint_t ill_capab_pending_cnt; /* ipsq-protected; see table below */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1735 | uint64_t ill_capabilities; /* Enabled capabilities, ILL_CAPAB_* */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1736 | ill_hcksum_capab_t *ill_hcksum_capab; /* H/W cksumming capabilities */ |
| 1737 | ill_zerocopy_capab_t *ill_zerocopy_capab; /* Zero-copy capabilities */ |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 1738 | ill_dld_capab_t *ill_dld_capab; /* DLD capabilities */ |
| 1739 | ill_lso_capab_t *ill_lso_capab; /* Large Segment Offload capabilities */ |
| 1740 | mblk_t *ill_capab_reset_mp; /* Preallocated mblk for capab reset */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1741 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1742 | uint8_t ill_max_hops; /* Maximum hops for any logical interface */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1743 | uint_t ill_user_mtu; /* User-specified MTU via SIOCSLIFLNKINFO */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1744 | uint32_t ill_reachable_time; /* Value for ND algorithm in msec */ |
| 1745 | uint32_t ill_reachable_retrans_time; /* Value for ND algorithm msec */ |
| 1746 | uint_t ill_max_buf; /* Max # of req to buffer for ND */ |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 1747 | in6_addr_t ill_token; /* IPv6 interface id */ |
| 1748 | in6_addr_t ill_dest_token; /* Destination IPv6 interface id */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1749 | uint_t ill_token_length; /* written under ipsq + ill_lock, like ill_token */ |
| 1750 | uint32_t ill_xmit_count; /* ndp max multicast xmits */ |
apersson | 3173664 | 2006-12-19 17:33:00 -0800 | [diff] [blame] | 1751 | mib2_ipIfStatsEntry_t *ill_ip_mib; /* ver indep. interface mib */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1752 | mib2_ipv6IfIcmpEntry_t *ill_icmp6_mib; /* Per interface mib */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1753 | |
| 1754 | phyint_t *ill_phyint; /* written under ipsq, ill_g_lock, ill_lock; see table below */ |
| 1755 | uint64_t ill_flags; /* ill_lock-protected; see table below */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1756 | |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1757 | kmutex_t ill_lock; /* Please see table below */ |
| 1758 | /* |
| 1759 | * The ill_nd_lla* fields handle the link layer address option |
| 1760 | * from neighbor discovery. This is used for external IPv6 |
| 1761 | * address resolution. |
| 1762 | */ |
| 1763 | mblk_t *ill_nd_lla_mp; /* mblk which holds ill_nd_lla */ |
| 1764 | uint8_t *ill_nd_lla; /* Link Layer Address */ |
| 1765 | uint_t ill_nd_lla_len; /* Link Layer Address length */ |
| 1766 | /* |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 1767 | * We have 4 phys_addr_req's sent down. This field keeps track |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1768 | * of which one is pending. |
| 1769 | */ |
| 1770 | t_uscalar_t ill_phys_addr_pend; /* which dl_phys_addr_req pending */ |
| 1771 | /* |
| 1772 | * Used to save errors that occur during plumbing |
| 1773 | */ |
| 1774 | uint_t ill_ifname_pending_err; |
| 1775 | avl_node_t ill_avl_byppa; /* avl node based on ppa */ |
Dan McDonald | 42c5ef0 | 2019-02-22 14:42:52 -0500 | [diff] [blame] | 1776 | uint_t ill_mcast_nces; /* Number of NCEs that are multicast. */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1777 | list_t ill_nce; /* pointer to nce_s list */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1778 | uint_t ill_refcnt; /* active refcnt by threads */ |
sowmini | 384ad17 | 2008-04-08 12:13:12 -0700 | [diff] [blame] | 1779 | uint_t ill_ire_cnt; /* ires associated with this ill */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1780 | kcondvar_t ill_cv; /* used with ill_lock; see table below */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1781 | uint_t ill_ncec_cnt; /* ncecs associated with this ill */ |
sowmini | 384ad17 | 2008-04-08 12:13:12 -0700 | [diff] [blame] | 1782 | uint_t ill_nce_cnt; /* nces associated with this ill */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1783 | uint_t ill_waiters; /* threads waiting in ipsq_enter */ |
| 1784 | /* |
| 1785 | * Contains the upper read queue pointer of the module immediately |
| 1786 | * beneath IP. This field allows IP to validate sub-capability |
| 1787 | * acknowledgments coming up from downstream. |
| 1788 | */ |
| 1789 | queue_t *ill_lmod_rq; /* read queue pointer of module below */ |
| 1790 | uint_t ill_lmod_cnt; /* number of modules beneath IP */ |
| 1791 | ip_m_t *ill_media; /* media specific params/functions */ |
| 1792 | t_uscalar_t ill_dlpi_pending; /* Last DLPI primitive issued */ |
| 1793 | uint_t ill_usesrc_ifindex; /* use src addr from this ILL */ |
| 1794 | struct ill_s *ill_usesrc_grp_next; /* Next ILL in the usesrc group */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1795 | boolean_t ill_trace_disable; /* True when alloc fails */ |
dh155122 | f4b3ec6 | 2007-01-19 16:59:38 -0800 | [diff] [blame] | 1796 | zoneid_t ill_zoneid; |
| 1797 | ip_stack_t *ill_ipst; /* Corresponds to a netstack_hold */ |
meem | e704a8f | 2007-10-30 11:15:43 -0700 | [diff] [blame] | 1798 | uint32_t ill_dhcpinit; /* IP_DHCPINIT_IFs for ill */ |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 1799 | void *ill_flownotify_mh; /* Tx flow ctl, mac cb handle */ |
sowmini | 384ad17 | 2008-04-08 12:13:12 -0700 | [diff] [blame] | 1800 | uint_t ill_ilm_cnt; /* ilms referencing this ill */ |
Philip Kirk | b127ac4 | 2008-11-06 06:47:54 -0500 | [diff] [blame] | 1801 | uint_t ill_ipallmulti_cnt; /* ip_join_allmulti() calls */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1802 | ilm_t *ill_ipallmulti_ilm; /* ill_lock-protected, like ill_ipallmulti_cnt */ |
| 1803 | |
| 1804 | mblk_t *ill_saved_ire_mp; /* Allocated for each extra IRE */ |
| 1805 | /* with ire_ill set so they can */ |
| 1806 | /* survive the ill going down and up. */ |
| 1807 | kmutex_t ill_saved_ire_lock; /* Protects ill_saved_ire_mp, cnt */ |
| 1808 | uint_t ill_saved_ire_cnt; /* # entries */ |
| 1809 | struct arl_ill_common_s *ill_common; /* NOTE(review): possibly the "ill_arl" row of the table below; confirm */ |
| 1810 | ire_t *ill_ire_multicast; /* IRE_MULTICAST for ill */ |
| 1811 | clock_t ill_defend_start; /* start of 1 hour period */ |
| 1812 | uint_t ill_defend_count; /* # of announce/defends per ill */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1813 | /* |
| 1814 | * IPMP fields. |
| 1815 | */ |
| 1816 | ipmp_illgrp_t *ill_grp; /* IPMP group information */ |
Toomas Soome | 8a06b3d | 2018-10-15 22:13:16 +0300 | [diff] [blame] | 1817 | list_node_t ill_actnode; /* next active ill in group */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1818 | list_node_t ill_grpnode; /* next ill in group */ |
| 1819 | ipif_t *ill_src_ipif; /* source address selection rotor */ |
| 1820 | ipif_t *ill_move_ipif; /* ipif awaiting move to new ill */ |
| 1821 | boolean_t ill_nom_cast; /* nominated for mcast/bcast */ |
| 1822 | uint_t ill_bound_cnt; /* # of data addresses bound to ill */ |
| 1823 | ipif_t *ill_bound_ipif; /* ipif chain bound to ill */ |
| 1824 | timeout_id_t ill_refresh_tid; /* ill refresh retry timeout id */ |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1825 | |
| 1826 | uint32_t ill_mrouter_cnt; /* mrouter allmulti joins */ |
Sowmini Varadhan | 550b6e4 | 2010-07-01 17:10:52 -0400 | [diff] [blame] | 1827 | uint32_t ill_allowed_ips_cnt; /* presumably # of entries in ill_allowed_ips; confirm */ |
| 1828 | in6_addr_t *ill_allowed_ips; |
Ravi Chandra Nallan | 7f125a5 | 2010-07-13 18:17:30 +0530 | [diff] [blame] | 1829 | |
| 1830 | /* list of multicast physical addresses joined on this ill */ |
| 1831 | multiphysaddr_t *ill_mphysaddr_list; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1832 | } ill_t; |
| 1833 | |
/*
 * ILL_FREE_OK() is true once there are no incoming pointer references
 * to the ill, i.e. its ire, ilm, ncec and nce counts are all zero.
 */
#define	ILL_FREE_OK(ill)			\
	((ill)->ill_ire_cnt == 0 &&		\
	(ill)->ill_ilm_cnt == 0 &&		\
	(ill)->ill_ncec_cnt == 0 &&		\
	(ill)->ill_nce_cnt == 0)
sowmini | 968d2fd | 2008-03-21 06:08:04 -0700 | [diff] [blame] | 1841 | |
/*
 * An ipif/ill can be marked down only when the ire and ncec references
 * to that ipif/ill go to zero.  ILL_DOWN_OK() is a necessary condition
 * for quiescence checks.  See comments above IPIF_DOWN_OK for details
 * on why ires and nces are selectively considered for this macro.
 *
 * Evaluates to non-zero when ill_ire_cnt, ill_ncec_cnt and ill_nce_cnt
 * are all zero.  The argument is parenthesized (as in ILL_FREE_OK()) so
 * that any pointer expression may be passed; it can be evaluated more
 * than once, so it must not have side effects.
 */
#define	ILL_DOWN_OK(ill)	\
	((ill)->ill_ire_cnt == 0 && (ill)->ill_ncec_cnt == 0 &&	\
	(ill)->ill_nce_cnt == 0)
sowmini | 968d2fd | 2008-03-21 06:08:04 -0700 | [diff] [blame] | 1851 | |
| 1852 | /* |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1853 | * The following table lists the protection levels of the various members |
| 1854 | * of the ill_t. Same notation as that used for ipif_t above is used. |
| 1855 | * |
| 1856 | * Write Read |
| 1857 | * |
| 1858 | * ill_ifptr ill_g_lock + s Write once |
| 1859 | * ill_rq ipsq Write once |
| 1860 | * ill_wq ipsq Write once |
| 1861 | * |
| 1862 | * ill_error ipsq None |
| 1863 | * ill_ipif ill_g_lock + ipsq ill_g_lock OR ipsq |
meem | 8df01f7 | 2007-05-30 16:02:35 -0700 | [diff] [blame] | 1864 | * ill_ipif_up_count ill_lock + ipsq ill_lock OR ipsq |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1865 | * ill_max_frag ill_lock ill_lock |
| 1866 | * ill_current_frag ill_lock ill_lock |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1867 | * |
meem | b051ecf | 2006-12-27 21:32:46 -0800 | [diff] [blame] | 1868 | * ill_name ill_g_lock + ipsq Write once |
| 1869 | * ill_name_length ill_g_lock + ipsq Write once |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1870 | * ill_ndd_name ipsq Write once |
| 1871 | * ill_net_type ipsq Write once |
meem | b051ecf | 2006-12-27 21:32:46 -0800 | [diff] [blame] | 1872 | * ill_ppa ill_g_lock + ipsq Write once |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1873 | * ill_sap ipsq + down ill Write once |
| 1874 | * ill_sap_length ipsq + down ill Write once |
| 1875 | * ill_phys_addr_length ipsq + down ill Write once |
| 1876 | * |
| 1877 | * ill_bcast_addr_length ipsq ipsq |
| 1878 | * ill_mactype ipsq ipsq |
| 1879 | * ill_frag_ptr ipsq ipsq |
| 1880 | * |
| 1881 | * ill_frag_timer_id ill_lock ill_lock |
| 1882 | * ill_frag_hash_tbl ipsq up ill |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1883 | * ill_ilm ill_mcast_lock(WRITER) ill_mcast_lock(READER) |
| 1884 | * ill_global_timer ill_mcast_lock(WRITER) ill_mcast_lock(READER) |
| 1885 | * ill_mcast_type ill_mcast_lock(WRITER) ill_mcast_lock(READER) |
| 1886 | * ill_mcast_v1_time ill_mcast_lock(WRITER) ill_mcast_lock(READER) |
| 1887 | * ill_mcast_v2_time ill_mcast_lock(WRITER) ill_mcast_lock(READER) |
| 1888 | * ill_mcast_v1_tset ill_mcast_lock(WRITER) ill_mcast_lock(READER) |
| 1889 | * ill_mcast_v2_tset ill_mcast_lock(WRITER) ill_mcast_lock(READER) |
| 1890 | * ill_mcast_rv ill_mcast_lock(WRITER) ill_mcast_lock(READER) |
| 1891 | * ill_mcast_qi ill_mcast_lock(WRITER) ill_mcast_lock(READER) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1892 | * |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1893 | * ill_down_mp ipsq ipsq |
meem | 8df01f7 | 2007-05-30 16:02:35 -0700 | [diff] [blame] | 1894 | * ill_dlpi_deferred ill_lock ill_lock |
Thirumalai Srinivasan | 7571834 | 2009-07-07 10:46:23 -0700 | [diff] [blame] | 1895 | * ill_dlpi_pending ipsq + ill_lock ipsq or ill_lock or |
| 1896 | * absence of ipsq writer. |
meem | b051ecf | 2006-12-27 21:32:46 -0800 | [diff] [blame] | 1897 | * ill_phys_addr_mp ipsq + down ill only when ill is up |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1898 | * ill_mcast_deferred ill_lock ill_lock |
meem | b051ecf | 2006-12-27 21:32:46 -0800 | [diff] [blame] | 1899 | * ill_phys_addr ipsq + down ill only when ill is up |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 1900 | * ill_dest_addr_mp ipsq + down ill only when ill is up |
| 1901 | * ill_dest_addr ipsq + down ill only when ill is up |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1902 | * |
| 1903 | * ill_state_flags ill_lock ill_lock |
| 1904 | * exclusive bit flags ipsq_t ipsq_t |
| 1905 | * shared bit flags ill_lock ill_lock |
| 1906 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1907 | * ill_muxid ipsq Not atomic |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1908 | * |
| 1909 | * ill_ipf_gen Not atomic |
georges | 0a5d959 | 2008-05-30 10:00:54 -0700 | [diff] [blame] | 1910 | * ill_frag_count atomics atomics |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1911 | * ill_type ipsq + down ill only when ill is up |
| 1912 | * ill_dlpi_multicast_state ill_lock ill_lock |
| 1913 | * ill_dlpi_fastpath_state ill_lock ill_lock |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 1914 | * ill_dlpi_capab_state ipsq ipsq |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1915 | * ill_max_hops ipsq Not atomic |
| 1916 | * |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1917 | * ill_mtu ill_lock None |
Erik Nordmark | 1eee170 | 2010-08-16 15:30:54 -0700 | [diff] [blame] | 1918 | * ill_mc_mtu ill_lock None |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1919 | * |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1920 | * ill_user_mtu ipsq + ill_lock ill_lock |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1921 | * ill_reachable_time ipsq + ill_lock ill_lock |
meem | b051ecf | 2006-12-27 21:32:46 -0800 | [diff] [blame] | 1922 | * ill_reachable_retrans_time ipsq + ill_lock ill_lock |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1923 | * ill_max_buf ipsq + ill_lock ill_lock |
| 1924 | * |
| 1925 | * Next 2 fields need ill_lock because of the get ioctls. They should not |
| 1926 | * report partially updated results without executing in the ipsq. |
| 1927 | * ill_token ipsq + ill_lock ill_lock |
| 1928 | * ill_token_length ipsq + ill_lock ill_lock |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 1929 | * ill_dest_token ipsq + down ill only when ill is up |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1930 | * ill_xmit_count ipsq + down ill write once |
| 1931 | * ill_ip_mib ipsq + down ill only when ill is up |
| 1932 | * ill_icmp6_mib ipsq + down ill only when ill is up |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1933 | * |
| 1934 | * ill_phyint ipsq, ill_g_lock, ill_lock Any of them |
| 1935 | * ill_flags ill_lock ill_lock |
meem | b051ecf | 2006-12-27 21:32:46 -0800 | [diff] [blame] | 1936 | * ill_nd_lla_mp ipsq + down ill only when ill is up |
| 1937 | * ill_nd_lla ipsq + down ill only when ill is up |
| 1938 | * ill_nd_lla_len ipsq + down ill only when ill is up |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1939 | * ill_phys_addr_pend ipsq + down ill only when ill is up |
| 1940 | * ill_ifname_pending_err ipsq ipsq |
meem | b051ecf | 2006-12-27 21:32:46 -0800 | [diff] [blame] | 1941 | * ill_avl_byppa ipsq, ill_g_lock write once |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1942 | * |
| 1943 | * ill_fastpath_list ill_lock ill_lock |
| 1944 | * ill_refcnt ill_lock ill_lock |
sowmini | 384ad17 | 2008-04-08 12:13:12 -0700 | [diff] [blame] | 1945 | * ill_ire_cnt ill_lock ill_lock |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1946 | * ill_cv ill_lock ill_lock |
Dan McDonald | 42c5ef0 | 2019-02-22 14:42:52 -0500 | [diff] [blame] | 1947 | * ill_mcast_nces ill_lock ill_lock |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1948 | * ill_ncec_cnt ill_lock ill_lock |
sowmini | 384ad17 | 2008-04-08 12:13:12 -0700 | [diff] [blame] | 1949 | * ill_nce_cnt ill_lock ill_lock |
| 1950 | * ill_ilm_cnt ill_lock ill_lock |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1951 | * ill_src_ipif ill_g_lock ill_g_lock |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1952 | * ill_trace ill_lock ill_lock |
| 1953 | * ill_usesrc_grp_next ill_g_usesrc_lock ill_g_usesrc_lock |
meem | e704a8f | 2007-10-30 11:15:43 -0700 | [diff] [blame] | 1954 | * ill_dhcpinit atomics atomics |
Eric Cheng | da14ceb | 2008-12-04 18:16:10 -0800 | [diff] [blame] | 1955 | * ill_flownotify_mh write once write once |
| 1956 | * ill_capab_pending_cnt ipsq ipsq |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1957 | * ill_ipallmulti_cnt ill_lock ill_lock |
| 1958 | * ill_ipallmulti_ilm ill_lock ill_lock |
| 1959 | * ill_saved_ire_mp ill_saved_ire_lock ill_saved_ire_lock |
| 1960 | * ill_saved_ire_cnt ill_saved_ire_lock ill_saved_ire_lock |
| 1961 | * ill_arl ??? ??? |
| 1962 | * ill_ire_multicast ipsq + quiescent none |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1963 | * ill_bound_ipif ipsq ipsq |
| 1964 | * ill_actnode ipsq + ipmp_lock ipsq OR ipmp_lock |
| 1965 | * ill_grpnode ipsq + ill_g_lock ipsq OR ill_g_lock |
| 1966 | * ill_src_ipif ill_g_lock ill_g_lock |
| 1967 | * ill_move_ipif ipsq ipsq |
| 1968 | * ill_nom_cast ipsq ipsq OR advisory |
| 1969 | * ill_refresh_tid ill_lock ill_lock |
| 1970 | * ill_grp (for IPMP ill) write once write once |
| 1971 | * ill_grp (for underlying ill) ipsq + ill_g_lock ipsq OR ill_g_lock |
Sowmini Varadhan | f1c454b | 2010-01-11 10:29:23 -0500 | [diff] [blame] | 1972 | * ill_grp_pending ill_mcast_serializer ill_mcast_serializer |
Erik Nordmark | bd670b3 | 2009-11-11 11:49:49 -0800 | [diff] [blame] | 1973 | * ill_mrouter_cnt atomics atomics |
Ravi Chandra Nallan | 7f125a5 | 2010-07-13 18:17:30 +0530 | [diff] [blame] | 1974 | * ill_mphysaddr_list ill_lock ill_lock |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 1975 | * |
| 1976 | * NOTE: It's OK to make heuristic decisions on an underlying interface |
| 1977 | * by using IS_UNDER_IPMP() or comparing ill_grp's raw pointer value. |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1978 | */ |
| 1979 | |
| 1980 | /* |
| 1981 | * For ioctl restart mechanism see ip_reprocess_ioctl() |
| 1982 | */ |
| 1983 | struct ip_ioctl_cmd_s; |
| 1984 | |
| 1985 | typedef int (*ifunc_t)(ipif_t *, struct sockaddr_in *, queue_t *, mblk_t *, |
| 1986 | struct ip_ioctl_cmd_s *, void *); |
| 1987 | |
| 1988 | typedef struct ip_ioctl_cmd_s { |
| 1989 | int ipi_cmd; /* NOTE(review): presumably the ioctl command this entry describes; confirm */ |
| 1990 | size_t ipi_copyin_size; /* NOTE(review): presumably # of bytes to copy in; confirm */ |
| 1991 | uint_t ipi_flags; /* IPI_* flag values, see below */ |
| 1992 | uint_t ipi_cmd_type; /* IF_CMD, LIF_CMD, ... (see ipi_cmd_type list below) */ |
| 1993 | ifunc_t ipi_func; /* handler with the ifunc_t signature */ |
| 1994 | ifunc_t ipi_func_restart; /* NOTE(review): presumably used on ioctl restart; see ip_reprocess_ioctl() */ |
| 1995 | } ip_ioctl_cmd_t; |
| 1996 | |
| 1997 | /* |
| 1998 | * ipi_cmd_type: |
| 1999 | * |
| 2000 | * IF_CMD 1 old style ifreq cmd |
| 2001 | * LIF_CMD 2 new style lifreq cmd |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 2002 | * ARP_CMD 3 arpreq cmd |
| 2003 | * XARP_CMD 4 xarpreq cmd |
| 2004 | * MSFILT_CMD 5 multicast source filter cmd |
| 2005 | * MISC_CMD 6 misc cmd (not a more specific one above) |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 2006 | */ |
| 2007 | |
Sebastien Roy | 2b24ab6 | 2009-09-22 22:04:45 -0400 | [diff] [blame] | 2008 | enum { IF_CMD = 1, LIF_CMD, ARP_CMD, XARP_CMD, MSFILT_CMD, MISC_CMD }; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 2009 | |
| 2010 | #define IPI_DONTCARE 0 /* For ioctl encoded values that don't matter */ |
| 2011 | |
| 2012 | /* Flag values in ipi_flags */ |
Ryan Goodfellow | 2514b11 | 2022-06-09 07:52:45 -0700 | [diff] [blame] | 2013 | #define IPI_PRIV 0x1 /* Command requires PRIV_SYS_IP_CONFIG */ |
meem | 98e93c2 | 2007-08-31 12:48:28 -0700 | [diff] [blame] | 2014 | #define IPI_MODOK 0x2 /* Permitted on mod instance of IP */ |
| 2015 | #define IPI_WR 0x4 /* Need to grab writer access */ |
| 2016 | #define IPI_GET_CMD 0x8 /* branch to mi_copyout on success */ |
meem | e11c3f4 | 2009-01-06 20:16:25 -0500 | [diff] [blame] | 2017 | /* unused 0x10 */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 2018 | #define IPI_NULL_BCONT 0x20 /* ioctl has not data and hence no b_cont */ |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 2019 | |
/*
 * ioctl command tables, defined elsewhere. The *_count variables are
 * presumably the number of entries in the matching table -- confirm against
 * the definitions.
 */
extern ip_ioctl_cmd_t	ip_ndx_ioctl_table[];
extern ip_ioctl_cmd_t	ip_misc_ioctl_table[];
extern int ip_ndx_ioctl_count;
extern int ip_misc_ioctl_count;
| 2024 | |
/* Passed down by ARP to IP during I_PLINK/I_PUNLINK */
typedef struct ipmx_s {
	char	ipmx_name[LIFNAMSIZ];		/* if name */
	uint_t
		ipmx_arpdev_stream : 1,		/* This is the arp stream */
		ipmx_notused : 31;		/* remaining bits of the 32 */
} ipmx_t;
| 2032 | |
/*
 * State for detecting if a driver supports certain features.
 * Support for DL_ENABMULTI_REQ uses ill_dlpi_multicast_state.
 * Support for DLPI M_DATA fastpath uses ill_dlpi_fastpath_state.
 * Both of those fields take one of the IDS_* values below.
 */
#define	IDS_UNKNOWN	0	/* No DLPI request sent */
#define	IDS_INPROGRESS	1	/* DLPI request sent */
#define	IDS_OK		2	/* DLPI request completed successfully */
#define	IDS_FAILED	3	/* DLPI request failed */
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 2042 | |
/* Support for DL_CAPABILITY_REQ uses ill_dlpi_capab_state. */
enum {
	IDCS_UNKNOWN = 0,
	IDCS_PROBE_SENT = 1,
	IDCS_OK = 2,
	IDCS_RESET_SENT = 3,
	IDCS_RENEG = 4,
	IDCS_FAILED = 5
};
| 2052 | |
/* Extended NDP Management Structure */
typedef struct ipndp_s {
	ndgetf_t	ip_ndp_getf;	/* presumably the "get" callback */
	ndsetf_t	ip_ndp_setf;	/* presumably the "set" callback */
	caddr_t		ip_ndp_data;	/* opaque data; meaning not shown here */
	char		*ip_ndp_name;	/* presumably the parameter name */
} ipndp_t;
| 2060 | |
/* IXA notification types, passed to an ixa_notify_t upcall */
typedef enum {
	IXAN_LSO = 0,		/* LSO capability change */
	IXAN_PMTU = 1,		/* PMTU change */
	IXAN_ZCOPY = 2		/* ZEROCOPY capability change */
} ixa_notify_type_t;
| 2067 | |
/* Argument accompanying a notification; its meaning depends on the type. */
typedef uint_t ixa_notify_arg_t;

/*
 * Upcall notify function registered in ixa_notify. The leading void * is
 * presumably the ixa_notify_cookie value -- confirm against the callers.
 */
typedef void (*ixa_notify_t)(void *, ip_xmit_attr_t *ixa, ixa_notify_type_t,
    ixa_notify_arg_t);
| 2072 | |
/*
 * Attribute flags that are common to the transmit and receive attributes.
 * They occupy the top four bits of the flags word.
 */
#define	IAF_IS_IPV4		0x80000000	/* ipsec_*_v4 */
#define	IAF_TRUSTED_ICMP	0x40000000	/* ipsec_*_icmp_loopback */
#define	IAF_NO_LOOP_ZONEID_SET	0x20000000	/* Zone that shouldn't have */
						/* a copy */
#define	IAF_LOOPBACK_COPY	0x10000000	/* For multi and broadcast */

/* The OR of the four IAF_* flags above */
#define	IAF_MASK		0xf0000000	/* Flags that are common */
| 2083 | |
/*
 * Transmit side attributes used between the transport protocols and IP as
 * well as inside IP. It is also used to cache information in the conn_t, i.e.
 * it replaces conn_ire and the IPsec caching in the conn_t.
 *
 * Many fields are only meaningful when the IXAF_* flag named in their
 * comment is set in ixa_flags.
 */
struct ip_xmit_attr_s {
	iaflags_t	ixa_flags;	/* IXAF_*. See below */

	uint32_t	ixa_free_flags;	/* IXA_FREE_*. See below */
	uint32_t	ixa_refcnt;	/* Using atomics */

	/*
	 * Always initialized independently of ixa_flags settings.
	 * Used by ip_xmit so we keep them up front for cache locality.
	 */
	uint32_t	ixa_xmit_hint;	/* For ECMP and GLD TX ring fanout */
	uint_t		ixa_pktlen;	/* Always set. For frag and stats */
	zoneid_t	ixa_zoneid;	/* Assumed always set */

	/* Always set for conn_ip_output(); might be stale */
	/*
	 * Since TCP keeps the conn_t around past the process going away
	 * we need to use the "notr" (e.g., ire_refhold_notr) for ixa_ire,
	 * ixa_nce, and ixa_dce.
	 */
	ire_t		*ixa_ire;	/* Forwarding table entry */
	uint_t		ixa_ire_generation;
	nce_t		*ixa_nce;	/* Neighbor cache entry */
	dce_t		*ixa_dce;	/* Destination cache entry */
	uint_t		ixa_dce_generation;
	uint_t		ixa_src_generation;	/* If IXAF_VERIFY_SOURCE */

	uint32_t	ixa_src_preferences;	/* prefs for src addr select */
	uint32_t	ixa_pmtu;		/* IXAF_VERIFY_PMTU */

	/* Set by ULP if IXAF_VERIFY_PMTU; otherwise set by IP */
	uint32_t	ixa_fragsize;

	int8_t		ixa_use_min_mtu;	/* IXAF_USE_MIN_MTU values */

	pfirepostfrag_t	ixa_postfragfn;		/* Set internally in IP */

	in6_addr_t	ixa_nexthop_v6;		/* IXAF_NEXTHOP_SET */
	/* IPv4 view of ixa_nexthop_v6 */
#define	ixa_nexthop_v4	V4_PART_OF_V6(ixa_nexthop_v6)

	zoneid_t	ixa_no_loop_zoneid;	/* IXAF_NO_LOOP_ZONEID_SET */

	uint_t		ixa_scopeid;		/* For IPv6 link-locals */

	uint_t		ixa_broadcast_ttl;	/* IXAF_BROADCAST_TTL_SET */

	uint_t		ixa_multicast_ttl;	/* Assumed set for multicast */
	uint_t		ixa_multicast_ifindex;	/* Assumed set for multicast */
	ipaddr_t	ixa_multicast_ifaddr;	/* Assumed set for multicast */

	int		ixa_raw_cksum_offset;	/* If IXAF_SET_RAW_CKSUM */

	uint32_t	ixa_ident;		/* For IPv6 fragment header */

	uint64_t	ixa_conn_id;		/* Used by DTrace */
	/*
	 * Cached LSO information.
	 */
	ill_lso_capab_t	ixa_lso_capab;	/* Valid when IXAF_LSO_CAPAB */

	uint64_t	ixa_ipsec_policy_gen;	/* Generation from iph_gen */
	/*
	 * The following IPsec fields are only initialized when
	 * IXAF_IPSEC_SECURE is set. Otherwise they contain garbage.
	 */
	ipsec_latch_t	*ixa_ipsec_latch;	/* Just the ids */
	struct ipsa_s	*ixa_ipsec_ah_sa;	/* Hard reference SA for AH */
	struct ipsa_s	*ixa_ipsec_esp_sa;	/* Hard reference SA for ESP */
	struct ipsec_policy_s	*ixa_ipsec_policy; /* why are we here? */
	struct ipsec_action_s	*ixa_ipsec_action; /* For reflected packets */
	ipsa_ref_t	ixa_ipsec_ref[2];	/* Soft reference to SA */
						/* 0: ESP, 1: AH */

	/*
	 * The selectors here are potentially different than the SPD rule's
	 * selectors, and we need to have both available for IKEv2.
	 *
	 * NOTE: "Source" and "Dest" are w.r.t. outbound datagrams. Ports can
	 *	 be zero, and the protocol number is needed to make the ports
	 *	 significant.
	 */
	uint16_t ixa_ipsec_src_port;	/* Source port number of d-gram. */
	uint16_t ixa_ipsec_dst_port;	/* Destination port number of d-gram. */
	uint8_t  ixa_ipsec_icmp_type;	/* ICMP type of d-gram */
	uint8_t  ixa_ipsec_icmp_code;	/* ICMP code of d-gram */

	sa_family_t ixa_ipsec_inaf;	/* Inner address family */
#define	IXA_MAX_ADDRLEN 4	/* Max addr len. (in 32-bit words) */
	uint32_t ixa_ipsec_insrc[IXA_MAX_ADDRLEN];	/* Inner src address */
	uint32_t ixa_ipsec_indst[IXA_MAX_ADDRLEN];	/* Inner dest address */
	uint8_t  ixa_ipsec_insrcpfx;	/* Inner source prefix */
	uint8_t  ixa_ipsec_indstpfx;	/* Inner destination prefix */

	uint8_t ixa_ipsec_proto;	/* IP protocol number for d-gram. */

	/* Always initialized independently of ixa_flags settings */
	uint_t		ixa_ifindex;	/* Assumed always set */
	uint16_t	ixa_ip_hdr_length; /* Points to ULP header */
	uint8_t		ixa_protocol;	/* Protocol number for ULP cksum */
	ts_label_t	*ixa_tsl;	/* Always set. NULL if not TX */
	ip_stack_t	*ixa_ipst;	/* Always set */
	uint32_t	ixa_extra_ident; /* Set if LSO */
	cred_t		*ixa_cred;	/* For getpeerucred */
	pid_t		ixa_cpid;	/* For getpeerucred */

	/* Only present in DEBUG builds, so the struct size differs there */
#ifdef DEBUG
	kthread_t	*ixa_curthread;	/* For serialization assert */
#endif
	squeue_t	*ixa_sqp;	/* Set from conn_sqp as a hint */
	uintptr_t	ixa_cookie;	/* cookie to use for tx flow control */

	/*
	 * Must be set by ULP if any of IXAF_VERIFY_LSO, IXAF_VERIFY_PMTU,
	 * or IXAF_VERIFY_ZCOPY is set.
	 */
	ixa_notify_t	ixa_notify;	/* Registered upcall notify function */
	void		*ixa_notify_cookie; /* ULP cookie for ixa_notify */

	uint_t		ixa_tcpcleanup;	/* Used by conn_ixa_cleanup */
};
| 2209 | |
| 2210 | /* |
| 2211 | * Flags to indicate which transmit attributes are set. |
| 2212 | * Split into "xxx_SET" ones which indicate that the "xxx" field is set, and |
| 2213 | * single flags. |
| 2214 | */ |
| 2215 | #define IXAF_REACH_CONF 0x00000001 /* Reachability confirmation */ |
| 2216 | #define IXAF_BROADCAST_TTL_SET 0x00000002 /* ixa_broadcast_ttl valid */ |
| 2217 | #define IXAF_SET_SOURCE 0x00000004 /* Replace if broadcast */ |
| 2218 | #define IXAF_USE_MIN_MTU 0x00000008 /* IPV6_USE_MIN_MTU */ |
| 2219 | |
| 2220 | #define IXAF_DONTFRAG 0x00000010 /* IP*_DONTFRAG */ |
| 2221 | #define IXAF_VERIFY_PMTU 0x00000020 /* ixa_pmtu/ixa_fragsize set */ |
| 2222 | #define IXAF_PMTU_DISCOVERY 0x00000040 /* Crea
|