blob: 7aac9b655a18885231a85e492e99a365fdc1e735 [file] [log] [blame]
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
Marcel Telkaac3e5be2010-06-10 08:35:29 +020023 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
Andy Fiddaman221e47f2020-09-18 20:04:57 +000024 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
Erik Nordmarkbd670b32009-11-11 11:49:49 -080025 */
26/* Copyright (c) 1990 Mentat Inc. */
27
28#include <sys/types.h>
29#include <sys/stream.h>
30#include <sys/strsun.h>
31#define _SUN_TPI_VERSION 2
32#include <sys/tihdr.h>
33#include <sys/xti_inet.h>
34#include <sys/ucred.h>
35#include <sys/zone.h>
36#include <sys/ddi.h>
37#include <sys/sunddi.h>
38#include <sys/cmn_err.h>
39#include <sys/debug.h>
40#include <sys/atomic.h>
41#include <sys/policy.h>
42
43#include <sys/systm.h>
44#include <sys/param.h>
45#include <sys/kmem.h>
46#include <sys/sdt.h>
47#include <sys/socket.h>
48#include <sys/ethernet.h>
49#include <sys/mac.h>
50#include <net/if.h>
51#include <net/if_types.h>
52#include <net/if_arp.h>
53#include <net/route.h>
54#include <sys/sockio.h>
55#include <netinet/in.h>
56#include <net/if_dl.h>
57
58#include <inet/common.h>
59#include <inet/mi.h>
60#include <inet/mib2.h>
61#include <inet/nd.h>
62#include <inet/arp.h>
63#include <inet/snmpcom.h>
64#include <inet/kstatcom.h>
65
66#include <netinet/igmp_var.h>
67#include <netinet/ip6.h>
68#include <netinet/icmp6.h>
69#include <netinet/sctp.h>
70
71#include <inet/ip.h>
72#include <inet/ip_impl.h>
73#include <inet/ip6.h>
74#include <inet/ip6_asp.h>
75#include <inet/tcp.h>
76#include <inet/ip_multi.h>
77#include <inet/ip_if.h>
78#include <inet/ip_ire.h>
79#include <inet/ip_ftable.h>
80#include <inet/ip_rts.h>
81#include <inet/optcom.h>
82#include <inet/ip_ndp.h>
83#include <inet/ip_listutils.h>
84#include <netinet/igmp.h>
85#include <netinet/ip_mroute.h>
86#include <netinet/udp.h>
87#include <inet/ipp_common.h>
88
89#include <net/pfkeyv2.h>
90#include <inet/sadb.h>
91#include <inet/ipsec_impl.h>
92#include <inet/ipdrop.h>
93#include <inet/ip_netinfo.h>
94
95#include <inet/ipclassifier.h>
96#include <inet/sctp_ip.h>
97#include <inet/sctp/sctp_impl.h>
98#include <inet/udp_impl.h>
99#include <sys/sunddi.h>
100
101#include <sys/tsol/label.h>
102#include <sys/tsol/tnet.h>
103
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800104/*
105 * Return how much size is needed for the different ancillary data items
106 */
107uint_t
108conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
109 ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
110{
111 uint_t ancil_size;
112 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
113
114 /*
115 * If IP_RECVDSTADDR is set we include the destination IP
116 * address as an option. With IP_RECVOPTS we include all
117 * the IP options.
118 */
119 ancil_size = 0;
120 if (recv_ancillary.crb_recvdstaddr &&
121 (ira->ira_flags & IRAF_IS_IPV4)) {
122 ancil_size += sizeof (struct T_opthdr) +
123 sizeof (struct in_addr);
124 IP_STAT(ipst, conn_in_recvdstaddr);
125 }
126
127 /*
128 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
129 * are different
130 */
131 if (recv_ancillary.crb_ip_recvpktinfo &&
132 connp->conn_family == AF_INET) {
133 ancil_size += sizeof (struct T_opthdr) +
134 sizeof (struct in_pktinfo);
135 IP_STAT(ipst, conn_in_recvpktinfo);
136 }
137
138 if ((recv_ancillary.crb_recvopts) &&
139 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
140 ancil_size += sizeof (struct T_opthdr) +
141 ipp->ipp_ipv4_options_len;
142 IP_STAT(ipst, conn_in_recvopts);
143 }
144
145 if (recv_ancillary.crb_recvslla) {
146 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
147 ill_t *ill;
148
149 /* Make sure ira_l2src is setup if not already */
150 if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
151 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
152 ipst);
153 if (ill != NULL) {
154 ip_setl2src(mp, ira, ill);
155 ill_refrele(ill);
156 }
157 }
158 ancil_size += sizeof (struct T_opthdr) +
159 sizeof (struct sockaddr_dl);
160 IP_STAT(ipst, conn_in_recvslla);
161 }
162
163 if (recv_ancillary.crb_recvif) {
164 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
165 IP_STAT(ipst, conn_in_recvif);
166 }
167
168 /*
169 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
170 * are different
171 */
172 if (recv_ancillary.crb_ip_recvpktinfo &&
173 connp->conn_family == AF_INET6) {
174 ancil_size += sizeof (struct T_opthdr) +
175 sizeof (struct in6_pktinfo);
176 IP_STAT(ipst, conn_in_recvpktinfo);
177 }
178
179 if (recv_ancillary.crb_ipv6_recvhoplimit) {
180 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
181 IP_STAT(ipst, conn_in_recvhoplimit);
182 }
183
184 if (recv_ancillary.crb_ipv6_recvtclass) {
185 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
186 IP_STAT(ipst, conn_in_recvtclass);
187 }
188
189 if (recv_ancillary.crb_ipv6_recvhopopts &&
190 (ipp->ipp_fields & IPPF_HOPOPTS)) {
191 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
192 IP_STAT(ipst, conn_in_recvhopopts);
193 }
194 /*
195 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
196 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
197 * options that appear before a routing header.
198 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
199 */
200 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
201 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
202 (recv_ancillary.crb_ipv6_recvdstopts &&
203 recv_ancillary.crb_ipv6_recvrthdr)) {
204 ancil_size += sizeof (struct T_opthdr) +
205 ipp->ipp_rthdrdstoptslen;
206 IP_STAT(ipst, conn_in_recvrthdrdstopts);
207 }
208 }
209 if ((recv_ancillary.crb_ipv6_recvrthdr) &&
210 (ipp->ipp_fields & IPPF_RTHDR)) {
211 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
212 IP_STAT(ipst, conn_in_recvrthdr);
213 }
214 if ((recv_ancillary.crb_ipv6_recvdstopts ||
215 recv_ancillary.crb_old_ipv6_recvdstopts) &&
216 (ipp->ipp_fields & IPPF_DSTOPTS)) {
217 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
218 IP_STAT(ipst, conn_in_recvdstopts);
219 }
220 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
Casper H.S. Dik67dbe2b2009-11-20 20:58:43 +0100221 ancil_size += sizeof (struct T_opthdr) +
222 ucredminsize(ira->ira_cred);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800223 IP_STAT(ipst, conn_in_recvucred);
224 }
225
226 /*
227 * If SO_TIMESTAMP is set allocate the appropriate sized
228 * buffer. Since gethrestime() expects a pointer aligned
229 * argument, we allocate space necessary for extra
230 * alignment (even though it might not be used).
231 */
232 if (recv_ancillary.crb_timestamp) {
233 ancil_size += sizeof (struct T_opthdr) +
234 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
235 IP_STAT(ipst, conn_in_timestamp);
236 }
237
238 /*
Andy Fiddaman221e47f2020-09-18 20:04:57 +0000239 * If IP_RECVTOS is set allocate the appropriately sized buffer
240 */
241 if (recv_ancillary.crb_recvtos &&
242 (ira->ira_flags & IRAF_IS_IPV4)) {
243 ancil_size += sizeof (struct T_opthdr) +
244 P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
245 IP_STAT(ipst, conn_in_recvtos);
246 }
247
248 /*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800249 * If IP_RECVTTL is set allocate the appropriate sized buffer
250 */
251 if (recv_ancillary.crb_recvttl &&
252 (ira->ira_flags & IRAF_IS_IPV4)) {
Andy Fiddaman221e47f2020-09-18 20:04:57 +0000253 ancil_size += sizeof (struct T_opthdr) +
254 P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800255 IP_STAT(ipst, conn_in_recvttl);
256 }
257
258 return (ancil_size);
259}
260
261/*
262 * Lay down the ancillary data items at "ancil_buf".
263 * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
264 * large buffer - ancil_size.
265 */
266void
267conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
268 ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
269{
270 /*
271 * Copy in destination address before options to avoid
272 * any padding issues.
273 */
274 if (recv_ancillary.crb_recvdstaddr &&
275 (ira->ira_flags & IRAF_IS_IPV4)) {
276 struct T_opthdr *toh;
277 ipaddr_t *dstptr;
278
279 toh = (struct T_opthdr *)ancil_buf;
280 toh->level = IPPROTO_IP;
281 toh->name = IP_RECVDSTADDR;
282 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
283 toh->status = 0;
284 ancil_buf += sizeof (struct T_opthdr);
285 dstptr = (ipaddr_t *)ancil_buf;
286 *dstptr = ipp->ipp_addr_v4;
287 ancil_buf += sizeof (ipaddr_t);
288 ancil_size -= toh->len;
289 }
290
291 /*
292 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
293 * are different
294 */
295 if (recv_ancillary.crb_ip_recvpktinfo &&
296 connp->conn_family == AF_INET) {
297 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
298 struct T_opthdr *toh;
299 struct in_pktinfo *pktinfop;
300 ill_t *ill;
301 ipif_t *ipif;
302
303 toh = (struct T_opthdr *)ancil_buf;
304 toh->level = IPPROTO_IP;
305 toh->name = IP_PKTINFO;
306 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
307 toh->status = 0;
308 ancil_buf += sizeof (struct T_opthdr);
309 pktinfop = (struct in_pktinfo *)ancil_buf;
310
311 pktinfop->ipi_ifindex = ira->ira_ruifindex;
312 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
313
314 /* Find a good address to report */
315 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
316 if (ill != NULL) {
317 ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
318 if (ipif != NULL) {
319 pktinfop->ipi_spec_dst.s_addr =
320 ipif->ipif_lcl_addr;
321 ipif_refrele(ipif);
322 }
323 ill_refrele(ill);
324 }
325 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
326 ancil_buf += sizeof (struct in_pktinfo);
327 ancil_size -= toh->len;
328 }
329
330 if ((recv_ancillary.crb_recvopts) &&
331 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
332 struct T_opthdr *toh;
333
334 toh = (struct T_opthdr *)ancil_buf;
335 toh->level = IPPROTO_IP;
336 toh->name = IP_RECVOPTS;
337 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
338 toh->status = 0;
339 ancil_buf += sizeof (struct T_opthdr);
340 bcopy(ipp->ipp_ipv4_options, ancil_buf,
341 ipp->ipp_ipv4_options_len);
342 ancil_buf += ipp->ipp_ipv4_options_len;
343 ancil_size -= toh->len;
344 }
345
346 if (recv_ancillary.crb_recvslla) {
347 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
348 struct T_opthdr *toh;
349 struct sockaddr_dl *dstptr;
350 ill_t *ill;
351 int alen = 0;
352
353 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
354 if (ill != NULL)
355 alen = ill->ill_phys_addr_length;
356
357 /*
358 * For loopback multicast and broadcast the packet arrives
359 * with ira_ruifdex being the physical interface, but
360 * ira_l2src is all zero since ip_postfrag_loopback doesn't
361 * know our l2src. We don't report the address in that case.
362 */
363 if (ira->ira_flags & IRAF_LOOPBACK)
364 alen = 0;
365
366 toh = (struct T_opthdr *)ancil_buf;
367 toh->level = IPPROTO_IP;
368 toh->name = IP_RECVSLLA;
369 toh->len = sizeof (struct T_opthdr) +
370 sizeof (struct sockaddr_dl);
371 toh->status = 0;
372 ancil_buf += sizeof (struct T_opthdr);
373 dstptr = (struct sockaddr_dl *)ancil_buf;
374 dstptr->sdl_family = AF_LINK;
375 dstptr->sdl_index = ira->ira_ruifindex;
376 if (ill != NULL)
377 dstptr->sdl_type = ill->ill_type;
378 else
379 dstptr->sdl_type = 0;
380 dstptr->sdl_nlen = 0;
381 dstptr->sdl_alen = alen;
382 dstptr->sdl_slen = 0;
383 bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
384 ancil_buf += sizeof (struct sockaddr_dl);
385 ancil_size -= toh->len;
386 if (ill != NULL)
387 ill_refrele(ill);
388 }
389
390 if (recv_ancillary.crb_recvif) {
391 struct T_opthdr *toh;
392 uint_t *dstptr;
393
394 toh = (struct T_opthdr *)ancil_buf;
395 toh->level = IPPROTO_IP;
396 toh->name = IP_RECVIF;
397 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
398 toh->status = 0;
399 ancil_buf += sizeof (struct T_opthdr);
400 dstptr = (uint_t *)ancil_buf;
401 *dstptr = ira->ira_ruifindex;
402 ancil_buf += sizeof (uint_t);
403 ancil_size -= toh->len;
404 }
405
406 /*
407 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
408 * are different
409 */
410 if (recv_ancillary.crb_ip_recvpktinfo &&
411 connp->conn_family == AF_INET6) {
412 struct T_opthdr *toh;
413 struct in6_pktinfo *pkti;
414
415 toh = (struct T_opthdr *)ancil_buf;
416 toh->level = IPPROTO_IPV6;
417 toh->name = IPV6_PKTINFO;
418 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
419 toh->status = 0;
420 ancil_buf += sizeof (struct T_opthdr);
421 pkti = (struct in6_pktinfo *)ancil_buf;
422 if (ira->ira_flags & IRAF_IS_IPV4) {
423 IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
424 &pkti->ipi6_addr);
425 } else {
426 pkti->ipi6_addr = ipp->ipp_addr;
427 }
428 pkti->ipi6_ifindex = ira->ira_ruifindex;
429
430 ancil_buf += sizeof (*pkti);
431 ancil_size -= toh->len;
432 }
433 if (recv_ancillary.crb_ipv6_recvhoplimit) {
434 struct T_opthdr *toh;
435
436 toh = (struct T_opthdr *)ancil_buf;
437 toh->level = IPPROTO_IPV6;
438 toh->name = IPV6_HOPLIMIT;
439 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
440 toh->status = 0;
441 ancil_buf += sizeof (struct T_opthdr);
442 *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
443 ancil_buf += sizeof (uint_t);
444 ancil_size -= toh->len;
445 }
446 if (recv_ancillary.crb_ipv6_recvtclass) {
447 struct T_opthdr *toh;
448
449 toh = (struct T_opthdr *)ancil_buf;
450 toh->level = IPPROTO_IPV6;
451 toh->name = IPV6_TCLASS;
452 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
453 toh->status = 0;
454 ancil_buf += sizeof (struct T_opthdr);
455
456 if (ira->ira_flags & IRAF_IS_IPV4)
457 *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
458 else
459 *(uint_t *)ancil_buf = ipp->ipp_tclass;
460 ancil_buf += sizeof (uint_t);
461 ancil_size -= toh->len;
462 }
463 if (recv_ancillary.crb_ipv6_recvhopopts &&
464 (ipp->ipp_fields & IPPF_HOPOPTS)) {
465 struct T_opthdr *toh;
466
467 toh = (struct T_opthdr *)ancil_buf;
468 toh->level = IPPROTO_IPV6;
469 toh->name = IPV6_HOPOPTS;
470 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
471 toh->status = 0;
472 ancil_buf += sizeof (struct T_opthdr);
473 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
474 ancil_buf += ipp->ipp_hopoptslen;
475 ancil_size -= toh->len;
476 }
477 /*
478 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
479 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
480 * options that appear before a routing header.
481 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
482 */
483 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
484 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
485 (recv_ancillary.crb_ipv6_recvdstopts &&
486 recv_ancillary.crb_ipv6_recvrthdr)) {
487 struct T_opthdr *toh;
488
489 toh = (struct T_opthdr *)ancil_buf;
490 toh->level = IPPROTO_IPV6;
491 toh->name = IPV6_DSTOPTS;
492 toh->len = sizeof (struct T_opthdr) +
493 ipp->ipp_rthdrdstoptslen;
494 toh->status = 0;
495 ancil_buf += sizeof (struct T_opthdr);
496 bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
497 ipp->ipp_rthdrdstoptslen);
498 ancil_buf += ipp->ipp_rthdrdstoptslen;
499 ancil_size -= toh->len;
500 }
501 }
502 if (recv_ancillary.crb_ipv6_recvrthdr &&
503 (ipp->ipp_fields & IPPF_RTHDR)) {
504 struct T_opthdr *toh;
505
506 toh = (struct T_opthdr *)ancil_buf;
507 toh->level = IPPROTO_IPV6;
508 toh->name = IPV6_RTHDR;
509 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
510 toh->status = 0;
511 ancil_buf += sizeof (struct T_opthdr);
512 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
513 ancil_buf += ipp->ipp_rthdrlen;
514 ancil_size -= toh->len;
515 }
516 if ((recv_ancillary.crb_ipv6_recvdstopts ||
517 recv_ancillary.crb_old_ipv6_recvdstopts) &&
518 (ipp->ipp_fields & IPPF_DSTOPTS)) {
519 struct T_opthdr *toh;
520
521 toh = (struct T_opthdr *)ancil_buf;
522 toh->level = IPPROTO_IPV6;
523 toh->name = IPV6_DSTOPTS;
524 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
525 toh->status = 0;
526 ancil_buf += sizeof (struct T_opthdr);
527 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
528 ancil_buf += ipp->ipp_dstoptslen;
529 ancil_size -= toh->len;
530 }
531
532 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
533 struct T_opthdr *toh;
534 cred_t *rcr = connp->conn_cred;
535
536 toh = (struct T_opthdr *)ancil_buf;
537 toh->level = SOL_SOCKET;
538 toh->name = SCM_UCRED;
Casper H.S. Dik67dbe2b2009-11-20 20:58:43 +0100539 toh->len = sizeof (struct T_opthdr) +
540 ucredminsize(ira->ira_cred);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800541 toh->status = 0;
542 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
543 ancil_buf += toh->len;
544 ancil_size -= toh->len;
545 }
546 if (recv_ancillary.crb_timestamp) {
547 struct T_opthdr *toh;
548
549 toh = (struct T_opthdr *)ancil_buf;
550 toh->level = SOL_SOCKET;
551 toh->name = SCM_TIMESTAMP;
552 toh->len = sizeof (struct T_opthdr) +
553 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
554 toh->status = 0;
555 ancil_buf += sizeof (struct T_opthdr);
556 /* Align for gethrestime() */
557 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
558 sizeof (intptr_t));
559 gethrestime((timestruc_t *)ancil_buf);
560 ancil_buf = (uchar_t *)toh + toh->len;
561 ancil_size -= toh->len;
562 }
563
Andy Fiddaman221e47f2020-09-18 20:04:57 +0000564 if (recv_ancillary.crb_recvtos &&
565 (ira->ira_flags & IRAF_IS_IPV4)) {
566 struct T_opthdr *toh;
567 uint8_t *dstptr;
568
569 toh = (struct T_opthdr *)ancil_buf;
570 toh->level = IPPROTO_IP;
571 toh->name = IP_RECVTOS;
572 toh->len = sizeof (struct T_opthdr) +
573 P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
574 toh->status = 0;
575 ancil_buf += sizeof (struct T_opthdr);
576 dstptr = (uint8_t *)ancil_buf;
577 *dstptr = ipp->ipp_type_of_service;
578 ancil_buf = (uchar_t *)toh + toh->len;
579 ancil_size -= toh->len;
580 ASSERT(__TPI_TOPT_ISALIGNED(toh));
581 }
582
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800583 if (recv_ancillary.crb_recvttl &&
584 (ira->ira_flags & IRAF_IS_IPV4)) {
585 struct T_opthdr *toh;
586 uint8_t *dstptr;
587
588 toh = (struct T_opthdr *)ancil_buf;
589 toh->level = IPPROTO_IP;
590 toh->name = IP_RECVTTL;
Andy Fiddaman221e47f2020-09-18 20:04:57 +0000591 toh->len = sizeof (struct T_opthdr) +
592 P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800593 toh->status = 0;
594 ancil_buf += sizeof (struct T_opthdr);
595 dstptr = (uint8_t *)ancil_buf;
596 *dstptr = ipp->ipp_hoplimit;
Andy Fiddaman221e47f2020-09-18 20:04:57 +0000597 ancil_buf = (uchar_t *)toh + toh->len;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800598 ancil_size -= toh->len;
Andy Fiddaman221e47f2020-09-18 20:04:57 +0000599 ASSERT(__TPI_TOPT_ISALIGNED(toh));
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800600 }
601
602 /* Consumed all of allocated space */
603 ASSERT(ancil_size == 0);
604
605}
606
607/*
608 * This routine retrieves the current status of socket options.
609 * It returns the size of the option retrieved, or -1.
610 */
611int
612conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
613 uchar_t *ptr)
614{
615 int *i1 = (int *)ptr;
616 conn_t *connp = coa->coa_connp;
617 ip_xmit_attr_t *ixa = coa->coa_ixa;
618 ip_pkt_t *ipp = coa->coa_ipp;
619 ip_stack_t *ipst = ixa->ixa_ipst;
620 uint_t len;
621
622 ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
623
624 switch (level) {
625 case SOL_SOCKET:
626 switch (name) {
627 case SO_DEBUG:
628 *i1 = connp->conn_debug ? SO_DEBUG : 0;
629 break; /* goto sizeof (int) option return */
630 case SO_KEEPALIVE:
631 *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
632 break;
633 case SO_LINGER: {
634 struct linger *lgr = (struct linger *)ptr;
635
636 lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
637 lgr->l_linger = connp->conn_lingertime;
638 }
639 return (sizeof (struct linger));
640
641 case SO_OOBINLINE:
642 *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
643 break;
644 case SO_REUSEADDR:
645 *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
646 break; /* goto sizeof (int) option return */
647 case SO_TYPE:
648 *i1 = connp->conn_so_type;
649 break; /* goto sizeof (int) option return */
650 case SO_DONTROUTE:
651 *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
652 SO_DONTROUTE : 0;
653 break; /* goto sizeof (int) option return */
654 case SO_USELOOPBACK:
655 *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
656 break; /* goto sizeof (int) option return */
657 case SO_BROADCAST:
658 *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
659 break; /* goto sizeof (int) option return */
660
661 case SO_SNDBUF:
662 *i1 = connp->conn_sndbuf;
663 break; /* goto sizeof (int) option return */
664 case SO_RCVBUF:
665 *i1 = connp->conn_rcvbuf;
666 break; /* goto sizeof (int) option return */
667 case SO_RCVTIMEO:
668 case SO_SNDTIMEO:
669 /*
670 * Pass these two options in order for third part
671 * protocol usage. Here just return directly.
672 */
673 *i1 = 0;
674 break;
675 case SO_DGRAM_ERRIND:
676 *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
677 break; /* goto sizeof (int) option return */
678 case SO_RECVUCRED:
679 *i1 = connp->conn_recv_ancillary.crb_recvucred;
680 break; /* goto sizeof (int) option return */
681 case SO_TIMESTAMP:
682 *i1 = connp->conn_recv_ancillary.crb_timestamp;
683 break; /* goto sizeof (int) option return */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800684 case SO_VRRP:
685 *i1 = connp->conn_isvrrp;
686 break; /* goto sizeof (int) option return */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800687 case SO_ANON_MLP:
688 *i1 = connp->conn_anon_mlp;
689 break; /* goto sizeof (int) option return */
690 case SO_MAC_EXEMPT:
691 *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
692 break; /* goto sizeof (int) option return */
693 case SO_MAC_IMPLICIT:
694 *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
695 break; /* goto sizeof (int) option return */
696 case SO_ALLZONES:
697 *i1 = connp->conn_allzones;
698 break; /* goto sizeof (int) option return */
699 case SO_EXCLBIND:
700 *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
701 break;
702 case SO_PROTOTYPE:
703 *i1 = connp->conn_proto;
704 break;
705
706 case SO_DOMAIN:
707 *i1 = connp->conn_family;
708 break;
709 default:
710 return (-1);
711 }
712 break;
713 case IPPROTO_IP:
714 if (connp->conn_family != AF_INET)
715 return (-1);
716 switch (name) {
717 case IP_OPTIONS:
718 case T_IP_OPTIONS:
719 if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
720 return (0);
721
722 len = ipp->ipp_ipv4_options_len;
723 if (len > 0) {
724 bcopy(ipp->ipp_ipv4_options, ptr, len);
725 }
726 return (len);
727
728 case IP_PKTINFO: {
729 /*
730 * This also handles IP_RECVPKTINFO.
731 * IP_PKTINFO and IP_RECVPKTINFO have same value.
732 * Differentiation is based on the size of the
733 * argument passed in.
734 */
735 struct in_pktinfo *pktinfo;
736
737#ifdef notdef
738 /* optcom doesn't provide a length with "get" */
739 if (inlen == sizeof (int)) {
740 /* This is IP_RECVPKTINFO option. */
741 *i1 = connp->conn_recv_ancillary.
742 crb_ip_recvpktinfo;
743 return (sizeof (int));
744 }
745#endif
746 /* XXX assumes that caller has room for max size! */
747
748 pktinfo = (struct in_pktinfo *)ptr;
749 pktinfo->ipi_ifindex = ixa->ixa_ifindex;
750 if (ipp->ipp_fields & IPPF_ADDR)
751 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
752 else
753 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
754 return (sizeof (struct in_pktinfo));
755 }
756 case IP_DONTFRAG:
757 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
758 return (sizeof (int));
759 case IP_TOS:
760 case T_IP_TOS:
761 *i1 = (int)ipp->ipp_type_of_service;
762 break; /* goto sizeof (int) option return */
763 case IP_TTL:
764 *i1 = (int)ipp->ipp_unicast_hops;
765 break; /* goto sizeof (int) option return */
766 case IP_DHCPINIT_IF:
767 return (-1);
768 case IP_NEXTHOP:
769 if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
770 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
771 return (sizeof (ipaddr_t));
772 } else {
773 return (0);
774 }
775
776 case IP_MULTICAST_IF:
777 /* 0 address if not set */
778 *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
779 return (sizeof (ipaddr_t));
780 case IP_MULTICAST_TTL:
781 *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
782 return (sizeof (uchar_t));
783 case IP_MULTICAST_LOOP:
784 *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
785 return (sizeof (uint8_t));
786 case IP_RECVOPTS:
787 *i1 = connp->conn_recv_ancillary.crb_recvopts;
788 break; /* goto sizeof (int) option return */
789 case IP_RECVDSTADDR:
790 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
791 break; /* goto sizeof (int) option return */
792 case IP_RECVIF:
793 *i1 = connp->conn_recv_ancillary.crb_recvif;
794 break; /* goto sizeof (int) option return */
795 case IP_RECVSLLA:
796 *i1 = connp->conn_recv_ancillary.crb_recvslla;
797 break; /* goto sizeof (int) option return */
798 case IP_RECVTTL:
799 *i1 = connp->conn_recv_ancillary.crb_recvttl;
800 break; /* goto sizeof (int) option return */
Andy Fiddaman221e47f2020-09-18 20:04:57 +0000801 case IP_RECVTOS:
802 *i1 = connp->conn_recv_ancillary.crb_recvtos;
803 break; /* goto sizeof (int) option return */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800804 case IP_ADD_MEMBERSHIP:
805 case IP_DROP_MEMBERSHIP:
806 case MCAST_JOIN_GROUP:
807 case MCAST_LEAVE_GROUP:
808 case IP_BLOCK_SOURCE:
809 case IP_UNBLOCK_SOURCE:
810 case IP_ADD_SOURCE_MEMBERSHIP:
811 case IP_DROP_SOURCE_MEMBERSHIP:
812 case MCAST_BLOCK_SOURCE:
813 case MCAST_UNBLOCK_SOURCE:
814 case MCAST_JOIN_SOURCE_GROUP:
815 case MCAST_LEAVE_SOURCE_GROUP:
816 case MRT_INIT:
817 case MRT_DONE:
818 case MRT_ADD_VIF:
819 case MRT_DEL_VIF:
820 case MRT_ADD_MFC:
821 case MRT_DEL_MFC:
822 /* cannot "get" the value for these */
823 return (-1);
824 case MRT_VERSION:
825 case MRT_ASSERT:
826 (void) ip_mrouter_get(name, connp, ptr);
827 return (sizeof (int));
828 case IP_SEC_OPT:
829 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
830 IPSEC_AF_V4));
831 case IP_BOUND_IF:
832 /* Zero if not set */
833 *i1 = connp->conn_bound_if;
834 break; /* goto sizeof (int) option return */
835 case IP_UNSPEC_SRC:
836 *i1 = connp->conn_unspec_src;
837 break; /* goto sizeof (int) option return */
838 case IP_BROADCAST_TTL:
839 if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
840 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
841 else
842 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
843 return (sizeof (uchar_t));
844 default:
845 return (-1);
846 }
847 break;
848 case IPPROTO_IPV6:
849 if (connp->conn_family != AF_INET6)
850 return (-1);
851 switch (name) {
852 case IPV6_UNICAST_HOPS:
853 *i1 = (int)ipp->ipp_unicast_hops;
854 break; /* goto sizeof (int) option return */
855 case IPV6_MULTICAST_IF:
856 /* 0 index if not set */
857 *i1 = ixa->ixa_multicast_ifindex;
858 break; /* goto sizeof (int) option return */
859 case IPV6_MULTICAST_HOPS:
860 *i1 = ixa->ixa_multicast_ttl;
861 break; /* goto sizeof (int) option return */
862 case IPV6_MULTICAST_LOOP:
863 *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
864 break; /* goto sizeof (int) option return */
865 case IPV6_JOIN_GROUP:
866 case IPV6_LEAVE_GROUP:
867 case MCAST_JOIN_GROUP:
868 case MCAST_LEAVE_GROUP:
869 case MCAST_BLOCK_SOURCE:
870 case MCAST_UNBLOCK_SOURCE:
871 case MCAST_JOIN_SOURCE_GROUP:
872 case MCAST_LEAVE_SOURCE_GROUP:
873 /* cannot "get" the value for these */
874 return (-1);
875 case IPV6_BOUND_IF:
876 /* Zero if not set */
877 *i1 = connp->conn_bound_if;
878 break; /* goto sizeof (int) option return */
879 case IPV6_UNSPEC_SRC:
880 *i1 = connp->conn_unspec_src;
881 break; /* goto sizeof (int) option return */
882 case IPV6_RECVPKTINFO:
883 *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
884 break; /* goto sizeof (int) option return */
885 case IPV6_RECVTCLASS:
886 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
887 break; /* goto sizeof (int) option return */
888 case IPV6_RECVPATHMTU:
889 *i1 = connp->conn_ipv6_recvpathmtu;
890 break; /* goto sizeof (int) option return */
891 case IPV6_RECVHOPLIMIT:
892 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
893 break; /* goto sizeof (int) option return */
894 case IPV6_RECVHOPOPTS:
895 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
896 break; /* goto sizeof (int) option return */
897 case IPV6_RECVDSTOPTS:
898 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
899 break; /* goto sizeof (int) option return */
900 case _OLD_IPV6_RECVDSTOPTS:
901 *i1 =
902 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
903 break; /* goto sizeof (int) option return */
904 case IPV6_RECVRTHDRDSTOPTS:
905 *i1 = connp->conn_recv_ancillary.
906 crb_ipv6_recvrthdrdstopts;
907 break; /* goto sizeof (int) option return */
908 case IPV6_RECVRTHDR:
909 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
910 break; /* goto sizeof (int) option return */
911 case IPV6_PKTINFO: {
912 /* XXX assumes that caller has room for max size! */
913 struct in6_pktinfo *pkti;
914
915 pkti = (struct in6_pktinfo *)ptr;
916 pkti->ipi6_ifindex = ixa->ixa_ifindex;
917 if (ipp->ipp_fields & IPPF_ADDR)
918 pkti->ipi6_addr = ipp->ipp_addr;
919 else
920 pkti->ipi6_addr = ipv6_all_zeros;
921 return (sizeof (struct in6_pktinfo));
922 }
923 case IPV6_TCLASS:
924 *i1 = ipp->ipp_tclass;
925 break; /* goto sizeof (int) option return */
926 case IPV6_NEXTHOP: {
927 sin6_t *sin6 = (sin6_t *)ptr;
928
929 if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
930 return (0);
931
932 *sin6 = sin6_null;
933 sin6->sin6_family = AF_INET6;
934 sin6->sin6_addr = ixa->ixa_nexthop_v6;
935
936 return (sizeof (sin6_t));
937 }
938 case IPV6_HOPOPTS:
939 if (!(ipp->ipp_fields & IPPF_HOPOPTS))
940 return (0);
941 bcopy(ipp->ipp_hopopts, ptr,
942 ipp->ipp_hopoptslen);
943 return (ipp->ipp_hopoptslen);
944 case IPV6_RTHDRDSTOPTS:
945 if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
946 return (0);
947 bcopy(ipp->ipp_rthdrdstopts, ptr,
948 ipp->ipp_rthdrdstoptslen);
949 return (ipp->ipp_rthdrdstoptslen);
950 case IPV6_RTHDR:
951 if (!(ipp->ipp_fields & IPPF_RTHDR))
952 return (0);
953 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
954 return (ipp->ipp_rthdrlen);
955 case IPV6_DSTOPTS:
956 if (!(ipp->ipp_fields & IPPF_DSTOPTS))
957 return (0);
958 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
959 return (ipp->ipp_dstoptslen);
960 case IPV6_PATHMTU:
961 return (ip_fill_mtuinfo(connp, ixa,
962 (struct ip6_mtuinfo *)ptr));
963 case IPV6_SEC_OPT:
964 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
965 IPSEC_AF_V6));
966 case IPV6_SRC_PREFERENCES:
967 return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
968 case IPV6_DONTFRAG:
969 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
970 return (sizeof (int));
971 case IPV6_USE_MIN_MTU:
972 if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
973 *i1 = ixa->ixa_use_min_mtu;
974 else
975 *i1 = IPV6_USE_MIN_MTU_MULTICAST;
976 break;
977 case IPV6_V6ONLY:
978 *i1 = connp->conn_ipv6_v6only;
979 return (sizeof (int));
980 default:
981 return (-1);
982 }
983 break;
984 case IPPROTO_UDP:
985 switch (name) {
986 case UDP_ANONPRIVBIND:
987 *i1 = connp->conn_anon_priv_bind;
988 break;
989 case UDP_EXCLBIND:
990 *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
991 break;
992 default:
993 return (-1);
994 }
995 break;
996 case IPPROTO_TCP:
997 switch (name) {
998 case TCP_RECVDSTADDR:
999 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
1000 break;
1001 case TCP_ANONPRIVBIND:
1002 *i1 = connp->conn_anon_priv_bind;
1003 break;
1004 case TCP_EXCLBIND:
1005 *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
1006 break;
1007 default:
1008 return (-1);
1009 }
1010 break;
1011 default:
1012 return (-1);
1013 }
1014 return (sizeof (int));
1015}
1016
1017static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
1018 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1019static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
1020 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1021static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
1022 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1023static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
1024 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1025static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
1026 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1027
1028/*
1029 * This routine sets the most common socket options including some
1030 * that are transport/ULP specific.
1031 * It returns errno or zero.
1032 *
1033 * For fixed length options, there is no sanity check
1034 * of passed in length is done. It is assumed *_optcom_req()
1035 * routines do the right thing.
1036 */
1037int
1038conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1039 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1040{
1041 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1042
1043 /* We have different functions for different levels */
1044 switch (level) {
1045 case SOL_SOCKET:
1046 return (conn_opt_set_socket(coa, name, inlen, invalp,
1047 checkonly, cr));
1048 case IPPROTO_IP:
1049 return (conn_opt_set_ip(coa, name, inlen, invalp,
1050 checkonly, cr));
1051 case IPPROTO_IPV6:
1052 return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1053 checkonly, cr));
1054 case IPPROTO_UDP:
1055 return (conn_opt_set_udp(coa, name, inlen, invalp,
1056 checkonly, cr));
1057 case IPPROTO_TCP:
1058 return (conn_opt_set_tcp(coa, name, inlen, invalp,
1059 checkonly, cr));
1060 default:
1061 return (0);
1062 }
1063}
1064
1065/*
1066 * Handle SOL_SOCKET
1067 * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1068 * it implement their own checks and setting of conn_proto.
1069 */
1070/* ARGSUSED1 */
1071static int
1072conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1073 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1074{
1075 conn_t *connp = coa->coa_connp;
1076 ip_xmit_attr_t *ixa = coa->coa_ixa;
1077 int *i1 = (int *)invalp;
1078 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1079
1080 switch (name) {
1081 case SO_ALLZONES:
1082 if (IPCL_IS_BOUND(connp))
1083 return (EINVAL);
1084 break;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001085 case SO_VRRP:
1086 if (secpolicy_ip_config(cr, checkonly) != 0)
1087 return (EACCES);
1088 break;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001089 case SO_MAC_EXEMPT:
1090 if (secpolicy_net_mac_aware(cr) != 0)
1091 return (EACCES);
1092 if (IPCL_IS_BOUND(connp))
1093 return (EINVAL);
1094 break;
1095 case SO_MAC_IMPLICIT:
1096 if (secpolicy_net_mac_implicit(cr) != 0)
1097 return (EACCES);
1098 break;
1099 }
1100 if (checkonly)
1101 return (0);
1102
1103 mutex_enter(&connp->conn_lock);
1104 /* Here we set the actual option value */
1105 switch (name) {
1106 case SO_DEBUG:
1107 connp->conn_debug = onoff;
1108 break;
1109 case SO_KEEPALIVE:
1110 connp->conn_keepalive = onoff;
1111 break;
1112 case SO_LINGER: {
1113 struct linger *lgr = (struct linger *)invalp;
1114
1115 if (lgr->l_onoff) {
1116 connp->conn_linger = 1;
1117 connp->conn_lingertime = lgr->l_linger;
1118 } else {
1119 connp->conn_linger = 0;
1120 connp->conn_lingertime = 0;
1121 }
1122 break;
1123 }
1124 case SO_OOBINLINE:
1125 connp->conn_oobinline = onoff;
1126 coa->coa_changed |= COA_OOBINLINE_CHANGED;
1127 break;
1128 case SO_REUSEADDR:
1129 connp->conn_reuseaddr = onoff;
1130 break;
1131 case SO_DONTROUTE:
1132 if (onoff)
1133 ixa->ixa_flags |= IXAF_DONTROUTE;
1134 else
1135 ixa->ixa_flags &= ~IXAF_DONTROUTE;
1136 coa->coa_changed |= COA_ROUTE_CHANGED;
1137 break;
1138 case SO_USELOOPBACK:
1139 connp->conn_useloopback = onoff;
1140 break;
1141 case SO_BROADCAST:
1142 connp->conn_broadcast = onoff;
1143 break;
1144 case SO_SNDBUF:
1145 /* ULP has range checked the value */
1146 connp->conn_sndbuf = *i1;
1147 coa->coa_changed |= COA_SNDBUF_CHANGED;
1148 break;
1149 case SO_RCVBUF:
1150 /* ULP has range checked the value */
1151 connp->conn_rcvbuf = *i1;
1152 coa->coa_changed |= COA_RCVBUF_CHANGED;
1153 break;
1154 case SO_RCVTIMEO:
1155 case SO_SNDTIMEO:
1156 /*
1157 * Pass these two options in order for third part
1158 * protocol usage.
1159 */
1160 break;
1161 case SO_DGRAM_ERRIND:
1162 connp->conn_dgram_errind = onoff;
1163 break;
1164 case SO_RECVUCRED:
1165 connp->conn_recv_ancillary.crb_recvucred = onoff;
1166 break;
1167 case SO_ALLZONES:
1168 connp->conn_allzones = onoff;
1169 coa->coa_changed |= COA_ROUTE_CHANGED;
1170 if (onoff)
1171 ixa->ixa_zoneid = ALL_ZONES;
1172 else
1173 ixa->ixa_zoneid = connp->conn_zoneid;
1174 break;
1175 case SO_TIMESTAMP:
1176 connp->conn_recv_ancillary.crb_timestamp = onoff;
1177 break;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001178 case SO_VRRP:
1179 connp->conn_isvrrp = onoff;
1180 break;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001181 case SO_ANON_MLP:
1182 connp->conn_anon_mlp = onoff;
1183 break;
1184 case SO_MAC_EXEMPT:
1185 connp->conn_mac_mode = onoff ?
1186 CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1187 break;
1188 case SO_MAC_IMPLICIT:
1189 connp->conn_mac_mode = onoff ?
1190 CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1191 break;
1192 case SO_EXCLBIND:
1193 connp->conn_exclbind = onoff;
1194 break;
1195 }
1196 mutex_exit(&connp->conn_lock);
1197 return (0);
1198}
1199
1200/* Handle IPPROTO_IP */
1201static int
1202conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1203 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1204{
1205 conn_t *connp = coa->coa_connp;
1206 ip_xmit_attr_t *ixa = coa->coa_ixa;
1207 ip_pkt_t *ipp = coa->coa_ipp;
1208 int *i1 = (int *)invalp;
1209 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1210 ipaddr_t addr = (ipaddr_t)*i1;
1211 uint_t ifindex;
1212 zoneid_t zoneid = IPCL_ZONEID(connp);
1213 ipif_t *ipif;
1214 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1215 int error;
1216
1217 if (connp->conn_family != AF_INET)
1218 return (EINVAL);
1219
Toomas Soomeab82c292019-12-28 14:24:51 +02001220 ifindex = UINT_MAX;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001221 switch (name) {
1222 case IP_TTL:
1223 /* Don't allow zero */
1224 if (*i1 < 1 || *i1 > 255)
1225 return (EINVAL);
1226 break;
1227 case IP_MULTICAST_IF:
1228 if (addr == INADDR_ANY) {
1229 /* Clear */
1230 ifindex = 0;
1231 break;
1232 }
1233 ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1234 if (ipif == NULL)
1235 return (EHOSTUNREACH);
1236 /* not supported by the virtual network iface */
1237 if (IS_VNI(ipif->ipif_ill)) {
1238 ipif_refrele(ipif);
1239 return (EINVAL);
1240 }
1241 ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1242 ipif_refrele(ipif);
1243 break;
1244 case IP_NEXTHOP: {
1245 ire_t *ire;
1246
1247 if (addr == INADDR_ANY) {
1248 /* Clear */
1249 break;
1250 }
1251 /* Verify that the next-hop is on-link */
1252 ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1253 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1254 if (ire == NULL)
1255 return (EHOSTUNREACH);
1256 ire_refrele(ire);
1257 break;
1258 }
1259 case IP_OPTIONS:
1260 case T_IP_OPTIONS: {
1261 uint_t newlen;
1262
1263 if (ipp->ipp_fields & IPPF_LABEL_V4)
1264 newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1265 else
1266 newlen = inlen;
1267 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1268 return (EINVAL);
1269 }
1270 break;
1271 }
1272 case IP_PKTINFO: {
1273 struct in_pktinfo *pktinfo;
1274
1275 /* Two different valid lengths */
1276 if (inlen != sizeof (int) &&
1277 inlen != sizeof (struct in_pktinfo))
1278 return (EINVAL);
1279 if (inlen == sizeof (int))
1280 break;
1281
1282 pktinfo = (struct in_pktinfo *)invalp;
1283 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1284 switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1285 zoneid, ipst, B_FALSE)) {
1286 case IPVL_UNICAST_UP:
1287 case IPVL_UNICAST_DOWN:
1288 break;
1289 default:
1290 return (EADDRNOTAVAIL);
1291 }
1292 }
Erik Nordmarkb1b66e02010-01-04 23:29:24 -08001293 if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1294 B_FALSE, ipst))
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001295 return (ENXIO);
1296 break;
1297 }
1298 case IP_BOUND_IF:
1299 ifindex = *(uint_t *)i1;
1300
1301 /* Just check it is ok. */
Erik Nordmarkb1b66e02010-01-04 23:29:24 -08001302 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001303 return (ENXIO);
1304 break;
1305 }
1306 if (checkonly)
1307 return (0);
1308
1309 /* Here we set the actual option value */
1310 /*
1311 * conn_lock protects the bitfields, and is used to
1312 * set the fields atomically. Not needed for ixa settings since
1313 * the caller has an exclusive copy of the ixa.
1314 * We can not hold conn_lock across the multicast options though.
1315 */
1316 switch (name) {
1317 case IP_OPTIONS:
1318 case T_IP_OPTIONS:
1319 /* Save options for use by IP. */
1320 mutex_enter(&connp->conn_lock);
1321 error = optcom_pkt_set(invalp, inlen,
1322 (uchar_t **)&ipp->ipp_ipv4_options,
1323 &ipp->ipp_ipv4_options_len);
1324 if (error != 0) {
1325 mutex_exit(&connp->conn_lock);
1326 return (error);
1327 }
1328 if (ipp->ipp_ipv4_options_len == 0) {
1329 ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1330 } else {
1331 ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1332 }
1333 mutex_exit(&connp->conn_lock);
1334 coa->coa_changed |= COA_HEADER_CHANGED;
1335 coa->coa_changed |= COA_WROFF_CHANGED;
1336 break;
1337
1338 case IP_TTL:
1339 mutex_enter(&connp->conn_lock);
1340 ipp->ipp_unicast_hops = *i1;
1341 mutex_exit(&connp->conn_lock);
1342 coa->coa_changed |= COA_HEADER_CHANGED;
1343 break;
1344 case IP_TOS:
1345 case T_IP_TOS:
1346 mutex_enter(&connp->conn_lock);
1347 if (*i1 == -1) {
1348 ipp->ipp_type_of_service = 0;
1349 } else {
1350 ipp->ipp_type_of_service = *i1;
1351 }
1352 mutex_exit(&connp->conn_lock);
1353 coa->coa_changed |= COA_HEADER_CHANGED;
1354 break;
1355 case IP_MULTICAST_IF:
1356 ixa->ixa_multicast_ifindex = ifindex;
1357 ixa->ixa_multicast_ifaddr = addr;
1358 coa->coa_changed |= COA_ROUTE_CHANGED;
1359 break;
1360 case IP_MULTICAST_TTL:
1361 ixa->ixa_multicast_ttl = *invalp;
1362 /* Handled automatically by ip_output */
1363 break;
1364 case IP_MULTICAST_LOOP:
1365 if (*invalp != 0)
1366 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1367 else
1368 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1369 /* Handled automatically by ip_output */
1370 break;
1371 case IP_RECVOPTS:
1372 mutex_enter(&connp->conn_lock);
1373 connp->conn_recv_ancillary.crb_recvopts = onoff;
1374 mutex_exit(&connp->conn_lock);
1375 break;
1376 case IP_RECVDSTADDR:
1377 mutex_enter(&connp->conn_lock);
1378 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1379 mutex_exit(&connp->conn_lock);
1380 break;
1381 case IP_RECVIF:
1382 mutex_enter(&connp->conn_lock);
1383 connp->conn_recv_ancillary.crb_recvif = onoff;
1384 mutex_exit(&connp->conn_lock);
1385 break;
1386 case IP_RECVSLLA:
1387 mutex_enter(&connp->conn_lock);
1388 connp->conn_recv_ancillary.crb_recvslla = onoff;
1389 mutex_exit(&connp->conn_lock);
1390 break;
1391 case IP_RECVTTL:
1392 mutex_enter(&connp->conn_lock);
1393 connp->conn_recv_ancillary.crb_recvttl = onoff;
1394 mutex_exit(&connp->conn_lock);
1395 break;
Andy Fiddaman221e47f2020-09-18 20:04:57 +00001396 case IP_RECVTOS:
1397 mutex_enter(&connp->conn_lock);
1398 connp->conn_recv_ancillary.crb_recvtos = onoff;
1399 mutex_exit(&connp->conn_lock);
1400 break;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001401 case IP_PKTINFO: {
1402 /*
1403 * This also handles IP_RECVPKTINFO.
1404 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1405 * Differentiation is based on the size of the
1406 * argument passed in.
1407 */
1408 struct in_pktinfo *pktinfo;
1409
1410 if (inlen == sizeof (int)) {
1411 /* This is IP_RECVPKTINFO option. */
1412 mutex_enter(&connp->conn_lock);
1413 connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1414 onoff;
1415 mutex_exit(&connp->conn_lock);
1416 break;
1417 }
1418
1419 /* This is IP_PKTINFO option. */
1420 mutex_enter(&connp->conn_lock);
1421 pktinfo = (struct in_pktinfo *)invalp;
Marcel Telkaac3e5be2010-06-10 08:35:29 +02001422 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001423 ipp->ipp_fields |= IPPF_ADDR;
1424 IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1425 &ipp->ipp_addr);
1426 } else {
1427 ipp->ipp_fields &= ~IPPF_ADDR;
1428 ipp->ipp_addr = ipv6_all_zeros;
1429 }
1430 mutex_exit(&connp->conn_lock);
1431 ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1432 coa->coa_changed |= COA_ROUTE_CHANGED;
1433 coa->coa_changed |= COA_HEADER_CHANGED;
1434 break;
1435 }
1436 case IP_DONTFRAG:
1437 if (onoff) {
1438 ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1439 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1440 } else {
1441 ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1442 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1443 }
1444 /* Need to redo ip_attr_connect */
1445 coa->coa_changed |= COA_ROUTE_CHANGED;
1446 break;
1447 case IP_ADD_MEMBERSHIP:
1448 case IP_DROP_MEMBERSHIP:
1449 case MCAST_JOIN_GROUP:
1450 case MCAST_LEAVE_GROUP:
1451 return (ip_opt_set_multicast_group(connp, name,
1452 invalp, B_FALSE, checkonly));
1453
1454 case IP_BLOCK_SOURCE:
1455 case IP_UNBLOCK_SOURCE:
1456 case IP_ADD_SOURCE_MEMBERSHIP:
1457 case IP_DROP_SOURCE_MEMBERSHIP:
1458 case MCAST_BLOCK_SOURCE:
1459 case MCAST_UNBLOCK_SOURCE:
1460 case MCAST_JOIN_SOURCE_GROUP:
1461 case MCAST_LEAVE_SOURCE_GROUP:
1462 return (ip_opt_set_multicast_sources(connp, name,
1463 invalp, B_FALSE, checkonly));
1464
1465 case IP_SEC_OPT:
1466 mutex_enter(&connp->conn_lock);
1467 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1468 mutex_exit(&connp->conn_lock);
1469 if (error != 0) {
1470 return (error);
1471 }
1472 /* This is an IPsec policy change - redo ip_attr_connect */
1473 coa->coa_changed |= COA_ROUTE_CHANGED;
1474 break;
1475 case IP_NEXTHOP:
1476 ixa->ixa_nexthop_v4 = addr;
1477 if (addr != INADDR_ANY)
1478 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1479 else
1480 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1481 coa->coa_changed |= COA_ROUTE_CHANGED;
1482 break;
1483
1484 case IP_BOUND_IF:
1485 ixa->ixa_ifindex = ifindex; /* Send */
1486 mutex_enter(&connp->conn_lock);
1487 connp->conn_incoming_ifindex = ifindex; /* Receive */
1488 connp->conn_bound_if = ifindex; /* getsockopt */
1489 mutex_exit(&connp->conn_lock);
1490 coa->coa_changed |= COA_ROUTE_CHANGED;
1491 break;
1492 case IP_UNSPEC_SRC:
1493 mutex_enter(&connp->conn_lock);
1494 connp->conn_unspec_src = onoff;
1495 if (onoff)
1496 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1497 else
1498 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1499
1500 mutex_exit(&connp->conn_lock);
1501 break;
1502 case IP_BROADCAST_TTL:
1503 ixa->ixa_broadcast_ttl = *invalp;
1504 ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1505 /* Handled automatically by ip_output */
1506 break;
1507 case MRT_INIT:
1508 case MRT_DONE:
1509 case MRT_ADD_VIF:
1510 case MRT_DEL_VIF:
1511 case MRT_ADD_MFC:
1512 case MRT_DEL_MFC:
1513 case MRT_ASSERT:
1514 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1515 return (error);
1516 }
1517 error = ip_mrouter_set((int)name, connp, checkonly,
1518 (uchar_t *)invalp, inlen);
1519 if (error) {
1520 return (error);
1521 }
1522 return (0);
1523
1524 }
1525 return (0);
1526}
1527
1528/* Handle IPPROTO_IPV6 */
1529static int
1530conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1531 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1532{
1533 conn_t *connp = coa->coa_connp;
1534 ip_xmit_attr_t *ixa = coa->coa_ixa;
1535 ip_pkt_t *ipp = coa->coa_ipp;
1536 int *i1 = (int *)invalp;
1537 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1538 uint_t ifindex;
1539 zoneid_t zoneid = IPCL_ZONEID(connp);
1540 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1541 int error;
1542
1543 if (connp->conn_family != AF_INET6)
1544 return (EINVAL);
1545
Toomas Soomeab82c292019-12-28 14:24:51 +02001546 ifindex = UINT_MAX;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001547 switch (name) {
1548 case IPV6_MULTICAST_IF:
1549 /*
1550 * The only possible error is EINVAL.
1551 * We call this option on both V4 and V6
1552 * If both fail, then this call returns
1553 * EINVAL. If at least one of them succeeds we
1554 * return success.
1555 */
1556 ifindex = *(uint_t *)i1;
1557
Erik Nordmarkb1b66e02010-01-04 23:29:24 -08001558 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1559 !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001560 return (EINVAL);
1561 break;
1562 case IPV6_UNICAST_HOPS:
1563 /* Don't allow zero. -1 means to use default */
1564 if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1565 return (EINVAL);
1566 break;
1567 case IPV6_MULTICAST_HOPS:
1568 /* -1 means use default */
1569 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1570 return (EINVAL);
1571 break;
1572 case IPV6_MULTICAST_LOOP:
1573 if (*i1 != 0 && *i1 != 1)
1574 return (EINVAL);
1575 break;
1576 case IPV6_BOUND_IF:
1577 ifindex = *(uint_t *)i1;
1578
Erik Nordmarkb1b66e02010-01-04 23:29:24 -08001579 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001580 return (ENXIO);
1581 break;
1582 case IPV6_PKTINFO: {
1583 struct in6_pktinfo *pkti;
1584 boolean_t isv6;
1585
1586 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1587 return (EINVAL);
1588 if (inlen == 0)
1589 break; /* Clear values below */
1590
1591 /*
1592 * Verify the source address and ifindex. Privileged users
1593 * can use any source address.
1594 */
1595 pkti = (struct in6_pktinfo *)invalp;
1596
1597 /*
1598 * For link-local addresses we use the ipi6_ifindex when
1599 * we verify the local address.
1600 * If net_rawaccess then any source address can be used.
1601 */
1602 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1603 secpolicy_net_rawaccess(cr) != 0) {
1604 uint_t scopeid = 0;
1605 in6_addr_t *v6src = &pkti->ipi6_addr;
1606 ipaddr_t v4src;
1607 ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1608
1609 if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1610 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1611 if (v4src != INADDR_ANY) {
1612 laddr_type = ip_laddr_verify_v4(v4src,
1613 zoneid, ipst, B_FALSE);
1614 }
1615 } else {
1616 if (IN6_IS_ADDR_LINKSCOPE(v6src))
1617 scopeid = pkti->ipi6_ifindex;
1618
1619 laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1620 ipst, B_FALSE, scopeid);
1621 }
1622 switch (laddr_type) {
1623 case IPVL_UNICAST_UP:
1624 case IPVL_UNICAST_DOWN:
1625 break;
1626 default:
1627 return (EADDRNOTAVAIL);
1628 }
1629 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1630 } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1631 /* Allow any source */
1632 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1633 }
1634 isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
Erik Nordmarkb1b66e02010-01-04 23:29:24 -08001635 if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1636 ipst))
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001637 return (ENXIO);
1638 break;
1639 }
1640 case IPV6_HOPLIMIT:
1641 /* It is only allowed as ancilary data */
1642 if (!coa->coa_ancillary)
1643 return (EINVAL);
1644
1645 if (inlen != 0 && inlen != sizeof (int))
1646 return (EINVAL);
1647 if (inlen == sizeof (int)) {
1648 if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1649 return (EINVAL);
1650 }
1651 break;
1652 case IPV6_TCLASS:
1653 if (inlen != 0 && inlen != sizeof (int))
1654 return (EINVAL);
1655 if (inlen == sizeof (int)) {
1656 if (*i1 > 255 || *i1 < -1)
1657 return (EINVAL);
1658 }
1659 break;
1660 case IPV6_NEXTHOP:
1661 if (inlen != 0 && inlen != sizeof (sin6_t))
1662 return (EINVAL);
1663 if (inlen == sizeof (sin6_t)) {
1664 sin6_t *sin6 = (sin6_t *)invalp;
1665 ire_t *ire;
1666
1667 if (sin6->sin6_family != AF_INET6)
1668 return (EAFNOSUPPORT);
1669 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1670 return (EADDRNOTAVAIL);
1671
1672 /* Verify that the next-hop is on-link */
1673 ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1674 0, 0, IRE_ONLINK, NULL, zoneid,
1675 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1676 if (ire == NULL)
1677 return (EHOSTUNREACH);
1678 ire_refrele(ire);
1679 break;
1680 }
1681 break;
1682 case IPV6_RTHDR:
1683 case IPV6_DSTOPTS:
1684 case IPV6_RTHDRDSTOPTS:
1685 case IPV6_HOPOPTS: {
1686 /* All have the length field in the same place */
1687 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1688 /*
1689 * Sanity checks - minimum size, size a multiple of
1690 * eight bytes, and matching size passed in.
1691 */
1692 if (inlen != 0 &&
1693 inlen != (8 * (hopts->ip6h_len + 1)))
1694 return (EINVAL);
1695 break;
1696 }
1697 case IPV6_PATHMTU:
1698 /* Can't be set */
1699 return (EINVAL);
1700
1701 case IPV6_USE_MIN_MTU:
1702 if (inlen != sizeof (int))
1703 return (EINVAL);
1704 if (*i1 < -1 || *i1 > 1)
1705 return (EINVAL);
1706 break;
1707 case IPV6_SRC_PREFERENCES:
1708 if (inlen != sizeof (uint32_t))
1709 return (EINVAL);
1710 break;
1711 case IPV6_V6ONLY:
1712 if (*i1 < 0 || *i1 > 1) {
1713 return (EINVAL);
1714 }
1715 break;
1716 }
1717 if (checkonly)
1718 return (0);
1719
1720 /* Here we set the actual option value */
1721 /*
1722 * conn_lock protects the bitfields, and is used to
1723 * set the fields atomically. Not needed for ixa settings since
1724 * the caller has an exclusive copy of the ixa.
1725 * We can not hold conn_lock across the multicast options though.
1726 */
1727 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1728 switch (name) {
1729 case IPV6_MULTICAST_IF:
1730 ixa->ixa_multicast_ifindex = ifindex;
1731 /* Need to redo ip_attr_connect */
1732 coa->coa_changed |= COA_ROUTE_CHANGED;
1733 break;
1734 case IPV6_UNICAST_HOPS:
1735 /* -1 means use default */
1736 mutex_enter(&connp->conn_lock);
1737 if (*i1 == -1) {
1738 ipp->ipp_unicast_hops = connp->conn_default_ttl;
1739 } else {
1740 ipp->ipp_unicast_hops = (uint8_t)*i1;
1741 }
1742 mutex_exit(&connp->conn_lock);
1743 coa->coa_changed |= COA_HEADER_CHANGED;
1744 break;
1745 case IPV6_MULTICAST_HOPS:
1746 /* -1 means use default */
1747 if (*i1 == -1) {
1748 ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1749 } else {
1750 ixa->ixa_multicast_ttl = (uint8_t)*i1;
1751 }
1752 /* Handled automatically by ip_output */
1753 break;
1754 case IPV6_MULTICAST_LOOP:
1755 if (*i1 != 0)
1756 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1757 else
1758 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1759 /* Handled automatically by ip_output */
1760 break;
1761 case IPV6_JOIN_GROUP:
1762 case IPV6_LEAVE_GROUP:
1763 case MCAST_JOIN_GROUP:
1764 case MCAST_LEAVE_GROUP:
1765 return (ip_opt_set_multicast_group(connp, name,
1766 invalp, B_TRUE, checkonly));
1767
1768 case MCAST_BLOCK_SOURCE:
1769 case MCAST_UNBLOCK_SOURCE:
1770 case MCAST_JOIN_SOURCE_GROUP:
1771 case MCAST_LEAVE_SOURCE_GROUP:
1772 return (ip_opt_set_multicast_sources(connp, name,
1773 invalp, B_TRUE, checkonly));
1774
1775 case IPV6_BOUND_IF:
1776 ixa->ixa_ifindex = ifindex; /* Send */
1777 mutex_enter(&connp->conn_lock);
1778 connp->conn_incoming_ifindex = ifindex; /* Receive */
1779 connp->conn_bound_if = ifindex; /* getsockopt */
1780 mutex_exit(&connp->conn_lock);
1781 coa->coa_changed |= COA_ROUTE_CHANGED;
1782 break;
1783 case IPV6_UNSPEC_SRC:
1784 mutex_enter(&connp->conn_lock);
1785 connp->conn_unspec_src = onoff;
1786 if (onoff)
1787 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1788 else
1789 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1790 mutex_exit(&connp->conn_lock);
1791 break;
1792 case IPV6_RECVPKTINFO:
1793 mutex_enter(&connp->conn_lock);
1794 connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1795 mutex_exit(&connp->conn_lock);
1796 break;
1797 case IPV6_RECVTCLASS:
1798 mutex_enter(&connp->conn_lock);
1799 connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1800 mutex_exit(&connp->conn_lock);
1801 break;
1802 case IPV6_RECVPATHMTU:
1803 mutex_enter(&connp->conn_lock);
1804 connp->conn_ipv6_recvpathmtu = onoff;
1805 mutex_exit(&connp->conn_lock);
1806 break;
1807 case IPV6_RECVHOPLIMIT:
1808 mutex_enter(&connp->conn_lock);
1809 connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1810 onoff;
1811 mutex_exit(&connp->conn_lock);
1812 break;
1813 case IPV6_RECVHOPOPTS:
1814 mutex_enter(&connp->conn_lock);
1815 connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1816 mutex_exit(&connp->conn_lock);
1817 break;
1818 case IPV6_RECVDSTOPTS:
1819 mutex_enter(&connp->conn_lock);
1820 connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1821 mutex_exit(&connp->conn_lock);
1822 break;
1823 case _OLD_IPV6_RECVDSTOPTS:
1824 mutex_enter(&connp->conn_lock);
1825 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1826 onoff;
1827 mutex_exit(&connp->conn_lock);
1828 break;
1829 case IPV6_RECVRTHDRDSTOPTS:
1830 mutex_enter(&connp->conn_lock);
1831 connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1832 onoff;
1833 mutex_exit(&connp->conn_lock);
1834 break;
1835 case IPV6_RECVRTHDR:
1836 mutex_enter(&connp->conn_lock);
1837 connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1838 mutex_exit(&connp->conn_lock);
1839 break;
1840 case IPV6_PKTINFO:
1841 mutex_enter(&connp->conn_lock);
1842 if (inlen == 0) {
1843 ipp->ipp_fields &= ~IPPF_ADDR;
1844 ipp->ipp_addr = ipv6_all_zeros;
1845 ixa->ixa_ifindex = 0;
1846 } else {
1847 struct in6_pktinfo *pkti;
1848
1849 pkti = (struct in6_pktinfo *)invalp;
1850 ipp->ipp_addr = pkti->ipi6_addr;
1851 if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1852 ipp->ipp_fields |= IPPF_ADDR;
1853 else
1854 ipp->ipp_fields &= ~IPPF_ADDR;
1855 ixa->ixa_ifindex = pkti->ipi6_ifindex;
1856 }
1857 mutex_exit(&connp->conn_lock);
1858 /* Source and ifindex might have changed */
1859 coa->coa_changed |= COA_HEADER_CHANGED;
1860 coa->coa_changed |= COA_ROUTE_CHANGED;
1861 break;
1862 case IPV6_HOPLIMIT:
1863 mutex_enter(&connp->conn_lock);
1864 if (inlen == 0 || *i1 == -1) {
1865 /* Revert to default */
1866 ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1867 ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1868 } else {
1869 ipp->ipp_hoplimit = *i1;
1870 ipp->ipp_fields |= IPPF_HOPLIMIT;
1871 /* Ensure that it sticks for multicast packets */
1872 ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1873 }
1874 mutex_exit(&connp->conn_lock);
1875 coa->coa_changed |= COA_HEADER_CHANGED;
1876 break;
1877 case IPV6_TCLASS:
1878 /*
1879 * IPV6_TCLASS accepts -1 as use kernel default
1880 * and [0, 255] as the actualy traffic class.
1881 */
1882 mutex_enter(&connp->conn_lock);
1883 if (inlen == 0 || *i1 == -1) {
1884 ipp->ipp_tclass = 0;
1885 ipp->ipp_fields &= ~IPPF_TCLASS;
1886 } else {
1887 ipp->ipp_tclass = *i1;
1888 ipp->ipp_fields |= IPPF_TCLASS;
1889 }
1890 mutex_exit(&connp->conn_lock);
1891 coa->coa_changed |= COA_HEADER_CHANGED;
1892 break;
1893 case IPV6_NEXTHOP:
1894 if (inlen == 0) {
1895 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1896 } else {
1897 sin6_t *sin6 = (sin6_t *)invalp;
1898
1899 ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1900 if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1901 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1902 else
1903 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1904 }
1905 coa->coa_changed |= COA_ROUTE_CHANGED;
1906 break;
1907 case IPV6_HOPOPTS:
1908 mutex_enter(&connp->conn_lock);
1909 error = optcom_pkt_set(invalp, inlen,
1910 (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1911 if (error != 0) {
1912 mutex_exit(&connp->conn_lock);
1913 return (error);
1914 }
1915 if (ipp->ipp_hopoptslen == 0) {
1916 ipp->ipp_fields &= ~IPPF_HOPOPTS;
1917 } else {
1918 ipp->ipp_fields |= IPPF_HOPOPTS;
1919 }
1920 mutex_exit(&connp->conn_lock);
1921 coa->coa_changed |= COA_HEADER_CHANGED;
1922 coa->coa_changed |= COA_WROFF_CHANGED;
1923 break;
1924 case IPV6_RTHDRDSTOPTS:
1925 mutex_enter(&connp->conn_lock);
1926 error = optcom_pkt_set(invalp, inlen,
1927 (uchar_t **)&ipp->ipp_rthdrdstopts,
1928 &ipp->ipp_rthdrdstoptslen);
1929 if (error != 0) {
1930 mutex_exit(&connp->conn_lock);
1931 return (error);
1932 }
1933 if (ipp->ipp_rthdrdstoptslen == 0) {
1934 ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1935 } else {
1936 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1937 }
1938 mutex_exit(&connp->conn_lock);
1939 coa->coa_changed |= COA_HEADER_CHANGED;
1940 coa->coa_changed |= COA_WROFF_CHANGED;
1941 break;
1942 case IPV6_DSTOPTS:
1943 mutex_enter(&connp->conn_lock);
1944 error = optcom_pkt_set(invalp, inlen,
1945 (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1946 if (error != 0) {
1947 mutex_exit(&connp->conn_lock);
1948 return (error);
1949 }
1950 if (ipp->ipp_dstoptslen == 0) {
1951 ipp->ipp_fields &= ~IPPF_DSTOPTS;
1952 } else {
1953 ipp->ipp_fields |= IPPF_DSTOPTS;
1954 }
1955 mutex_exit(&connp->conn_lock);
1956 coa->coa_changed |= COA_HEADER_CHANGED;
1957 coa->coa_changed |= COA_WROFF_CHANGED;
1958 break;
1959 case IPV6_RTHDR:
1960 mutex_enter(&connp->conn_lock);
1961 error = optcom_pkt_set(invalp, inlen,
1962 (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1963 if (error != 0) {
1964 mutex_exit(&connp->conn_lock);
1965 return (error);
1966 }
1967 if (ipp->ipp_rthdrlen == 0) {
1968 ipp->ipp_fields &= ~IPPF_RTHDR;
1969 } else {
1970 ipp->ipp_fields |= IPPF_RTHDR;
1971 }
1972 mutex_exit(&connp->conn_lock);
1973 coa->coa_changed |= COA_HEADER_CHANGED;
1974 coa->coa_changed |= COA_WROFF_CHANGED;
1975 break;
1976
1977 case IPV6_DONTFRAG:
1978 if (onoff) {
1979 ixa->ixa_flags |= IXAF_DONTFRAG;
1980 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1981 } else {
1982 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1983 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1984 }
1985 /* Need to redo ip_attr_connect */
1986 coa->coa_changed |= COA_ROUTE_CHANGED;
1987 break;
1988
1989 case IPV6_USE_MIN_MTU:
1990 ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1991 ixa->ixa_use_min_mtu = *i1;
1992 /* Need to redo ip_attr_connect */
1993 coa->coa_changed |= COA_ROUTE_CHANGED;
1994 break;
1995
1996 case IPV6_SEC_OPT:
1997 mutex_enter(&connp->conn_lock);
1998 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1999 mutex_exit(&connp->conn_lock);
2000 if (error != 0) {
2001 return (error);
2002 }
2003 /* This is an IPsec policy change - redo ip_attr_connect */
2004 coa->coa_changed |= COA_ROUTE_CHANGED;
2005 break;
2006 case IPV6_SRC_PREFERENCES:
2007 /*
2008 * This socket option only affects connected
2009 * sockets that haven't already bound to a specific
2010 * IPv6 address. In other words, sockets that
2011 * don't call bind() with an address other than the
2012 * unspecified address and that call connect().
2013 * ip_set_destination_v6() passes these preferences
2014 * to the ipif_select_source_v6() function.
2015 */
2016 mutex_enter(&connp->conn_lock);
2017 error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
2018 mutex_exit(&connp->conn_lock);
2019 if (error != 0) {
2020 return (error);
2021 }
2022 break;
2023 case IPV6_V6ONLY:
2024 mutex_enter(&connp->conn_lock);
2025 connp->conn_ipv6_v6only = onoff;
2026 mutex_exit(&connp->conn_lock);
2027 break;
2028 }
2029 return (0);
2030}
2031
2032/* Handle IPPROTO_UDP */
2033/* ARGSUSED1 */
2034static int
2035conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2036 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2037{
2038 conn_t *connp = coa->coa_connp;
2039 int *i1 = (int *)invalp;
2040 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2041 int error;
2042
2043 switch (name) {
2044 case UDP_ANONPRIVBIND:
2045 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2046 return (error);
2047 }
2048 break;
2049 }
2050 if (checkonly)
2051 return (0);
2052
2053 /* Here we set the actual option value */
2054 mutex_enter(&connp->conn_lock);
2055 switch (name) {
2056 case UDP_ANONPRIVBIND:
2057 connp->conn_anon_priv_bind = onoff;
2058 break;
2059 case UDP_EXCLBIND:
2060 connp->conn_exclbind = onoff;
2061 break;
2062 }
2063 mutex_exit(&connp->conn_lock);
2064 return (0);
2065}
2066
2067/* Handle IPPROTO_TCP */
2068/* ARGSUSED1 */
2069static int
2070conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2071 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2072{
2073 conn_t *connp = coa->coa_connp;
2074 int *i1 = (int *)invalp;
2075 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2076 int error;
2077
2078 switch (name) {
2079 case TCP_ANONPRIVBIND:
2080 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2081 return (error);
2082 }
2083 break;
2084 }
2085 if (checkonly)
2086 return (0);
2087
2088 /* Here we set the actual option value */
2089 mutex_enter(&connp->conn_lock);
2090 switch (name) {
2091 case TCP_ANONPRIVBIND:
2092 connp->conn_anon_priv_bind = onoff;
2093 break;
2094 case TCP_EXCLBIND:
2095 connp->conn_exclbind = onoff;
2096 break;
2097 case TCP_RECVDSTADDR:
2098 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2099 break;
2100 }
2101 mutex_exit(&connp->conn_lock);
2102 return (0);
2103}
2104
2105int
2106conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2107{
2108 sin_t *sin;
2109 sin6_t *sin6;
2110
2111 if (connp->conn_family == AF_INET) {
2112 if (*salenp < sizeof (sin_t))
2113 return (EINVAL);
2114
2115 *salenp = sizeof (sin_t);
2116 /* Fill zeroes and then initialize non-zero fields */
2117 sin = (sin_t *)sa;
2118 *sin = sin_null;
2119 sin->sin_family = AF_INET;
2120 if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2121 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2122 sin->sin_addr.s_addr = connp->conn_saddr_v4;
2123 } else {
2124 /*
2125 * INADDR_ANY
2126 * conn_saddr is not set, we might be bound to
2127 * broadcast/multicast. Use conn_bound_addr as
2128 * local address instead (that could
2129 * also still be INADDR_ANY)
2130 */
2131 sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2132 }
2133 sin->sin_port = connp->conn_lport;
2134 } else {
2135 if (*salenp < sizeof (sin6_t))
2136 return (EINVAL);
2137
2138 *salenp = sizeof (sin6_t);
2139 /* Fill zeroes and then initialize non-zero fields */
2140 sin6 = (sin6_t *)sa;
2141 *sin6 = sin6_null;
2142 sin6->sin6_family = AF_INET6;
2143 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2144 sin6->sin6_addr = connp->conn_saddr_v6;
2145 } else {
2146 /*
2147 * conn_saddr is not set, we might be bound to
2148 * broadcast/multicast. Use conn_bound_addr as
2149 * local address instead (which could
2150 * also still be unspecified)
2151 */
2152 sin6->sin6_addr = connp->conn_bound_addr_v6;
2153 }
2154 sin6->sin6_port = connp->conn_lport;
2155 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2156 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2157 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2158 }
2159 return (0);
2160}
2161
2162int
2163conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2164{
2165 struct sockaddr_in *sin;
2166 struct sockaddr_in6 *sin6;
2167
2168 if (connp->conn_family == AF_INET) {
2169 if (*salenp < sizeof (sin_t))
2170 return (EINVAL);
2171
2172 *salenp = sizeof (sin_t);
2173 /* initialize */
2174 sin = (sin_t *)sa;
2175 *sin = sin_null;
2176 sin->sin_family = AF_INET;
2177 sin->sin_addr.s_addr = connp->conn_faddr_v4;
2178 sin->sin_port = connp->conn_fport;
2179 } else {
2180 if (*salenp < sizeof (sin6_t))
2181 return (EINVAL);
2182
2183 *salenp = sizeof (sin6_t);
2184 /* initialize */
2185 sin6 = (sin6_t *)sa;
2186 *sin6 = sin6_null;
2187 sin6->sin6_family = AF_INET6;
2188 sin6->sin6_addr = connp->conn_faddr_v6;
2189 sin6->sin6_port = connp->conn_fport;
2190 sin6->sin6_flowinfo = connp->conn_flowinfo;
2191 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2192 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2193 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2194 }
2195 return (0);
2196}
2197
/*
 * Forward declarations of the static checksum helpers; they are called by
 * the header construction routines before their definitions appear.
 */
static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2200
2201/*
2202 * Allocate and fill in conn_ht_iphc based on the current information
2203 * in the conn.
2204 * Normally used when we bind() and connect().
2205 * Returns failure if can't allocate memory, or if there is a problem
2206 * with a routing header/option.
2207 *
2208 * We allocate space for the transport header (ulp_hdr_len + extra) and
2209 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2210 * The extra is there for transports that want some spare room for future
2211 * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2212 * excludes the extra part.
2213 *
2214 * We massage an routing option/header and store the ckecksum difference
2215 * in conn_sum.
2216 *
2217 * Caller needs to update conn_wroff if desired.
2218 */
2219int
2220conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2221 const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2222{
2223 ip_xmit_attr_t *ixa = connp->conn_ixa;
2224 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
2225 uint_t ip_hdr_length;
2226 uchar_t *hdrs;
2227 uint_t hdrs_len;
2228
2229 ASSERT(MUTEX_HELD(&connp->conn_lock));
2230
2231 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2232 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2233 /* In case of TX label and IP options it can be too much */
2234 if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2235 /* Preserves existing TX errno for this */
2236 return (EHOSTUNREACH);
2237 }
2238 } else {
2239 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2240 }
2241 ixa->ixa_ip_hdr_length = ip_hdr_length;
2242 hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2243 ASSERT(hdrs_len != 0);
2244
2245 if (hdrs_len != connp->conn_ht_iphc_allocated) {
2246 /* Allocate new before we free any old */
2247 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2248 if (hdrs == NULL)
2249 return (ENOMEM);
2250
2251 if (connp->conn_ht_iphc != NULL) {
2252 kmem_free(connp->conn_ht_iphc,
2253 connp->conn_ht_iphc_allocated);
2254 }
2255 connp->conn_ht_iphc = hdrs;
2256 connp->conn_ht_iphc_allocated = hdrs_len;
2257 } else {
2258 hdrs = connp->conn_ht_iphc;
2259 }
2260 hdrs_len -= extra;
2261 connp->conn_ht_iphc_len = hdrs_len;
2262
2263 connp->conn_ht_ulp = hdrs + ip_hdr_length;
2264 connp->conn_ht_ulp_len = ulp_hdr_length;
2265
2266 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2267 ipha_t *ipha = (ipha_t *)hdrs;
2268
2269 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2270 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2271 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2272 ipha->ipha_length = htons(hdrs_len);
2273 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2274 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2275 else
2276 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2277
2278 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2279 connp->conn_sum = cksum_massage_options_v4(ipha,
2280 connp->conn_netstack);
2281 } else {
2282 connp->conn_sum = 0;
2283 }
2284 } else {
2285 ip6_t *ip6h = (ip6_t *)hdrs;
2286
2287 ip6h->ip6_src = *v6src;
2288 ip6h->ip6_dst = *v6dst;
2289 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2290 flowinfo);
2291 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2292
2293 if (ipp->ipp_fields & IPPF_RTHDR) {
2294 connp->conn_sum = cksum_massage_options_v6(ip6h,
2295 ip_hdr_length, connp->conn_netstack);
2296
2297 /*
2298 * Verify that the first hop isn't a mapped address.
2299 * Routers along the path need to do this verification
2300 * for subsequent hops.
2301 */
2302 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2303 return (EADDRNOTAVAIL);
2304
2305 } else {
2306 connp->conn_sum = 0;
2307 }
2308 }
2309 return (0);
2310}
2311
2312/*
2313 * Prepend a header template to data_mp based on the ip_pkt_t
2314 * and the passed in source, destination and protocol.
2315 *
2316 * Returns failure if can't allocate memory, in which case data_mp is freed.
2317 * We allocate space for the transport header (ulp_hdr_len) and
2318 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2319 *
2320 * We massage an routing option/header and return the ckecksum difference
2321 * in *sump. This is in host byte order.
2322 *
2323 * Caller needs to update conn_wroff if desired.
2324 */
2325mblk_t *
2326conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2327 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2328 uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2329 uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2330{
2331 uint_t ip_hdr_length;
2332 uchar_t *hdrs;
2333 uint_t hdrs_len;
2334 mblk_t *mp;
2335
2336 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2337 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2338 ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2339 } else {
2340 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2341 }
2342 hdrs_len = ip_hdr_length + ulp_hdr_length;
2343 ASSERT(hdrs_len != 0);
2344
2345 ixa->ixa_ip_hdr_length = ip_hdr_length;
2346
2347 /* Can we prepend to data_mp? */
2348 if (data_mp != NULL &&
2349 data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2350 data_mp->b_datap->db_ref == 1) {
2351 hdrs = data_mp->b_rptr - hdrs_len;
2352 data_mp->b_rptr = hdrs;
2353 mp = data_mp;
2354 } else {
2355 mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2356 if (mp == NULL) {
2357 freemsg(data_mp);
2358 *errorp = ENOMEM;
2359 return (NULL);
2360 }
2361 mp->b_wptr = mp->b_datap->db_lim;
2362 hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2363 mp->b_cont = data_mp;
2364 }
2365
2366 /*
2367 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2368 * if PKTINFO (aka IPPF_ADDR) was set.
2369 */
2370 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2371 ipha_t *ipha = (ipha_t *)hdrs;
2372
2373 ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2374 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2375 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2376 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2377 ipha->ipha_length = htons(hdrs_len + data_length);
2378 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2379 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2380 else
2381 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2382
2383 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2384 *sump = cksum_massage_options_v4(ipha,
2385 ixa->ixa_ipst->ips_netstack);
2386 } else {
2387 *sump = 0;
2388 }
2389 } else {
2390 ip6_t *ip6h = (ip6_t *)hdrs;
2391
2392 ip6h->ip6_src = *v6src;
2393 ip6h->ip6_dst = *v6dst;
2394 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2395 ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2396
2397 if (ipp->ipp_fields & IPPF_RTHDR) {
2398 *sump = cksum_massage_options_v6(ip6h,
2399 ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2400
2401 /*
2402 * Verify that the first hop isn't a mapped address.
2403 * Routers along the path need to do this verification
2404 * for subsequent hops.
2405 */
2406 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2407 *errorp = EADDRNOTAVAIL;
2408 freemsg(mp);
2409 return (NULL);
2410 }
2411 } else {
2412 *sump = 0;
2413 }
2414 }
2415 return (mp);
2416}
2417
2418/*
2419 * Massage a source route if any putting the first hop
2420 * in ipha_dst. Compute a starting value for the checksum which
2421 * takes into account that the original ipha_dst should be
2422 * included in the checksum but that IP will include the
2423 * first hop from the source route in the tcp checksum.
2424 */
2425static uint32_t
2426cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2427{
2428 in_addr_t dst;
2429 uint32_t cksum;
2430
2431 /* Get last hop then diff against first hop */
2432 cksum = ip_massage_options(ipha, ns);
2433 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2434 dst = ipha->ipha_dst;
2435 cksum -= ((dst >> 16) + (dst & 0xffff));
2436 if ((int)cksum < 0)
2437 cksum--;
2438 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2439 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2440 ASSERT(cksum < 0x10000);
2441 return (ntohs(cksum));
2442}
2443
2444static uint32_t
2445cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2446{
2447 uint8_t *end;
2448 ip6_rthdr_t *rth;
2449 uint32_t cksum;
2450
2451 end = (uint8_t *)ip6h + ip_hdr_len;
2452 rth = ip_find_rthdr_v6(ip6h, end);
2453 if (rth == NULL)
2454 return (0);
2455
2456 cksum = ip_massage_options_v6(ip6h, rth, ns);
2457 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2458 ASSERT(cksum < 0x10000);
2459 return (ntohs(cksum));
2460}
2461
2462/*
2463 * ULPs that change the destination address need to call this for each
2464 * change to discard any state about a previous destination that might
2465 * have been multicast or multirt.
2466 */
2467void
2468ip_attr_newdst(ip_xmit_attr_t *ixa)
2469{
2470 ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2471 IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2472 IXAF_NO_LOOP_ZONEID_SET);
2473}
2474
2475/*
2476 * Determine the nexthop which will be used.
2477 * Normally this is just the destination, but if a IPv4 source route, or
2478 * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2479 * there.
2480 */
2481void
2482ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2483 const in6_addr_t *dst, in6_addr_t *nexthop)
2484{
Erik Nordmark188e1662009-11-20 06:55:30 -08002485 if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2486 *nexthop = *dst;
2487 return;
2488 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08002489 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2490 ipaddr_t v4dst;
2491 ipaddr_t v4nexthop;
2492
2493 IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2494 v4nexthop = ip_pkt_source_route_v4(ipp);
2495 if (v4nexthop == INADDR_ANY)
2496 v4nexthop = v4dst;
2497
2498 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2499 } else {
2500 const in6_addr_t *v6nexthop;
2501
2502 v6nexthop = ip_pkt_source_route_v6(ipp);
2503 if (v6nexthop == NULL)
2504 v6nexthop = dst;
2505
2506 *nexthop = *v6nexthop;
2507 }
2508}
2509
2510/*
2511 * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2512 * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2513 * case (connected latching is done in conn_connect).
2514 * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2515 * set, but doesn't otherwise use the conn_t.
2516 *
2517 * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2518 * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2519 *
2520 * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2521 * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2522 *
2523 * Updates laddrp and uinfo if they are non-NULL.
2524 *
2525 * TSOL notes: The callers if ip_attr_connect must check if the destination
2526 * is different than before and in that case redo conn_update_label.
2527 * The callers of conn_connect do not need that since conn_connect
2528 * performs the conn_update_label.
2529 */
2530int
2531ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2532 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2533 const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2534 iulp_t *uinfo, uint32_t flags)
2535{
2536 in6_addr_t laddr = *v6src;
2537 int error;
2538
2539 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2540
2541 if (connp->conn_zone_is_global)
2542 flags |= IPDF_ZONE_IS_GLOBAL;
2543 else
2544 flags &= ~IPDF_ZONE_IS_GLOBAL;
2545
2546 /*
2547 * Lookup the route to determine a source address and the uinfo.
2548 * If the ULP has a source route option then the caller will
2549 * have set v6nexthop to be the first hop.
2550 */
2551 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2552 ipaddr_t v4dst;
2553 ipaddr_t v4src, v4nexthop;
2554
2555 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2556 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2557 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2558
2559 if (connp->conn_unspec_src || v4src != INADDR_ANY)
2560 flags &= ~IPDF_SELECT_SRC;
2561 else
2562 flags |= IPDF_SELECT_SRC;
2563
2564 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2565 uinfo, flags, connp->conn_mac_mode);
2566 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2567 } else {
2568 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2569 flags &= ~IPDF_SELECT_SRC;
2570 else
2571 flags |= IPDF_SELECT_SRC;
2572
2573 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2574 uinfo, flags, connp->conn_mac_mode);
2575 }
2576 /* Pass out some address even if we hit a RTF_REJECT etc */
2577 if (laddrp != NULL)
2578 *laddrp = laddr;
2579
2580 if (error != 0)
2581 return (error);
2582
2583 if (flags & IPDF_IPSEC) {
2584 /*
2585 * Set any IPsec policy in ixa. Routine also looks at ULP
2586 * ports.
2587 */
2588 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2589 }
2590 return (0);
2591}
2592
2593/*
2594 * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2595 * Assumes that conn_faddr and conn_fport are already set. As such it is not
2596 * usable for SCTP, since SCTP has multiple faddrs.
2597 *
 * Caller must hold conn_lock to provide atomic consistency between the
 * conn_t's addresses and the ixa.
 * NOTE: this function drops and reacquires conn_lock since it can't be
 * held across ip_attr_connect/ip_set_destination.
2602 *
2603 * The caller needs to handle inserting in the receive-side fanout when
2604 * appropriate after conn_connect returns.
2605 */
2606int
2607conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2608{
2609 ip_xmit_attr_t *ixa = connp->conn_ixa;
2610 in6_addr_t nexthop;
2611 in6_addr_t saddr, faddr;
2612 in_port_t fport;
2613 int error;
2614
2615 ASSERT(MUTEX_HELD(&connp->conn_lock));
2616
2617 if (connp->conn_ipversion == IPV4_VERSION)
2618 ixa->ixa_flags |= IXAF_IS_IPV4;
2619 else
2620 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2621
2622 /* We do IPsec latching below - hence no caching in ip_attr_connect */
2623 flags &= ~IPDF_IPSEC;
2624
2625 /* In case we had previously done an ip_attr_connect */
2626 ip_attr_newdst(ixa);
2627
2628 /*
2629 * Determine the nexthop and copy the addresses before dropping
2630 * conn_lock.
2631 */
2632 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2633 &connp->conn_faddr_v6, &nexthop);
2634 saddr = connp->conn_saddr_v6;
2635 faddr = connp->conn_faddr_v6;
2636 fport = connp->conn_fport;
2637
2638 mutex_exit(&connp->conn_lock);
2639 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2640 &saddr, uinfo, flags | IPDF_VERIFY_DST);
2641 mutex_enter(&connp->conn_lock);
2642
2643 /* Could have changed even if an error */
2644 connp->conn_saddr_v6 = saddr;
2645 if (error != 0)
2646 return (error);
2647
2648 /*
2649 * Check whether Trusted Solaris policy allows communication with this
2650 * host, and pretend that the destination is unreachable if not.
2651 * Compute any needed label and place it in ipp_label_v4/v6.
2652 *
2653 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2654 * the packet.
2655 *
2656 * TSOL Note: Any concurrent threads would pick a different ixa
2657 * (and ipp if they are to change the ipp) so we
2658 * don't have to worry about concurrent threads.
2659 */
2660 if (is_system_labeled()) {
2661 if (connp->conn_mlp_type != mlptSingle)
2662 return (ECONNREFUSED);
2663
2664 /*
2665 * conn_update_label will set ipp_label* which will later
2666 * be used by conn_build_hdr_template.
2667 */
2668 error = conn_update_label(connp, ixa,