blob: 678dce986a742edfa64fba3d896737e610742916 [file] [log] [blame]
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
rshoaib2caf0dc2006-03-05 18:00:39 -08005 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07007 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
rshoaib2caf0dc2006-03-05 18:00:39 -080021
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070022/*
Anders Persson3e95bd42010-06-17 17:22:09 -070023 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
Patrick Mooneyacb55912015-10-07 21:17:36 +000024 * Copyright 2015, Joyent, Inc.
Gordon Rossf012ee02016-11-17 22:13:10 -050025 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
Garrett D'Amore15f90b02022-07-03 19:05:50 -070026 * Copyright 2022 Garrett D'Amore
Robert Mustacchidfc0fed2013-04-03 11:07:46 -070027 */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070028
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070029#include <sys/types.h>
30#include <sys/t_lock.h>
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/buf.h>
34#include <sys/conf.h>
35#include <sys/cred.h>
36#include <sys/kmem.h>
Yu Xiangning0f1702c2008-12-11 20:04:13 -080037#include <sys/kmem_impl.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070038#include <sys/sysmacros.h>
39#include <sys/vfs.h>
40#include <sys/vnode.h>
41#include <sys/debug.h>
42#include <sys/errno.h>
43#include <sys/time.h>
44#include <sys/file.h>
45#include <sys/open.h>
46#include <sys/user.h>
47#include <sys/termios.h>
48#include <sys/stream.h>
49#include <sys/strsubr.h>
50#include <sys/strsun.h>
Yu Xiangning0f1702c2008-12-11 20:04:13 -080051#include <sys/suntpi.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070052#include <sys/ddi.h>
53#include <sys/esunddi.h>
54#include <sys/flock.h>
55#include <sys/modctl.h>
56#include <sys/vtrace.h>
57#include <sys/cmn_err.h>
58#include <sys/pathname.h>
59
60#include <sys/socket.h>
61#include <sys/socketvar.h>
masputraff550d02005-10-22 22:50:14 -070062#include <sys/sockio.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070063#include <netinet/in.h>
64#include <sys/un.h>
65#include <sys/strsun.h>
66
67#include <sys/tiuser.h>
68#define _SUN_TPI_VERSION 2
69#include <sys/tihdr.h>
70#include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */
71
72#include <c2/audit.h>
73
74#include <inet/common.h>
75#include <inet/ip.h>
76#include <inet/ip6.h>
77#include <inet/tcp.h>
masputraff550d02005-10-22 22:50:14 -070078#include <inet/udp_impl.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070079
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070080#include <sys/zone.h>
81
Yu Xiangning0f1702c2008-12-11 20:04:13 -080082#include <fs/sockfs/sockcommon.h>
83#include <fs/sockfs/socktpi.h>
84#include <fs/sockfs/socktpi_impl.h>
85
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070086/*
87 * Possible failures when memory can't be allocated. The documented behavior:
88 *
Toomas Soomee9f74ea2019-01-20 09:23:49 +020089 * 5.5: 4.X: XNET:
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070090 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/
91 * EINTR
92 * (4.X does not document EINTR but returns it)
93 * bind: ENOSR - ENOBUFS/ENOSR
Toomas Soomee9f74ea2019-01-20 09:23:49 +020094 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070095 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR
96 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR
97 * (4.X getpeername and getsockname do not fail in practice)
98 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR
99 * listen: - - ENOBUFS
100 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/
101 * EINTR
102 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/
103 * EINTR
104 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR
105 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR
106 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR
107 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR
108 *
109 * Resolution. When allocation fails:
110 * recv: return EINTR
111 * send: return EINTR
112 * connect, accept: EINTR
113 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep
114 * socket, socketpair: ENOBUFS
115 * getpeername, getsockname: sleep
116 * getsockopt, setsockopt: sleep
117 */
118
#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * When SOCK_TEST is defined these are real, patchable variables; otherwise
 * they are compile-time constants with the same values (see the #else
 * branch below).
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket send them as a single message for AF_INET{,6}.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
/* Production builds: same defaults as above, fixed at compile time. */
#define soconnect_tpi_tcp 0
#define soconnect_tpi_udp 0
#define solisten_tpi_tcp 0
#define soaccept_tpi_tcp 0
#define soaccept_tpi_multioptions 1
#endif /* SOCK_TEST */
161
#ifdef SOCK_TEST
/* Test-only knobs; defined outside this file. */
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */

/* Defined outside this file. */
extern uint32_t ucredsize;

/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
 */
int xnet_skip_checks = 0;
/* When non-zero, a failing X/Open state check also prints a diagnostic. */
int xnet_check_print = 0;
/* NOTE(review): presumably enables a diagnostic on truncation -- confirm. */
int xnet_truncate_print = 0;
176
/* Allocation and teardown of TPI sonodes. */
static void sotpi_destroy(struct sonode *);
static struct sonode *sotpi_create(struct sockparams *, int, int, int, int,
    int, int *, cred_t *cr);

/* Lifecycle of the sotpi_info_t attached to a sonode. */
static boolean_t sotpi_info_create(struct sonode *, int);
static void sotpi_info_init(struct sonode *);
static void sotpi_info_fini(struct sonode *);
static void sotpi_info_destroy(struct sonode *);

/*
 * Do direct function call to the transport layer below; this would
 * also allow the transport to utilize read-side synchronous stream
 * interface if necessary. This is a /etc/system tunable that must
 * not be modified on a running system. By default this is enabled
 * for performance reasons and may be disabled for debugging purposes.
 */
boolean_t socktpi_direct = B_TRUE;

/*
 * Caches that sonodes are allocated from: socktpi_unix_cache for AF_UNIX
 * sockets, socktpi_cache for every other family.
 */
static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;

extern void sigintr(k_sigset_t *, int);
extern void sigunintr(k_sigset_t *);

static int sotpi_unbind(struct sonode *, int);

/* TPI sockfs sonode operations */
int sotpi_init(struct sonode *, struct sonode *, struct cred *,
    int);
static int sotpi_accept(struct sonode *, int, struct cred *,
    struct sonode **);
static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
    int, struct cred *);
static int sotpi_listen(struct sonode *, int, struct cred *);
static int sotpi_connect(struct sonode *, struct sockaddr *,
    socklen_t, int, int, struct cred *);
extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *,
    struct uio *, struct cred *);
static int sotpi_sendmsg(struct sonode *, struct nmsghdr *,
    struct uio *, struct cred *);
static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int,
    struct cred *, mblk_t **);
static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
    struct uio *, void *, t_uscalar_t, int);
static int sodgram_direct(struct sonode *, struct sockaddr *,
    socklen_t, struct uio *, int);
extern int sotpi_getpeername(struct sonode *, struct sockaddr *,
    socklen_t *, boolean_t, struct cred *);
static int sotpi_getsockname(struct sonode *, struct sockaddr *,
    socklen_t *, struct cred *);
static int sotpi_shutdown(struct sonode *, int, struct cred *);
extern int sotpi_getsockopt(struct sonode *, int, int, void *,
    socklen_t *, int, struct cred *);
extern int sotpi_setsockopt(struct sonode *, int, int, const void *,
    socklen_t, struct cred *);
static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
    int32_t *);
static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int,
    struct cred *, int32_t *);
static int sotpi_poll(struct sonode *, short, int, short *,
    struct pollhead **);
static int sotpi_close(struct sonode *, int, struct cred *);

/* NOTE(review): presumably the sotpi_info_t constructor/destructor -- confirm. */
static int i_sotpi_info_constructor(sotpi_info_t *);
static void i_sotpi_info_destructor(sotpi_info_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700241
/*
 * Operations vector installed on every TPI sonode (passed to sonode_init()
 * in sotpi_create() and checked in sotpi_destroy()). The initializer is
 * positional, so the entries must stay in the order of the sonodeops_t
 * members named in the trailing comments.
 */
sonodeops_t sotpi_sonodeops = {
	sotpi_init,		/* sop_init		*/
	sotpi_accept,		/* sop_accept		*/
	sotpi_bind,		/* sop_bind		*/
	sotpi_listen,		/* sop_listen		*/
	sotpi_connect,		/* sop_connect		*/
	sotpi_recvmsg,		/* sop_recvmsg		*/
	sotpi_sendmsg,		/* sop_sendmsg		*/
	sotpi_sendmblk,		/* sop_sendmblk		*/
	sotpi_getpeername,	/* sop_getpeername	*/
	sotpi_getsockname,	/* sop_getsockname	*/
	sotpi_shutdown,		/* sop_shutdown		*/
	sotpi_getsockopt,	/* sop_getsockopt	*/
	sotpi_setsockopt,	/* sop_setsockopt	*/
	sotpi_ioctl,		/* sop_ioctl		*/
	sotpi_poll,		/* sop_poll		*/
	sotpi_close,		/* sop_close		*/
};
260
261/*
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800262 * Return a TPI socket vnode.
263 *
264 * Note that sockets assume that the driver will clone (either itself
265 * or by using the clone driver) i.e. a socket() call will always
266 * result in a new vnode being created.
267 */
268
/*
 * Common create code for socket and accept. If tso is set the values
 * from that node are used instead of issuing a T_INFO_REQ.
 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800273
274/* ARGSUSED */
275static struct sonode *
276sotpi_create(struct sockparams *sp, int family, int type, int protocol,
277 int version, int sflags, int *errorp, cred_t *cr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700278{
279 struct sonode *so;
Toomas Soomee9f74ea2019-01-20 09:23:49 +0200280 kmem_cache_t *cp;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800281 int sfamily = family;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700282
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800283 ASSERT(sp->sp_sdev_info.sd_vnode != NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700284
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800285 if (family == AF_NCA) {
286 /*
287 * The request is for an NCA socket so for NL7C use the
288 * INET domain instead and mark NL7C_AF_NCA below.
289 */
290 family = AF_INET;
291 /*
292 * NL7C is not supported in the non-global zone,
293 * we enforce this restriction here.
294 */
295 if (getzoneid() != GLOBAL_ZONEID) {
296 *errorp = ENOTSUP;
297 return (NULL);
298 }
299 }
masputraff550d02005-10-22 22:50:14 -0700300
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800301 /*
302 * to be compatible with old tpi socket implementation ignore
303 * sleep flag (sflags) passed in
304 */
305 cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache;
306 so = kmem_cache_alloc(cp, KM_SLEEP);
307 if (so == NULL) {
308 *errorp = ENOMEM;
309 return (NULL);
310 }
311
312 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops);
313 sotpi_info_init(so);
314
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800315 if (version == SOV_DEFAULT)
316 version = so_default_version;
317
318 so->so_version = (short)version;
319 *errorp = 0;
320
321 return (so);
322}
323
324static void
325sotpi_destroy(struct sonode *so)
326{
327 kmem_cache_t *cp;
328 struct sockparams *origsp;
329
330 /*
331 * If there is a new dealloc function (ie. smod_destroy_func),
332 * then it should check the correctness of the ops.
333 */
334
335 ASSERT(so->so_ops == &sotpi_sonodeops);
336
337 origsp = SOTOTPI(so)->sti_orig_sp;
338
339 sotpi_info_fini(so);
340
341 if (so->so_state & SS_FALLBACK_COMP) {
342 /*
343 * A fallback happend, which means that a sotpi_info_t struct
344 * was allocated (as opposed to being allocated from the TPI
345 * sonode cache. Therefore we explicitly free the struct
346 * here.
347 */
348 sotpi_info_destroy(so);
349 ASSERT(origsp != NULL);
350
351 origsp->sp_smod_info->smod_sock_destroy_func(so);
352 SOCKPARAMS_DEC_REF(origsp);
353 } else {
354 sonode_fini(so);
355 cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache :
356 socktpi_cache;
357 kmem_cache_free(cp, so);
358 }
359}
360
361/* ARGSUSED1 */
362int
363sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags)
364{
365 major_t maj;
366 dev_t newdev;
367 struct vnode *vp;
368 int error = 0;
369 struct stdata *stp;
370
371 sotpi_info_t *sti = SOTOTPI(so);
372
373 dprint(1, ("sotpi_init()\n"));
374
375 /*
376 * over write the sleep flag passed in but that is ok
377 * as tpi socket does not honor sleep flag.
378 */
379 flags |= FREAD|FWRITE;
380
381 /*
382 * Record in so_flag that it is a clone.
383 */
384 if (getmajor(sti->sti_dev) == clone_major)
385 so->so_flag |= SOCLONE;
386
387 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) &&
388 (so->so_family == AF_INET || so->so_family == AF_INET6) &&
389 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP ||
390 so->so_protocol == IPPROTO_IP)) {
masputraff550d02005-10-22 22:50:14 -0700391 /* Tell tcp or udp that it's talking to sockets */
392 flags |= SO_SOCKSTR;
393
394 /*
395 * Here we indicate to socktpi_open() our attempt to
396 * make direct calls between sockfs and transport.
397 * The final decision is left to socktpi_open().
398 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800399 sti->sti_direct = 1;
masputraff550d02005-10-22 22:50:14 -0700400
401 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL);
402 if (so->so_type == SOCK_STREAM && tso != NULL) {
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800403 if (SOTOTPI(tso)->sti_direct) {
masputraff550d02005-10-22 22:50:14 -0700404 /*
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800405 * Inherit sti_direct from listener and pass
masputraff550d02005-10-22 22:50:14 -0700406 * SO_ACCEPTOR open flag to tcp, indicating
407 * that this is an accept fast-path instance.
408 */
409 flags |= SO_ACCEPTOR;
410 } else {
411 /*
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800412 * sti_direct is not set on listener, meaning
masputraff550d02005-10-22 22:50:14 -0700413 * that the listener has been converted from
414 * a socket to a stream. Ensure that the
415 * acceptor inherits these settings.
416 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800417 sti->sti_direct = 0;
masputraff550d02005-10-22 22:50:14 -0700418 flags &= ~SO_SOCKSTR;
419 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700420 }
421 }
422
423 /*
424 * Tell local transport that it is talking to sockets.
425 */
426 if (so->so_family == AF_UNIX) {
427 flags |= SO_SOCKSTR;
428 }
429
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800430 vp = SOTOV(so);
431 newdev = vp->v_rdev;
432 maj = getmajor(newdev);
433 ASSERT(STREAMSTAB(maj));
kais655a2e92005-12-12 14:09:56 -0800434
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800435 error = stropen(vp, &newdev, flags, cr);
436
437 stp = vp->v_stream;
438 if (error == 0) {
439 if (so->so_flag & SOCLONE)
440 ASSERT(newdev != vp->v_rdev);
441 mutex_enter(&so->so_lock);
442 sti->sti_dev = newdev;
443 vp->v_rdev = newdev;
444 mutex_exit(&so->so_lock);
445
446 if (stp->sd_flag & STRISTTY) {
447 /*
448 * this is a post SVR4 tty driver - a socket can not
449 * be a controlling terminal. Fail the open.
450 */
451 (void) sotpi_close(so, flags, cr);
452 return (ENOTTY); /* XXX */
453 }
454
455 ASSERT(stp->sd_wrq != NULL);
456 sti->sti_provinfo = tpi_findprov(stp->sd_wrq);
457
458 /*
459 * If caller is interested in doing direct function call
460 * interface to/from transport module, probe the module
461 * directly beneath the streamhead to see if it qualifies.
462 *
463 * We turn off the direct interface when qualifications fail.
464 * In the acceptor case, we simply turn off the sti_direct
465 * flag on the socket. We do the fallback after the accept
466 * has completed, before the new socket is returned to the
467 * application.
468 */
469 if (sti->sti_direct) {
470 queue_t *tq = stp->sd_wrq->q_next;
471
472 /*
473 * sti_direct is currently supported and tested
474 * only for tcp/udp; this is the main reason to
475 * have the following assertions.
476 */
477 ASSERT(so->so_family == AF_INET ||
478 so->so_family == AF_INET6);
479 ASSERT(so->so_protocol == IPPROTO_UDP ||
480 so->so_protocol == IPPROTO_TCP ||
481 so->so_protocol == IPPROTO_IP);
482 ASSERT(so->so_type == SOCK_DGRAM ||
483 so->so_type == SOCK_STREAM);
484
485 /*
486 * Abort direct call interface if the module directly
487 * underneath the stream head is not defined with the
488 * _D_DIRECT flag. This could happen in the tcp or
489 * udp case, when some other module is autopushed
490 * above it, or for some reasons the expected module
491 * isn't purely D_MP (which is the main requirement).
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800492 */
493 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) ||
494 !(_OTHERQ(tq)->q_flag & _QDIRECT)) {
495 int rval;
496
497 /* Continue on without direct calls */
498 sti->sti_direct = 0;
499
500 /*
501 * Cannot issue ioctl on fallback socket since
502 * there is no conn associated with the queue.
503 * The fallback downcall will notify the proto
504 * of the change.
505 */
506 if (!(flags & SO_ACCEPTOR) &&
507 !(flags & SO_FALLBACK)) {
508 if ((error = strioctl(vp,
509 _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
510 cr, &rval)) != 0) {
511 (void) sotpi_close(so, flags,
512 cr);
513 return (error);
514 }
515 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800516 }
517 }
518
519 if (flags & SO_FALLBACK) {
520 /*
521 * The stream created does not have a conn.
522 * do stream set up after conn has been assigned
523 */
524 return (error);
525 }
526 if (error = so_strinit(so, tso)) {
527 (void) sotpi_close(so, flags, cr);
528 return (error);
529 }
530
Patrick Mooneyacb55912015-10-07 21:17:36 +0000531 /* Enable sendfile() on AF_UNIX streams */
532 if (so->so_family == AF_UNIX && so->so_type == SOCK_STREAM) {
533 mutex_enter(&so->so_lock);
534 so->so_mode |= SM_SENDFILESUPP;
535 mutex_exit(&so->so_lock);
536 }
537
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800538 /* Wildcard */
539 if (so->so_protocol != so->so_sockparams->sp_protocol) {
540 int protocol = so->so_protocol;
541 /*
542 * Issue SO_PROTOTYPE setsockopt.
543 */
544 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE,
545 &protocol, (t_uscalar_t)sizeof (protocol), cr);
546 if (error != 0) {
547 (void) sotpi_close(so, flags, cr);
548 /*
549 * Setsockopt often fails with ENOPROTOOPT but
550 * socket() should fail with
551 * EPROTONOSUPPORT/EPROTOTYPE.
552 */
553 return (EPROTONOSUPPORT);
554 }
555 }
556
557 } else {
558 /*
559 * While the same socket can not be reopened (unlike specfs)
560 * the stream head sets STREOPENFAIL when the autopush fails.
561 */
562 if ((stp != NULL) &&
563 (stp->sd_flag & STREOPENFAIL)) {
564 /*
565 * Open failed part way through.
566 */
567 mutex_enter(&stp->sd_lock);
568 stp->sd_flag &= ~STREOPENFAIL;
569 mutex_exit(&stp->sd_lock);
570 (void) sotpi_close(so, flags, cr);
571 return (error);
572 /*NOTREACHED*/
573 }
574 ASSERT(stp == NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700575 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800576 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN,
577 "sockfs open:maj %d vp %p so %p error %d",
578 maj, vp, so, error);
579 return (error);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700580}
581
582/*
583 * Bind the socket to an unspecified address in sockfs only.
584 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't
585 * required in all cases.
586 */
587static void
588so_automatic_bind(struct sonode *so)
589{
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800590 sotpi_info_t *sti = SOTOTPI(so);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700591 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
592
593 ASSERT(MUTEX_HELD(&so->so_lock));
594 ASSERT(!(so->so_state & SS_ISBOUND));
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800595 ASSERT(sti->sti_unbind_mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700596
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800597 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
598 bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
599 sti->sti_laddr_sa->sa_family = so->so_family;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700600 so->so_state |= SS_ISBOUND;
601}
602
603
/*
 * bind the socket.
 *
 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2
 * are passed in we allow rebinding. Note that for backwards compatibility
 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind.
 * Thus the rebinding code is currently not executed.
 *
 * The constraints for rebinding are:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 * This rebinding code was added based on some language in the XNET book
 * about not returning EINVAL if the protocol allows rebinding. However,
 * this language is not present in the Posix socket draft. Thus maybe the
 * rebinding logic should be deleted from the source.
 *
 * A null "name" can be used to unbind the socket if:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800626/* ARGSUSED */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700627static int
628sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800629 socklen_t namelen, int backlog, int flags, struct cred *cr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700630{
631 struct T_bind_req bind_req;
632 struct T_bind_ack *bind_ack;
633 int error = 0;
634 mblk_t *mp;
635 void *addr;
636 t_uscalar_t addrlen;
637 int unbind_on_err = 1;
638 boolean_t clear_acceptconn_on_err = B_FALSE;
639 boolean_t restore_backlog_on_err = B_FALSE;
640 int save_so_backlog;
641 t_scalar_t PRIM_type = O_T_BIND_REQ;
642 boolean_t tcp_udp_xport;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800643 sotpi_info_t *sti = SOTOTPI(so);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700644
645 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
rh87107903a11e2008-07-31 15:02:18 -0700646 (void *)so, (void *)name, namelen, backlog, flags,
nordmarkfc80c0d2007-10-11 22:57:36 -0700647 pr_state(so->so_state, so->so_mode)));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700648
649 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM;
650
651 if (!(flags & _SOBIND_LOCK_HELD)) {
652 mutex_enter(&so->so_lock);
653 so_lock_single(so); /* Set SOLOCKED */
654 } else {
655 ASSERT(MUTEX_HELD(&so->so_lock));
656 ASSERT(so->so_flag & SOLOCKED);
657 }
658
659 /*
660 * Make sure that there is a preallocated unbind_req message
661 * before binding. This message allocated when the socket is
662 * created but it might be have been consumed.
663 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800664 if (sti->sti_unbind_mp == NULL) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700665 dprintso(so, 1, ("sobind: allocating unbind_req\n"));
666 /* NOTE: holding so_lock while sleeping */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800667 sti->sti_unbind_mp =
Erik Nordmarkde8c4a12009-02-12 08:42:06 -0800668 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP,
669 cr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700670 }
671
672 if (flags & _SOBIND_REBIND) {
673 /*
674 * Called from solisten after doing an sotpi_unbind() or
675 * potentially without the unbind (latter for AF_INET{,6}).
676 */
677 ASSERT(name == NULL && namelen == 0);
678
679 if (so->so_family == AF_UNIX) {
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800680 ASSERT(sti->sti_ux_bound_vp);
681 addr = &sti->sti_ux_laddr;
682 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
nordmarkfc80c0d2007-10-11 22:57:36 -0700683 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, "
684 "addr 0x%p, vp %p\n",
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700685 addrlen,
rh87107903a11e2008-07-31 15:02:18 -0700686 (void *)((struct so_ux_addr *)addr)->soua_vp,
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800687 (void *)sti->sti_ux_bound_vp));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700688 } else {
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800689 addr = sti->sti_laddr_sa;
690 addrlen = (t_uscalar_t)sti->sti_laddr_len;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700691 }
692 } else if (flags & _SOBIND_UNSPEC) {
693 ASSERT(name == NULL && namelen == 0);
694
695 /*
696 * The caller checked SS_ISBOUND but not necessarily
697 * under so_lock
698 */
699 if (so->so_state & SS_ISBOUND) {
700 /* No error */
701 goto done;
702 }
703
704 /* Set an initial local address */
705 switch (so->so_family) {
706 case AF_UNIX:
707 /*
708 * Use an address with same size as struct sockaddr
709 * just like BSD.
710 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800711 sti->sti_laddr_len =
nordmarkfc80c0d2007-10-11 22:57:36 -0700712 (socklen_t)sizeof (struct sockaddr);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800713 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
714 bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
715 sti->sti_laddr_sa->sa_family = so->so_family;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700716
717 /*
718 * Pass down an address with the implicit bind
719 * magic number and the rest all zeros.
720 * The transport will return a unique address.
721 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800722 sti->sti_ux_laddr.soua_vp = NULL;
723 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
724 addr = &sti->sti_ux_laddr;
725 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700726 break;
727
728 case AF_INET:
729 case AF_INET6:
730 /*
731 * An unspecified bind in TPI has a NULL address.
732 * Set the address in sockfs to have the sa_family.
733 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800734 sti->sti_laddr_len = (so->so_family == AF_INET) ?
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700735 (socklen_t)sizeof (sin_t) :
736 (socklen_t)sizeof (sin6_t);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800737 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
738 bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
739 sti->sti_laddr_sa->sa_family = so->so_family;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700740 addr = NULL;
741 addrlen = 0;
742 break;
743
744 default:
745 /*
746 * An unspecified bind in TPI has a NULL address.
747 * Set the address in sockfs to be zero length.
748 *
749 * Can not assume there is a sa_family for all
750 * protocol families. For example, AF_X25 does not
751 * have a family field.
752 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800753 bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
754 sti->sti_laddr_len = 0; /* XXX correct? */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700755 addr = NULL;
756 addrlen = 0;
757 break;
758 }
759
760 } else {
761 if (so->so_state & SS_ISBOUND) {
762 /*
763 * If it is ok to rebind the socket, first unbind
764 * with the transport. A rebind to the NULL address
765 * is interpreted as an unbind.
766 * Note that a bind to NULL in BSD does unbind the
767 * socket but it fails with EINVAL.
768 * Note that regular sockets set SOV_SOCKBSD i.e.
769 * _SOBIND_SOCKBSD gets set here hence no type of
770 * socket does currently allow rebinding.
771 *
772 * If the name is NULL just do an unbind.
773 */
774 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) &&
775 name != NULL) {
776 error = EINVAL;
777 unbind_on_err = 0;
778 eprintsoline(so, error);
779 goto done;
780 }
781 if ((so->so_mode & SM_CONNREQUIRED) &&
782 (so->so_state & SS_CANTREBIND)) {
783 error = EINVAL;
784 unbind_on_err = 0;
785 eprintsoline(so, error);
786 goto done;
787 }
788 error = sotpi_unbind(so, 0);
789 if (error) {
790 eprintsoline(so, error);
791 goto done;
792 }
793 ASSERT(!(so->so_state & SS_ISBOUND));
794 if (name == NULL) {
795 so->so_state &=
nordmarkfc80c0d2007-10-11 22:57:36 -0700796 ~(SS_ISCONNECTED|SS_ISCONNECTING);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700797 goto done;
798 }
799 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800800
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700801 /* X/Open requires this check */
802 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
803 if (xnet_check_print) {
804 printf("sockfs: X/Open bind state check "
805 "caused EINVAL\n");
806 }
807 error = EINVAL;
808 goto done;
809 }
810
811 switch (so->so_family) {
812 case AF_UNIX:
813 /*
814 * All AF_UNIX addresses are nul terminated
815 * when copied (copyin_name) in so the minimum
816 * length is 3 bytes.
817 */
818 if (name == NULL ||
819 (ssize_t)namelen <= sizeof (short) + 1) {
820 error = EISDIR;
821 eprintsoline(so, error);
822 goto done;
823 }
824 /*
825 * Verify so_family matches the bound family.
826 * BSD does not check this for AF_UNIX resulting
827 * in funny mknods.
828 */
829 if (name->sa_family != so->so_family) {
830 error = EAFNOSUPPORT;
831 goto done;
832 }
833 break;
834 case AF_INET:
835 if (name == NULL) {
836 error = EINVAL;
837 eprintsoline(so, error);
838 goto done;
839 }
840 if ((size_t)namelen != sizeof (sin_t)) {
841 error = name->sa_family != so->so_family ?
842 EAFNOSUPPORT : EINVAL;
843 eprintsoline(so, error);
844 goto done;
845 }
846 if ((flags & _SOBIND_XPG4_2) &&
847 (name->sa_family != so->so_family)) {
848 /*
849 * This check has to be made for X/Open
850 * sockets however application failures have
851 * been observed when it is applied to
852 * all sockets.
853 */
854 error = EAFNOSUPPORT;
855 eprintsoline(so, error);
856 goto done;
857 }
858 /*
859 * Force a zero sa_family to match so_family.
860 *
Richard Lowebbf21552022-02-26 16:40:47 -0600861 * Some programs like inetd(8) don't set the
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700862 * family field. Other programs leave
863 * sin_family set to garbage - SunOS 4.X does
864 * not check the family field on a bind.
865 * We use the family field that
866 * was passed in to the socket() call.
867 */
868 name->sa_family = so->so_family;
869 break;
870
871 case AF_INET6: {
872#ifdef DEBUG
873 sin6_t *sin6 = (sin6_t *)name;
874#endif /* DEBUG */
875
876 if (name == NULL) {
877 error = EINVAL;
878 eprintsoline(so, error);
879 goto done;
880 }
881 if ((size_t)namelen != sizeof (sin6_t)) {
882 error = name->sa_family != so->so_family ?
883 EAFNOSUPPORT : EINVAL;
884 eprintsoline(so, error);
885 goto done;
886 }
887 if (name->sa_family != so->so_family) {
888 /*
889 * With IPv6 we require the family to match
890 * unlike in IPv4.
891 */
892 error = EAFNOSUPPORT;
893 eprintsoline(so, error);
894 goto done;
895 }
896#ifdef DEBUG
897 /*
898 * Verify that apps don't forget to clear
899 * sin6_scope_id etc
900 */
901 if (sin6->sin6_scope_id != 0 &&
902 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
rshoaib2caf0dc2006-03-05 18:00:39 -0800903 zcmn_err(getzoneid(), CE_WARN,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700904 "bind with uninitialized sin6_scope_id "
905 "(%d) on socket. Pid = %d\n",
906 (int)sin6->sin6_scope_id,
907 (int)curproc->p_pid);
908 }
909 if (sin6->__sin6_src_id != 0) {
rshoaib2caf0dc2006-03-05 18:00:39 -0800910 zcmn_err(getzoneid(), CE_WARN,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700911 "bind with uninitialized __sin6_src_id "
912 "(%d) on socket. Pid = %d\n",
913 (int)sin6->__sin6_src_id,
914 (int)curproc->p_pid);
915 }
916#endif /* DEBUG */
917 break;
918 }
919 default:
920 /*
921 * Don't do any length or sa_family check to allow
922 * non-sockaddr style addresses.
923 */
924 if (name == NULL) {
925 error = EINVAL;
926 eprintsoline(so, error);
927 goto done;
928 }
929 break;
930 }
931
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800932 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700933 error = ENAMETOOLONG;
934 eprintsoline(so, error);
935 goto done;
936 }
937 /*
938 * Save local address.
939 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800940 sti->sti_laddr_len = (socklen_t)namelen;
941 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
942 bcopy(name, sti->sti_laddr_sa, namelen);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700943
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800944 addr = sti->sti_laddr_sa;
945 addrlen = (t_uscalar_t)sti->sti_laddr_len;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700946 switch (so->so_family) {
947 case AF_INET6:
948 case AF_INET:
949 break;
950 case AF_UNIX: {
951 struct sockaddr_un *soun =
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800952 (struct sockaddr_un *)sti->sti_laddr_sa;
Ric Aleshire92f45f62008-11-06 22:37:19 -0800953 struct vnode *vp, *rvp;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700954 struct vattr vattr;
955
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800956 ASSERT(sti->sti_ux_bound_vp == NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700957 /*
958 * Create vnode for the specified path name.
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800959 * Keep vnode held with a reference in sti_ux_bound_vp.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700960 * Use the vnode pointer as the address used in the
961 * bind with the transport.
962 *
963 * Use the same mode as in BSD. In particular this does
964 * not observe the umask.
965 */
966 /* MAXPATHLEN + soun_family + nul termination */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800967 if (sti->sti_laddr_len >
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700968 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
969 error = ENAMETOOLONG;
970 eprintsoline(so, error);
971 goto done;
972 }
973 vattr.va_type = VSOCK;
mrjae115bc2007-01-19 08:10:06 -0800974 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700975 vattr.va_mask = AT_TYPE|AT_MODE;
976 /* NOTE: holding so_lock */
977 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr,
nordmarkfc80c0d2007-10-11 22:57:36 -0700978 EXCL, 0, &vp, CRMKNOD, 0, 0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700979 if (error) {
980 if (error == EEXIST)
981 error = EADDRINUSE;
982 eprintsoline(so, error);
983 goto done;
984 }
985 /*
986 * Establish pointer from the underlying filesystem
987 * vnode to the socket node.
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800988 * sti_ux_bound_vp and v_stream->sd_vnode form the
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700989 * cross-linkage between the underlying filesystem
990 * node and the socket node.
991 */
Ric Aleshire92f45f62008-11-06 22:37:19 -0800992
993 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) {
994 VN_HOLD(rvp);
995 VN_RELE(vp);
996 vp = rvp;
997 }
998
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700999 ASSERT(SOTOV(so)->v_stream);
1000 mutex_enter(&vp->v_lock);
1001 vp->v_stream = SOTOV(so)->v_stream;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001002 sti->sti_ux_bound_vp = vp;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001003 mutex_exit(&vp->v_lock);
1004
1005 /*
1006 * Use the vnode pointer value as a unique address
1007 * (together with the magic number to avoid conflicts
1008 * with implicit binds) in the transport provider.
1009 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001010 sti->sti_ux_laddr.soua_vp =
1011 (void *)sti->sti_ux_bound_vp;
1012 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT;
1013 addr = &sti->sti_ux_laddr;
1014 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001015 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n",
1016 addrlen,
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001017 (void *)((struct so_ux_addr *)addr)->soua_vp));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001018 break;
1019 }
1020 } /* end switch (so->so_family) */
1021 }
1022
1023 /*
1024 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since
1025 * the transport can start passing up T_CONN_IND messages
1026 * as soon as it receives the bind req and strsock_proto()
1027 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs.
1028 */
1029 if (flags & _SOBIND_LISTEN) {
1030 if ((so->so_state & SS_ACCEPTCONN) == 0)
1031 clear_acceptconn_on_err = B_TRUE;
1032 save_so_backlog = so->so_backlog;
1033 restore_backlog_on_err = B_TRUE;
1034 so->so_state |= SS_ACCEPTCONN;
1035 so->so_backlog = backlog;
1036 }
1037
1038 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001039 * We send a T_BIND_REQ for TCP/UDP since we know it supports it,
1040 * for other transports we will send in a O_T_BIND_REQ.
1041 */
1042 if (tcp_udp_xport &&
1043 (so->so_family == AF_INET || so->so_family == AF_INET6))
1044 PRIM_type = T_BIND_REQ;
1045
1046 bind_req.PRIM_type = PRIM_type;
1047 bind_req.ADDR_length = addrlen;
1048 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
1049 bind_req.CONIND_number = backlog;
1050 /* NOTE: holding so_lock while sleeping */
1051 mp = soallocproto2(&bind_req, sizeof (bind_req),
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001052 addr, addrlen, 0, _ALLOC_SLEEP, cr);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001053 sti->sti_laddr_valid = 0;
kaisc28749e2005-11-12 18:58:05 -08001054
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001055 /* Done using sti_laddr_sa - can drop the lock */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001056 mutex_exit(&so->so_lock);
1057
1058 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
nordmarkfc80c0d2007-10-11 22:57:36 -07001059 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001060 if (error) {
1061 eprintsoline(so, error);
1062 mutex_enter(&so->so_lock);
1063 goto done;
1064 }
1065
1066 mutex_enter(&so->so_lock);
1067 error = sowaitprim(so, PRIM_type, T_BIND_ACK,
1068 (t_uscalar_t)sizeof (*bind_ack), &mp, 0);
1069 if (error) {
1070 eprintsoline(so, error);
1071 goto done;
1072 }
1073 ASSERT(mp);
1074 /*
1075 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1076 * strsock_proto while the lock was dropped above, the bind
1077 * is allowed to complete.
1078 */
1079
1080 /* Mark as bound. This will be undone if we detect errors below. */
1081 if (flags & _SOBIND_NOXLATE) {
1082 ASSERT(so->so_family == AF_UNIX);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001083 sti->sti_faddr_noxlate = 1;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001084 }
1085 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND));
1086 so->so_state |= SS_ISBOUND;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001087 ASSERT(sti->sti_unbind_mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001088
1089 /* note that we've already set SS_ACCEPTCONN above */
1090
1091 /*
1092 * Recompute addrlen - an unspecified bind sent down an
1093 * address of length zero but we expect the appropriate length
1094 * in return.
1095 */
1096 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ?
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001097 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001098
1099 bind_ack = (struct T_bind_ack *)mp->b_rptr;
1100 /*
1101 * The alignment restriction is really too strict but
1102 * we want enough alignment to inspect the fields of
1103 * a sockaddr_in.
1104 */
1105 addr = sogetoff(mp, bind_ack->ADDR_offset,
nordmarkfc80c0d2007-10-11 22:57:36 -07001106 bind_ack->ADDR_length,
1107 __TPI_ALIGN_SIZE);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001108 if (addr == NULL) {
1109 freemsg(mp);
1110 error = EPROTO;
1111 eprintsoline(so, error);
1112 goto done;
1113 }
1114 if (!(flags & _SOBIND_UNSPEC)) {
1115 /*
1116 * Verify that the transport didn't return something we
1117 * did not want e.g. an address other than what we asked for.
1118 *
1119 * NOTE: These checks would go away if/when we switch to
1120 * using the new TPI (in which the transport would fail
1121 * the request instead of assigning a different address).
1122 *
1123 * NOTE2: For protocols that we don't know (i.e. any
1124 * other than AF_INET6, AF_INET and AF_UNIX), we
1125 * cannot know if the transport should be expected to
1126 * return the same address as that requested.
1127 *
1128 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send
1129 * down a T_BIND_REQ. We use O_T_BIND_REQ for others.
1130 *
1131 * For example, in the case of netatalk it may be
1132 * inappropriate for the transport to return the
1133 * requested address (as it may have allocated a local
1134 * port number in behaviour similar to that of an
1135 * AF_INET bind request with a port number of zero).
1136 *
1137 * Given the definition of O_T_BIND_REQ, where the
1138 * transport may bind to an address other than the
1139 * requested address, it's not possible to determine
1140 * whether a returned address that differs from the
1141 * requested address is a reason to fail (because the
1142 * requested address was not available) or succeed
1143 * (because the transport allocated an appropriate
1144 * address and/or port).
1145 *
1146 * sockfs currently requires that the transport return
1147 * the requested address in the T_BIND_ACK, unless
1148 * there is code here to allow for any discrepancy.
1149 * Such code exists for AF_INET and AF_INET6.
1150 *
1151 * Netatalk chooses to return the requested address
1152 * rather than the (correct) allocated address. This
1153 * means that netatalk violates the TPI specification
1154 * (and would not function correctly if used from a
1155 * TLI application), but it does mean that it works
1156 * with sockfs.
1157 *
1158 * As noted above, using the newer XTI bind primitive
1159 * (T_BIND_REQ) in preference to O_T_BIND_REQ would
1160 * allow sockfs to be more sure about whether or not
1161 * the bind request had succeeded (as transports are
1162 * not permitted to bind to a different address than
1163 * that requested - they must return failure).
1164 * Unfortunately, support for T_BIND_REQ may not be
1165 * present in all transport implementations (netatalk,
1166 * for example, doesn't have it), making the
1167 * transition difficult.
1168 */
1169 if (bind_ack->ADDR_length != addrlen) {
1170 /* Assumes that the requested address was in use */
1171 freemsg(mp);
1172 error = EADDRINUSE;
1173 eprintsoline(so, error);
1174 goto done;
1175 }
1176
1177 switch (so->so_family) {
1178 case AF_INET6:
1179 case AF_INET: {
1180 sin_t *rname, *aname;
1181
1182 rname = (sin_t *)addr;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001183 aname = (sin_t *)sti->sti_laddr_sa;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001184
1185 /*
1186 * Take advantage of the alignment
1187 * of sin_port and sin6_port which fall
1188 * in the same place in their data structures.
1189 * Just use sin_port for either address family.
1190 *
1191 * This may become a problem if (heaven forbid)
1192 * there's a separate ipv6port_reserved... :-P
1193 *
1194 * Binding to port 0 has the semantics of letting
1195 * the transport bind to any port.
1196 *
1197 * If the transport is TCP or UDP since we had sent
1198 * a T_BIND_REQ we would not get a port other than
1199 * what we asked for.
1200 */
1201 if (tcp_udp_xport) {
1202 /*
1203 * Pick up the new port number if we bound to
1204 * port 0.
1205 */
1206 if (aname->sin_port == 0)
1207 aname->sin_port = rname->sin_port;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001208 sti->sti_laddr_valid = 1;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001209 break;
1210 }
1211 if (aname->sin_port != 0 &&
1212 aname->sin_port != rname->sin_port) {
1213 freemsg(mp);
1214 error = EADDRINUSE;
1215 eprintsoline(so, error);
1216 goto done;
1217 }
1218 /*
1219 * Pick up the new port number if we bound to port 0.
1220 */
1221 aname->sin_port = rname->sin_port;
1222
1223 /*
1224 * Unfortunately, addresses aren't _quite_ the same.
1225 */
1226 if (so->so_family == AF_INET) {
1227 if (aname->sin_addr.s_addr !=
1228 rname->sin_addr.s_addr) {
1229 freemsg(mp);
1230 error = EADDRNOTAVAIL;
1231 eprintsoline(so, error);
1232 goto done;
1233 }
1234 } else {
1235 sin6_t *rname6 = (sin6_t *)rname;
1236 sin6_t *aname6 = (sin6_t *)aname;
1237
1238 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr,
1239 &rname6->sin6_addr)) {
1240 freemsg(mp);
1241 error = EADDRNOTAVAIL;
1242 eprintsoline(so, error);
1243 goto done;
1244 }
1245 }
1246 break;
1247 }
1248 case AF_UNIX:
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001249 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001250 freemsg(mp);
1251 error = EADDRINUSE;
1252 eprintsoline(so, error);
1253 eprintso(so,
nordmarkfc80c0d2007-10-11 22:57:36 -07001254 ("addrlen %d, addr 0x%x, vp %p\n",
1255 addrlen, *((int *)addr),
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001256 (void *)sti->sti_ux_bound_vp));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001257 goto done;
1258 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001259 sti->sti_laddr_valid = 1;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001260 break;
1261 default:
1262 /*
1263 * NOTE: This assumes that addresses can be
1264 * byte-compared for equivalence.
1265 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001266 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001267 freemsg(mp);
1268 error = EADDRINUSE;
1269 eprintsoline(so, error);
1270 goto done;
1271 }
1272 /*
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001273 * Don't mark sti_laddr_valid, as we cannot be
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001274 * sure that the returned address is the real
1275 * bound address when talking to an unknown
1276 * transport.
1277 */
1278 break;
1279 }
1280 } else {
1281 /*
1282 * Save the returned address for getsockname.
1283 * Needed for unspecific bind unless transport supports
1284 * the TI_GETMYNAME ioctl.
1285 * Do this for AF_INET{,6} even though they do, as
1286 * caching info here is much better performance than
1287 * a TPI/STREAMS trip to the transport for getsockname.
1288 * Any which can't for some reason _must_ _not_ set
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001289 * sti_laddr_valid here for the caching version of
1290 * getsockname to not break;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001291 */
1292 switch (so->so_family) {
1293 case AF_UNIX:
1294 /*
1295 * Record the address bound with the transport
1296 * for use by socketpair.
1297 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001298 bcopy(addr, &sti->sti_ux_laddr, addrlen);
1299 sti->sti_laddr_valid = 1;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001300 break;
1301 case AF_INET:
1302 case AF_INET6:
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001303 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
1304 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len);
1305 sti->sti_laddr_valid = 1;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001306 break;
1307 default:
1308 /*
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001309 * Don't mark sti_laddr_valid, as we cannot be
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001310 * sure that the returned address is the real
1311 * bound address when talking to an unknown
1312 * transport.
1313 */
1314 break;
1315 }
1316 }
1317
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001318 freemsg(mp);
1319
1320done:
1321 if (error) {
1322 /* reset state & backlog to values held on entry */
1323 if (clear_acceptconn_on_err == B_TRUE)
1324 so->so_state &= ~SS_ACCEPTCONN;
1325 if (restore_backlog_on_err == B_TRUE)
1326 so->so_backlog = save_so_backlog;
1327
1328 if (unbind_on_err && so->so_state & SS_ISBOUND) {
1329 int err;
1330
1331 err = sotpi_unbind(so, 0);
1332 /* LINTED - statement has no consequent: if */
1333 if (err) {
1334 eprintsoline(so, error);
1335 } else {
1336 ASSERT(!(so->so_state & SS_ISBOUND));
1337 }
1338 }
1339 }
1340 if (!(flags & _SOBIND_LOCK_HELD)) {
1341 so_unlock_single(so, SOLOCKED);
1342 mutex_exit(&so->so_lock);
1343 } else {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001344 ASSERT(MUTEX_HELD(&so->so_lock));
1345 ASSERT(so->so_flag & SOLOCKED);
1346 }
1347 return (error);
1348}
1349
1350/* bind the socket */
masputraff550d02005-10-22 22:50:14 -07001351static int
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001352sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001353 int flags, struct cred *cr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001354{
1355 if ((flags & _SOBIND_SOCKETPAIR) == 0)
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001356 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001357
1358 flags &= ~_SOBIND_SOCKETPAIR;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001359 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001360}
1361
1362/*
1363 * Unbind a socket - used when bind() fails, when bind() specifies a NULL
1364 * address, or when listen needs to unbind and bind.
1365 * If the _SOUNBIND_REBIND flag is specified the addresses are retained
1366 * so that a sobind can pick them up.
1367 */
1368static int
1369sotpi_unbind(struct sonode *so, int flags)
1370{
1371 struct T_unbind_req unbind_req;
1372 int error = 0;
1373 mblk_t *mp;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001374 sotpi_info_t *sti = SOTOTPI(so);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001375
1376 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n",
rh87107903a11e2008-07-31 15:02:18 -07001377 (void *)so, flags, pr_state(so->so_state, so->so_mode)));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001378
1379 ASSERT(MUTEX_HELD(&so->so_lock));
1380 ASSERT(so->so_flag & SOLOCKED);
1381
1382 if (!(so->so_state & SS_ISBOUND)) {
1383 error = EINVAL;
1384 eprintsoline(so, error);
1385 goto done;
1386 }
1387
1388 mutex_exit(&so->so_lock);
1389
1390 /*
1391 * Flush the read and write side (except stream head read queue)
1392 * and send down T_UNBIND_REQ.
1393 */
1394 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
1395
1396 unbind_req.PRIM_type = T_UNBIND_REQ;
1397 mp = soallocproto1(&unbind_req, sizeof (unbind_req),
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001398 0, _ALLOC_SLEEP, CRED());
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001399 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
nordmarkfc80c0d2007-10-11 22:57:36 -07001400 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001401 mutex_enter(&so->so_lock);
1402 if (error) {
1403 eprintsoline(so, error);
1404 goto done;
1405 }
1406
1407 error = sowaitokack(so, T_UNBIND_REQ);
1408 if (error) {
1409 eprintsoline(so, error);
1410 goto done;
1411 }
1412
1413 /*
1414 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1415 * strsock_proto while the lock was dropped above, the unbind
1416 * is allowed to complete.
1417 */
1418 if (!(flags & _SOUNBIND_REBIND)) {
1419 /*
1420 * Clear out bound address.
1421 */
1422 vnode_t *vp;
1423
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001424 if ((vp = sti->sti_ux_bound_vp) != NULL) {
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001425 sti->sti_ux_bound_vp = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001426 vn_rele_stream(vp);
1427 }
1428 /* Clear out address */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001429 sti->sti_laddr_len = 0;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001430 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001431 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
1432 sti->sti_laddr_valid = 0;
brutus2c9e4292006-05-12 16:45:05 -07001433
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001434done:
kaisc28749e2005-11-12 18:58:05 -08001435
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001436 /* If the caller held the lock don't release it here */
1437 ASSERT(MUTEX_HELD(&so->so_lock));
1438 ASSERT(so->so_flag & SOLOCKED);
1439
1440 return (error);
1441}
1442
1443/*
1444 * listen on the socket.
1445 * For TPI conforming transports this has to first unbind with the transport
1446 * and then bind again using the new backlog.
1447 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001448/* ARGSUSED */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001449int
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001450sotpi_listen(struct sonode *so, int backlog, struct cred *cr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001451{
1452 int error = 0;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001453 sotpi_info_t *sti = SOTOTPI(so);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001454
1455 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n",
rh87107903a11e2008-07-31 15:02:18 -07001456 (void *)so, backlog, pr_state(so->so_state, so->so_mode)));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001457
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001458 if (sti->sti_serv_type == T_CLTS)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001459 return (EOPNOTSUPP);
1460
1461 /*
1462 * If the socket is ready to accept connections already, then
1463 * return without doing anything. This avoids a problem where
1464 * a second listen() call fails if a connection is pending and
1465 * leaves the socket unbound. Only when we are not unbinding
1466 * with the transport can we safely increase the backlog.
1467 */
1468 if (so->so_state & SS_ACCEPTCONN &&
1469 !((so->so_family == AF_INET || so->so_family == AF_INET6) &&
nordmarkfc80c0d2007-10-11 22:57:36 -07001470 /*CONSTCOND*/
1471 !solisten_tpi_tcp))
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001472 return (0);
1473
1474 if (so->so_state & SS_ISCONNECTED)
1475 return (EINVAL);
1476
1477 mutex_enter(&so->so_lock);
1478 so_lock_single(so); /* Set SOLOCKED */
1479
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001480 /*
1481 * If the listen doesn't change the backlog we do nothing.
1482 * This avoids an EPROTO error from the transport.
1483 */
1484 if ((so->so_state & SS_ACCEPTCONN) &&
1485 so->so_backlog == backlog)
1486 goto done;
1487
1488 if (!(so->so_state & SS_ISBOUND)) {
1489 /*
1490 * Must have been explicitly bound in the UNIX domain.
1491 */
1492 if (so->so_family == AF_UNIX) {
1493 error = EINVAL;
1494 goto done;
1495 }
1496 error = sotpi_bindlisten(so, NULL, 0, backlog,
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001497 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001498 } else if (backlog > 0) {
1499 /*
1500 * AF_INET{,6} hack to avoid losing the port.
1501 * Assumes that all AF_INET{,6} transports can handle a
1502 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI
1503 * has already bound thus it is possible to avoid the unbind.
1504 */
1505 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) &&
1506 /*CONSTCOND*/
1507 !solisten_tpi_tcp)) {
1508 error = sotpi_unbind(so, _SOUNBIND_REBIND);
1509 if (error)
1510 goto done;
1511 }
1512 error = sotpi_bindlisten(so, NULL, 0, backlog,
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001513 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001514 } else {
1515 so->so_state |= SS_ACCEPTCONN;
1516 so->so_backlog = backlog;
1517 }
1518 if (error)
1519 goto done;
1520 ASSERT(so->so_state & SS_ACCEPTCONN);
1521done:
1522 so_unlock_single(so, SOLOCKED);
1523 mutex_exit(&so->so_lock);
1524 return (error);
1525}
1526
1527/*
1528 * Disconnect either a specified seqno or all (-1).
1529 * The former is used on listening sockets only.
1530 *
1531 * When seqno == -1 sodisconnect could call sotpi_unbind. However,
1532 * the current use of sodisconnect(seqno == -1) is only for shutdown
1533 * so there is no point (and potentially incorrect) to unbind.
1534 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001535static int
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001536sodisconnect(struct sonode *so, t_scalar_t seqno, int flags)
1537{
1538 struct T_discon_req discon_req;
1539 int error = 0;
1540 mblk_t *mp;
1541
1542 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n",
rh87107903a11e2008-07-31 15:02:18 -07001543 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode)));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001544
1545 if (!(flags & _SODISCONNECT_LOCK_HELD)) {
1546 mutex_enter(&so->so_lock);
1547 so_lock_single(so); /* Set SOLOCKED */
1548 } else {
1549 ASSERT(MUTEX_HELD(&so->so_lock));
1550 ASSERT(so->so_flag & SOLOCKED);
1551 }
1552
1553 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) {
1554 error = EINVAL;
1555 eprintsoline(so, error);
1556 goto done;
1557 }
1558
1559 mutex_exit(&so->so_lock);
1560 /*
1561 * Flush the write side (unless this is a listener)
1562 * and then send down a T_DISCON_REQ.
1563 * (Don't flush on listener since it could flush {O_}T_CONN_RES
1564 * and other messages.)
1565 */
1566 if (!(so->so_state & SS_ACCEPTCONN))
1567 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW);
1568
1569 discon_req.PRIM_type = T_DISCON_REQ;
1570 discon_req.SEQ_number = seqno;
1571 mp = soallocproto1(&discon_req, sizeof (discon_req),
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001572 0, _ALLOC_SLEEP, CRED());
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001573 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
nordmarkfc80c0d2007-10-11 22:57:36 -07001574 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001575 mutex_enter(&so->so_lock);
1576 if (error) {
1577 eprintsoline(so, error);
1578 goto done;
1579 }
1580
1581 error = sowaitokack(so, T_DISCON_REQ);
1582 if (error) {
1583 eprintsoline(so, error);
1584 goto done;
1585 }
1586 /*
1587 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1588 * strsock_proto while the lock was dropped above, the disconnect
1589 * is allowed to complete. However, it is not possible to
1590 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set.
1591 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001592 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING);
1593 SOTOTPI(so)->sti_laddr_valid = 0;
1594 SOTOTPI(so)->sti_faddr_valid = 0;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001595done:
1596 if (!(flags & _SODISCONNECT_LOCK_HELD)) {
1597 so_unlock_single(so, SOLOCKED);
1598 mutex_exit(&so->so_lock);
1599 } else {
1600 /* If the caller held the lock don't release it here */
1601 ASSERT(MUTEX_HELD(&so->so_lock));
1602 ASSERT(so->so_flag & SOLOCKED);
1603 }
1604 return (error);
1605}
1606
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001607/* ARGSUSED */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001608int
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001609sotpi_accept(struct sonode *so, int fflag, struct cred *cr,
1610 struct sonode **nsop)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001611{
1612 struct T_conn_ind *conn_ind;
1613 struct T_conn_res *conn_res;
1614 int error = 0;
Anders Perssondd49f122010-06-17 17:23:59 -07001615 mblk_t *mp, *ack_mp;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001616 struct sonode *nso;
1617 vnode_t *nvp;
1618 void *src;
1619 t_uscalar_t srclen;
1620 void *opt;
1621 t_uscalar_t optlen;
1622 t_scalar_t PRIM_type;
1623 t_scalar_t SEQ_number;
ja97890188658b2007-06-04 07:35:45 -07001624 size_t sinlen;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001625 sotpi_info_t *sti = SOTOTPI(so);
1626 sotpi_info_t *nsti;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001627
1628 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n",
rh87107903a11e2008-07-31 15:02:18 -07001629 (void *)so, fflag, (void *)nsop,
1630 pr_state(so->so_state, so->so_mode)));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001631
1632 /*
1633 * Defer single-threading the accepting socket until
1634 * the T_CONN_IND has been received and parsed and the
1635 * new sonode has been opened.
1636 */
1637
1638 /* Check that we are not already connected */
1639 if ((so->so_state & SS_ACCEPTCONN) == 0)
1640 goto conn_bad;
1641again:
1642 if ((error = sowaitconnind(so, fflag, &mp)) != 0)
1643 goto e_bad;
1644
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001645 ASSERT(mp != NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001646 conn_ind = (struct T_conn_ind *)mp->b_rptr;
kaisc28749e2005-11-12 18:58:05 -08001647
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001648 /*
1649 * Save SEQ_number for error paths.
1650 */
1651 SEQ_number = conn_ind->SEQ_number;
1652
1653 srclen = conn_ind->SRC_length;
1654 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1);
1655 if (src == NULL) {
1656 error = EPROTO;
1657 freemsg(mp);
1658 eprintsoline(so, error);
1659 goto disconnect_unlocked;
1660 }
1661 optlen = conn_ind->OPT_length;
1662 switch (so->so_family) {
1663 case AF_INET:
1664 case AF_INET6:
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001665 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001666 bcopy(mp->b_rptr + conn_ind->OPT_offset,
1667 &opt, conn_ind->OPT_length);
1668 } else {
1669 /*
1670 * The transport (in this case TCP) hasn't sent up
1671 * a pointer to an instance for the accept fast-path.
1672 * Disable fast-path completely because the call to
1673 * sotpi_create() below would otherwise create an
1674 * incomplete TCP instance, which would lead to
1675 * problems when sockfs sends a normal T_CONN_RES
1676 * message down the new stream.
1677 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001678 if (sti->sti_direct) {
masputraff550d02005-10-22 22:50:14 -07001679 int rval;
1680 /*
1681 * For consistency we inform tcp to disable
1682 * direct interface on the listener, though
1683 * we can certainly live without doing this
1684 * because no data will ever travel upstream
1685 * on the listening socket.
1686 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001687 sti->sti_direct = 0;
masputraff550d02005-10-22 22:50:14 -07001688 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK,
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001689 0, 0, K_TO_K, cr, &rval);
masputraff550d02005-10-22 22:50:14 -07001690 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001691 opt = NULL;
1692 optlen = 0;
1693 }
1694 break;
1695 case AF_UNIX:
1696 default:
1697 if (optlen != 0) {
1698 opt = sogetoff(mp, conn_ind->OPT_offset, optlen,
1699 __TPI_ALIGN_SIZE);
1700 if (opt == NULL) {
1701 error = EPROTO;
1702 freemsg(mp);
1703 eprintsoline(so, error);
1704 goto disconnect_unlocked;
1705 }
1706 }
1707 if (so->so_family == AF_UNIX) {
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001708 if (!sti->sti_faddr_noxlate) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001709 src = NULL;
1710 srclen = 0;
1711 }
1712 /* Extract src address from options */
1713 if (optlen != 0)
1714 so_getopt_srcaddr(opt, optlen, &src, &srclen);
1715 }
1716 break;
1717 }
1718
1719 /*
1720 * Create the new socket.
1721 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001722 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001723 if (nso == NULL) {
1724 ASSERT(error != 0);
1725 /*
1726 * Accept can not fail with ENOBUFS. sotpi_create
1727 * sleeps waiting for memory until a signal is caught
1728 * so return EINTR.
1729 */
1730 freemsg(mp);
1731 if (error == ENOBUFS)
1732 error = EINTR;
1733 goto e_disc_unl;
1734 }
1735 nvp = SOTOV(nso);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001736 nsti = SOTOTPI(nso);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001737
1738#ifdef DEBUG
1739 /*
1740 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus
1741 * it's inherited early to allow debugging of the accept code itself.
1742 */
1743 nso->so_options |= so->so_options & SO_DEBUG;
1744#endif /* DEBUG */
1745
1746 /*
1747 * Save the SRC address from the T_CONN_IND
1748 * for getpeername to work on AF_UNIX and on transports that do not
1749 * support TI_GETPEERNAME.
1750 *
1751 * NOTE: AF_UNIX NUL termination is ensured by the sender's
1752 * copyin_name().
1753 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001754 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001755 error = EINVAL;
1756 freemsg(mp);
1757 eprintsoline(so, error);
1758 goto disconnect_vp_unlocked;
1759 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001760 nsti->sti_faddr_len = (socklen_t)srclen;
1761 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
1762 bcopy(src, nsti->sti_faddr_sa, srclen);
1763 nsti->sti_faddr_valid = 1;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001764
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001765 /*
1766 * Record so_peercred and so_cpid from a cred in the T_CONN_IND.
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001767 */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001768 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) <
1769 (sizeof (struct T_conn_res) + sizeof (intptr_t))) {
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001770 cred_t *cr;
1771 pid_t cpid;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001772
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001773 cr = msg_getcred(mp, &cpid);
1774 if (cr != NULL) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001775 crhold(cr);
1776 nso->so_peercred = cr;
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001777 nso->so_cpid = cpid;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001778 }
1779 freemsg(mp);
1780
1781 mp = soallocproto1(NULL, sizeof (struct T_conn_res) +
Anders Perssond4f98ef2009-03-02 22:33:16 -08001782 sizeof (intptr_t), 0, _ALLOC_INTR, cr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001783 if (mp == NULL) {
1784 /*
1785 * Accept can not fail with ENOBUFS.
1786 * A signal was caught so return EINTR.
1787 */
1788 error = EINTR;
1789 eprintsoline(so, error);
1790 goto disconnect_vp_unlocked;
1791 }
1792 conn_res = (struct T_conn_res *)mp->b_rptr;
1793 } else {
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001794 /*
1795 * For efficiency reasons we use msg_extractcred; no crhold
1796 * needed since db_credp is cleared (i.e., we move the cred
1797 * from the message to so_peercred).
1798 */
1799 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001800
1801 mp->b_rptr = DB_BASE(mp);
1802 conn_res = (struct T_conn_res *)mp->b_rptr;
1803 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res);
Anders Perssond4f98ef2009-03-02 22:33:16 -08001804
1805 mblk_setcred(mp, cr, curproc->p_pid);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001806 }
1807
1808 /*
1809 * New socket must be bound at least in sockfs and, except for AF_INET,
1810 * (or AF_INET6) it also has to be bound in the transport provider.
ja97890188658b2007-06-04 07:35:45 -07001811 * We set the local address in the sonode from the T_OK_ACK of the
1812 * T_CONN_RES. For this reason the address we bind to here isn't
1813 * important.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001814 */
1815 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) &&
1816 /*CONSTCOND*/
1817 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) {
1818 /*
1819 * Optimization for AF_INET{,6} transports
1820 * that can handle a T_CONN_RES without being bound.
1821 */
1822 mutex_enter(&nso->so_lock);
1823 so_automatic_bind(nso);
1824 mutex_exit(&nso->so_lock);
1825 } else {
1826 /* Perform NULL bind with the transport provider. */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001827 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC,
1828 cr)) != 0) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001829 ASSERT(error != ENOBUFS);
1830 freemsg(mp);
1831 eprintsoline(nso, error);
1832 goto disconnect_vp_unlocked;
1833 }
1834 }
1835
1836 /*
1837 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES
1838 * so that any data arriving on the new socket will cause the
1839 * appropriate signals to be delivered for the new socket.
1840 *
1841 * No other thread (except strsock_proto and strsock_misc)
1842 * can access the new socket thus we relax the locking.
1843 */
1844 nso->so_pgrp = so->so_pgrp;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001845 nso->so_state |= so->so_state & SS_ASYNC;
1846 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001847
1848 if (nso->so_pgrp != 0) {
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001849 if ((error = so_set_events(nso, nvp, cr)) != 0) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001850 eprintsoline(nso, error);
1851 error = 0;
1852 nso->so_pgrp = 0;
1853 }
1854 }
1855
1856 /*
1857 * Make note of the socket level options. TCP and IP level options
1858 * are already inherited. We could do all this after accept is
1859 * successful but doing it here simplifies code and no harm done
1860 * for error case.
1861 */
1862 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE|
1863 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK|
1864 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER);
1865 nso->so_sndbuf = so->so_sndbuf;
1866 nso->so_rcvbuf = so->so_rcvbuf;
1867 if (nso->so_options & SO_LINGER)
1868 nso->so_linger = so->so_linger;
1869
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001870 /*
1871 * Note that the following sti_direct code path should be
1872 * removed once we are confident that the direct sockets
1873 * do not result in any degradation.
1874 */
1875 if (sti->sti_direct) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001876
1877 ASSERT(opt != NULL);
1878
1879 conn_res->OPT_length = optlen;
1880 conn_res->OPT_offset = MBLKL(mp);
1881 bcopy(&opt, mp->b_wptr, optlen);
1882 mp->b_wptr += optlen;
1883 conn_res->PRIM_type = T_CONN_RES;
1884 conn_res->ACCEPTOR_id = 0;
1885 PRIM_type = T_CONN_RES;
1886
1887 /* Send down the T_CONN_RES on acceptor STREAM */
1888 error = kstrputmsg(SOTOV(nso), mp, NULL,
1889 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
1890 if (error) {
1891 mutex_enter(&so->so_lock);
1892 so_lock_single(so);
1893 eprintsoline(so, error);
1894 goto disconnect_vp;
1895 }
1896 mutex_enter(&nso->so_lock);
1897 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK,
1898 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0);
1899 if (error) {
1900 mutex_exit(&nso->so_lock);
1901 mutex_enter(&so->so_lock);
1902 so_lock_single(so);
1903 eprintsoline(so, error);
1904 goto disconnect_vp;
1905 }
1906 if (nso->so_family == AF_INET) {
1907 sin_t *sin;
1908
1909 sin = (sin_t *)(ack_mp->b_rptr +
1910 sizeof (struct T_ok_ack));
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001911 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t));
1912 nsti->sti_laddr_len = sizeof (sin_t);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001913 } else {
1914 sin6_t *sin6;
1915
1916 sin6 = (sin6_t *)(ack_mp->b_rptr +
1917 sizeof (struct T_ok_ack));
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001918 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t));
1919 nsti->sti_laddr_len = sizeof (sin6_t);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001920 }
1921 freemsg(ack_mp);
1922
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001923 nso->so_state |= SS_ISCONNECTED;
1924 nso->so_proto_handle = (sock_lower_handle_t)opt;
1925 nsti->sti_laddr_valid = 1;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001926
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001927 mutex_exit(&nso->so_lock);
1928
1929 /*
ja978907d6c0352006-09-27 01:18:55 -07001930 * It's possible, through the use of autopush for example,
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001931 * that the acceptor stream may not support sti_direct
1932 * semantics. If the new socket does not support sti_direct
ja978907d6c0352006-09-27 01:18:55 -07001933 * we issue a _SIOCSOCKFALLBACK to inform the transport
1934 * as we would in the I_PUSH case.
1935 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001936 if (nsti->sti_direct == 0) {
ja978907d6c0352006-09-27 01:18:55 -07001937 int rval;
1938
1939 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK,
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001940 0, 0, K_TO_K, cr, &rval)) != 0) {
ja978907d6c0352006-09-27 01:18:55 -07001941 mutex_enter(&so->so_lock);
1942 so_lock_single(so);
1943 eprintsoline(so, error);
1944 goto disconnect_vp;
1945 }
1946 }
1947
1948 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001949 * Pass out new socket.
1950 */
1951 if (nsop != NULL)
1952 *nsop = nso;
1953
1954 return (0);
1955 }
1956
1957 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001958 * This is the non-performance case for sockets (e.g. AF_UNIX sockets)
1959 * which don't support the FireEngine accept fast-path. It is also
1960 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd
1961 * again. Neither sockfs nor TCP attempt to find out if some other
1962 * random module has been inserted in between (in which case we
1963 * should follow TLI accept behaviour). We blindly assume the worst
1964 * case and revert back to old behaviour i.e. TCP will not send us
1965 * any option (eager) and the accept should happen on the listener
1966 * queue. Any queued T_conn_ind have already got their options removed
1967 * by so_sock2_stream() when "sockmod" was I_POP'd.
1968 */
1969 /*
1970 * Fill in the {O_}T_CONN_RES before getting SOLOCKED.
1971 */
1972 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) {
1973#ifdef _ILP32
1974 queue_t *q;
1975
1976 /*
1977 * Find read queue in driver
1978 * Can safely do this since we "own" nso/nvp.
1979 */
1980 q = strvp2wq(nvp)->q_next;
1981 while (SAMESTR(q))
1982 q = q->q_next;
1983 q = RD(q);
1984 conn_res->ACCEPTOR_id = (t_uscalar_t)q;
1985#else
1986 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev);
1987#endif /* _ILP32 */
1988 conn_res->PRIM_type = O_T_CONN_RES;
1989 PRIM_type = O_T_CONN_RES;
1990 } else {
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001991 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001992 conn_res->PRIM_type = T_CONN_RES;
1993 PRIM_type = T_CONN_RES;
1994 }
1995 conn_res->SEQ_number = SEQ_number;
1996 conn_res->OPT_length = 0;
1997 conn_res->OPT_offset = 0;
1998
1999 mutex_enter(&so->so_lock);
2000 so_lock_single(so); /* Set SOLOCKED */
2001 mutex_exit(&so->so_lock);
2002
2003 error = kstrputmsg(SOTOV(so), mp, NULL,
2004 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
2005 mutex_enter(&so->so_lock);
2006 if (error) {
2007 eprintsoline(so, error);
2008 goto disconnect_vp;
2009 }
ja97890188658b2007-06-04 07:35:45 -07002010 error = sowaitprim(so, PRIM_type, T_OK_ACK,
2011 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002012 if (error) {
2013 eprintsoline(so, error);
2014 goto disconnect_vp;
2015 }
George Shepherd881776c2010-08-11 12:59:10 -07002016 mutex_exit(&so->so_lock);
ja97890188658b2007-06-04 07:35:45 -07002017 /*
2018 * If there is a sin/sin6 appended onto the T_OK_ACK use
2019 * that to set the local address. If this is not present
2020 * then we zero out the address and don't set the
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002021 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over
ja978906e81d8d2007-07-17 14:35:07 -07002022 * the pathname from the listening socket.
George Shepherd881776c2010-08-11 12:59:10 -07002023 * In the case where this is TCP or an AF_UNIX socket the
2024 * client side may have queued data or a T_ORDREL in the
2025 * transport. Having now sent the T_CONN_RES we may receive
2026 * those queued messages at any time. Hold the acceptor
2027 * so_lock until its state and laddr are finalized.
ja97890188658b2007-06-04 07:35:45 -07002028 */
George Shepherd881776c2010-08-11 12:59:10 -07002029 mutex_enter(&nso->so_lock);
ja97890188658b2007-06-04 07:35:45 -07002030 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t);
2031 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) &&
2032 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) {
2033 ack_mp->b_rptr += sizeof (struct T_ok_ack);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002034 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen);
2035 nsti->sti_laddr_len = sinlen;
2036 nsti->sti_laddr_valid = 1;
ja978906e81d8d2007-07-17 14:35:07 -07002037 } else if (nso->so_family == AF_UNIX) {
2038 ASSERT(so->so_family == AF_UNIX);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002039 nsti->sti_laddr_len = sti->sti_laddr_len;
2040 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
2041 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa,
2042 nsti->sti_laddr_len);
2043 nsti->sti_laddr_valid = 1;
ja97890188658b2007-06-04 07:35:45 -07002044 } else {
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002045 nsti->sti_laddr_len = sti->sti_laddr_len;
2046 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
2047 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size);
2048 nsti->sti_laddr_sa->sa_family = nso->so_family;
ja97890188658b2007-06-04 07:35:45 -07002049 }
George Shepherd881776c2010-08-11 12:59:10 -07002050 nso->so_state |= SS_ISCONNECTED;
2051 mutex_exit(&nso->so_lock);
2052
ja97890188658b2007-06-04 07:35:45 -07002053 freemsg(ack_mp);
2054
George Shepherd881776c2010-08-11 12:59:10 -07002055 mutex_enter(&so->so_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002056 so_unlock_single(so, SOLOCKED);
2057 mutex_exit(&so->so_lock);
2058
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002059 /*
2060 * Pass out new socket.
2061 */
2062 if (nsop != NULL)
2063 *nsop = nso;
2064
2065 return (0);
2066
2067
2068eproto_disc_unl:
2069 error = EPROTO;
2070e_disc_unl:
2071 eprintsoline(so, error);
2072 goto disconnect_unlocked;
2073
2074pr_disc_vp_unl:
2075 eprintsoline(so, error);
2076disconnect_vp_unlocked:
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08002077 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002078 VN_RELE(nvp);
2079disconnect_unlocked:
2080 (void) sodisconnect(so, SEQ_number, 0);
2081 return (error);
2082
2083pr_disc_vp:
2084 eprintsoline(so, error);
2085disconnect_vp:
2086 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD);
2087 so_unlock_single(so, SOLOCKED);
2088 mutex_exit(&so->so_lock);
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08002089 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002090 VN_RELE(nvp);
2091 return (error);
2092
2093conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */
2094 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW)
2095 ? EOPNOTSUPP : EINVAL;
2096e_bad:
2097 eprintsoline(so, error);
2098 return (error);
2099}
2100
2101/*
2102 * connect a socket.
2103 *
2104 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to
2105 * unconnect (by specifying a null address).
2106 */
2107int
2108sotpi_connect(struct sonode *so,
Gordon Rossf012ee02016-11-17 22:13:10 -05002109 struct sockaddr *name,
2110 socklen_t namelen,
2111 int fflag,
2112 int flags,
2113 struct cred *cr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002114{
2115 struct T_conn_req conn_req;
2116 int error = 0;
2117 mblk_t *mp;
2118 void *src;
2119 socklen_t srclen;
2120 void *addr;
2121 socklen_t addrlen;
2122 boolean_t need_unlock;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002123 sotpi_info_t *sti = SOTOTPI(so);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002124
2125 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n",
rh87107903a11e2008-07-31 15:02:18 -07002126 (void *)so, (void *)name, namelen, fflag, flags,
nordmarkfc80c0d2007-10-11 22:57:36 -07002127 pr_state(so->so_state, so->so_mode)));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002128
2129 /*
2130 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
2131 * avoid sleeping for memory with SOLOCKED held.
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002132 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002133 * + sizeof (struct T_opthdr).
2134 * (the AF_UNIX so_ux_addr_xlate() does not make the address
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002135 * exceed sti_faddr_maxlen).
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002136 */
2137 mp = soallocproto(sizeof (struct T_conn_req) +
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08002138 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR,
2139 cr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002140 if (mp == NULL) {
2141 /*
2142 * Connect can not fail with ENOBUFS. A signal was
2143 * caught so return EINTR.
2144 */
2145 error = EINTR;
2146 eprintsoline(so, error);
2147 return (error);
2148 }
2149
2150 mutex_enter(&so->so_lock);
2151 /*
jprakashba3431d2007-12-14 11:26:33 -08002152 * Make sure there is a preallocated T_unbind_req message
2153 * before any binding. This message is allocated when the
2154 * socket is created. Since another thread can consume
2155 * so_unbind_mp by the time we return from so_lock_single(),
2156 * we should check the availability of so_unbind_mp after
2157 * we return from so_lock_single().
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002158 */
jprakashba3431d2007-12-14 11:26:33 -08002159
2160 so_lock_single(so); /* Set SOLOCKED */
2161 need_unlock = B_TRUE;
2162
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002163 if (sti->sti_unbind_mp == NULL) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002164 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n"));
2165 /* NOTE: holding so_lock while sleeping */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002166 sti->sti_unbind_mp =
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08002167 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002168 if (sti->sti_unbind_mp == NULL) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002169 error = EINTR;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002170 goto done;
2171 }
2172 }
2173
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002174 /*
2175 * Can't have done a listen before connecting.
2176 */
2177 if (so->so_state & SS_ACCEPTCONN) {
2178 error = EOPNOTSUPP;
2179 goto done;
2180 }
2181
2182 /*
2183 * Must be bound with the transport
2184 */
2185 if (!(so->so_state & SS_ISBOUND)) {
2186 if ((so->so_family == AF_INET || so->so_family == AF_INET6) &&
2187 /*CONSTCOND*/
2188 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) {
2189 /*
2190 * Optimization for AF_INET{,6} transports
2191 * that can handle a T_CONN_REQ without being bound.
2192 */
2193 so_automatic_bind(so);
2194 } else {
2195 error = sotpi_bind(so, NULL, 0,
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002196 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002197 if (error)
2198 goto done;
2199 }
2200 ASSERT(so->so_state & SS_ISBOUND);
2201 flags |= _SOCONNECT_DID_BIND;
2202 }
2203
2204 /*
2205 * Handle a connect to a name parameter of type AF_UNSPEC like a
2206 * connect to a null address. This is the portable method to
2207 * unconnect a socket.
2208 */
2209 if ((namelen >= sizeof (sa_family_t)) &&
2210 (name->sa_family == AF_UNSPEC)) {
2211 name = NULL;
2212 namelen = 0;
2213 }
2214
2215 /*
2216 * Check that we are not already connected.
2217 * A connection-oriented socket cannot be reconnected.
2218 * A connected connection-less socket can be
2219 * - connected to a different address by a subsequent connect
2220 * - "unconnected" by a connect to the NULL address
2221 */
2222 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) {
2223 ASSERT(!(flags & _SOCONNECT_DID_BIND));
2224 if (so->so_mode & SM_CONNREQUIRED) {
2225 /* Connection-oriented socket */
2226 error = so->so_state & SS_ISCONNECTED ?
2227 EISCONN : EALREADY;
2228 goto done;
2229 }
2230 /* Connection-less socket */
2231 if (name == NULL) {
2232 /*
2233 * Remove the connected state and clear SO_DGRAM_ERRIND
2234 * since it was set when the socket was connected.
2235 * If this is UDP also send down a T_DISCON_REQ.
2236 */
2237 int val;
2238
2239 if ((so->so_family == AF_INET ||
nordmarkfc80c0d2007-10-11 22:57:36 -07002240 so->so_family == AF_INET6) &&
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002241 (so->so_type == SOCK_DGRAM ||
nordmarkfc80c0d2007-10-11 22:57:36 -07002242 so->so_type == SOCK_RAW) &&
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002243 /*CONSTCOND*/
2244 !soconnect_tpi_udp) {
2245 /* XXX What about implicitly unbinding here? */
2246 error = sodisconnect(so, -1,
nordmarkfc80c0d2007-10-11 22:57:36 -07002247 _SODISCONNECT_LOCK_HELD);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002248 } else {
2249 so->so_state &=
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002250 ~(SS_ISCONNECTED | SS_ISCONNECTING);
2251 sti->sti_faddr_valid = 0;
2252 sti->sti_faddr_len = 0;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002253 }
2254
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002255 /* Remove SOLOCKED since setsockopt will grab it */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002256 so_unlock_single(so, SOLOCKED);
2257 mutex_exit(&so->so_lock);
2258
2259 val = 0;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002260 (void) sotpi_setsockopt(so, SOL_SOCKET,
2261 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val),
2262 cr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002263
2264 mutex_enter(&so->so_lock);
2265 so_lock_single(so); /* Set SOLOCKED */
2266 goto done;
2267 }
2268 }
2269 ASSERT(so->so_state & SS_ISBOUND);
2270
2271 if (name == NULL || namelen == 0) {
2272 error = EINVAL;
2273 goto done;
2274 }
2275 /*
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002276 * Mark the socket if sti_faddr_sa represents the transport level
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002277 * address.
2278 */
2279 if (flags & _SOCONNECT_NOXLATE) {
2280 struct sockaddr_ux *soaddr_ux;
2281
2282 ASSERT(so->so_family == AF_UNIX);
2283 if (namelen != sizeof (struct sockaddr_ux)) {
2284 error = EINVAL;
2285 goto done;
2286 }
2287 soaddr_ux = (struct sockaddr_ux *)name;
2288 name = (struct sockaddr *)&soaddr_ux->sou_addr;
2289 namelen = sizeof (soaddr_ux->sou_addr);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002290 sti->sti_faddr_noxlate = 1;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002291 }
2292
2293 /*
2294 * Length and family checks.
2295 */
2296 error = so_addr_verify(so, name, namelen);
2297 if (error)
2298 goto bad;
2299
2300 /*
2301 * Save foreign address. Needed for AF_UNIX as well as
2302 * transport providers that do not support TI_GETPEERNAME.
2303 * Also used for cached foreign address for TCP and UDP.
2304 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08002305 if (namelen > (t_uscalar_t)