blob: 3262150f797099831e857db176521382338aa062 [file] [log] [blame]
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
rshoaib2caf0dc2006-03-05 18:00:39 -08005 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07007 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
rshoaib2caf0dc2006-03-05 18:00:39 -080021
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070022/*
Anders Persson3e95bd42010-06-17 17:22:09 -070023 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
Gordon Rossf012ee02016-11-17 22:13:10 -050024 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
Bryan Cantrill8e935252014-10-04 09:02:58 +000025 * Copyright 2015, Joyent, Inc. All rights reserved.
Andy Fiddamand865fc92020-06-05 14:22:45 +000026 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
Garrett D'Amore15f90b02022-07-03 19:05:50 -070027 * Copyright 2022 Garrett D'Amore
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070028 */
29
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070030#include <sys/types.h>
31#include <sys/t_lock.h>
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/buf.h>
35#include <sys/conf.h>
36#include <sys/cred.h>
37#include <sys/kmem.h>
38#include <sys/sysmacros.h>
39#include <sys/vfs.h>
rsbaa59c4c2007-03-26 17:41:06 -070040#include <sys/vfs_opreg.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070041#include <sys/vnode.h>
42#include <sys/debug.h>
43#include <sys/errno.h>
44#include <sys/time.h>
45#include <sys/file.h>
46#include <sys/open.h>
47#include <sys/user.h>
48#include <sys/termios.h>
49#include <sys/stream.h>
50#include <sys/strsubr.h>
51#include <sys/strsun.h>
52#include <sys/esunddi.h>
53#include <sys/flock.h>
54#include <sys/modctl.h>
55#include <sys/cmn_err.h>
56#include <sys/mkdev.h>
57#include <sys/pathname.h>
58#include <sys/ddi.h>
59#include <sys/stat.h>
60#include <sys/fs/snode.h>
61#include <sys/fs/dv_node.h>
62#include <sys/zone.h>
63
64#include <sys/socket.h>
65#include <sys/socketvar.h>
66#include <netinet/in.h>
67#include <sys/un.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070068#include <sys/ucred.h>
69
70#include <sys/tiuser.h>
71#define _SUN_TPI_VERSION 2
72#include <sys/tihdr.h>
73
74#include <c2/audit.h>
75
Yu Xiangning0f1702c2008-12-11 20:04:13 -080076#include <fs/sockfs/sockcommon.h>
Anders Persson3e95bd42010-06-17 17:22:09 -070077#include <fs/sockfs/sockfilter_impl.h>
Yu Xiangning0f1702c2008-12-11 20:04:13 -080078#include <fs/sockfs/socktpi.h>
79#include <fs/sockfs/socktpi_impl.h>
Anders Perssonbbc000e2009-04-28 12:10:59 -070080#include <fs/sockfs/sodirect.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070081
82/*
83 * Macros that operate on struct cmsghdr.
84 * The CMSG_VALID macro does not assume that the last option buffer is padded.
85 */
/* Address of the data that immediately follows the cmsghdr itself. */
86#define CMSG_CONTENT(cmsg) (&((cmsg)[1]))
/* Number of content bytes: cmsg_len with the header size subtracted. */
87#define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr))
/*
 * Non-zero when cmsg passes ISALIGNED_cmsghdr(), lies within [start, end),
 * has a cmsg_len of at least one header, and cmsg + cmsg_len does not
 * extend past end.
 */
88#define CMSG_VALID(cmsg, start, end) \
89 (ISALIGNED_cmsghdr(cmsg) && \
90 ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \
91 ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \
92 ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \
93 ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)))
/* Passed to cv_wait_stop() by so_lock_single() and so_lock_read(). */
94#define SO_LOCK_WAKEUP_TIME 3000 /* Wakeup time in milliseconds */
95
/* Assigned in sockinit() from makedevice(getudev(), 0). */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070096dev_t sockdev; /* For fsid in getattr */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070097
/* Its sl_lock mutex is initialized in sockinit(). */
98struct socklist socklist;
99
/* kmem cache of struct sonode, created in sockinit(). */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800100struct kmem_cache *socket_cache;
101
Anders Persson3e95bd42010-06-17 17:22:09 -0700102/*
103 * sockconf_lock protects the socket configuration (socket types and
104 * socket filters) which is changed via the sockconfig system call.
105 */
106krwlock_t sockconf_lock;
107
/* NOTE(review): presumably kstat callbacks defined later in this file -- confirm. */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700108static int sockfs_update(kstat_t *, int);
109static int sockfs_snapshot(kstat_t *, void *, int);
/* Its result is handed to smod_add() in sockinit(). */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800110extern smod_info_t *sotpi_smod_create(void);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700111
/* Called once from sockinit(). */
112extern void sendfile_init();
113
/* NOTE(review): not referenced in the portion of the file reviewed here. */
Jack Meng6cefaae2008-11-22 07:33:57 +0800114extern int modrootloaded;
115
115
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700116/*
117 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode.
118 * Returns with the vnode held.
119 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800120int
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700121sogetvp(char *devpath, vnode_t **vpp, int uioflag)
122{
123 struct snode *csp;
124 vnode_t *vp, *dvp;
125 major_t maj;
126 int error;
127
128 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800129
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700130 /*
131 * Lookup the underlying filesystem vnode.
132 */
133 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp);
134 if (error)
135 return (error);
136
137 /* Check that it is the correct vnode */
138 if (vp->v_type != VCHR) {
139 VN_RELE(vp);
140 return (ENOTSOCK);
141 }
142
143 /*
144 * If devpath went through devfs, the device should already
145 * be configured. If devpath is a mknod file, however, we
146 * need to make sure the device is properly configured.
147 * To do this, we do something similar to spec_open()
148 * except that we resolve to the minor/leaf level since
149 * we need to return a vnode.
150 */
151 csp = VTOS(VTOS(vp)->s_commonvp);
152 if (!(csp->s_flag & SDIPSET)) {
153 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
154 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname);
155 if (error == 0)
156 error = devfs_lookupname(pathname, NULLVPP, &dvp);
157 VN_RELE(vp);
158 kmem_free(pathname, MAXPATHLEN);
159 if (error != 0)
160 return (ENXIO);
161 vp = dvp; /* use the devfs vp */
162 }
163
164 /* device is configured at this point */
165 maj = getmajor(vp->v_rdev);
166 if (!STREAMSTAB(maj)) {
167 VN_RELE(vp);
168 return (ENOSTR);
169 }
170
171 *vpp = vp;
172 return (0);
173}
174
175/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700176 * Update the accessed, updated, or changed times in an sonode
177 * with the current time.
178 *
179 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable
180 * attributes in a fstat call. (They return the current time and 0 for
181 * all timestamps, respectively.) We maintain the current timestamps
182 * here primarily so that should sockmod be popped the resulting
183 * file descriptor will behave like a stream w.r.t. the timestamps.
184 */
185void
186so_update_attrs(struct sonode *so, int flag)
187{
188 time_t now = gethrestime_sec();
189
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800190 if (SOCK_IS_NONSTR(so))
191 return;
192
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700193 mutex_enter(&so->so_lock);
194 so->so_flag |= flag;
195 if (flag & SOACC)
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800196 SOTOTPI(so)->sti_atime = now;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700197 if (flag & SOMOD)
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800198 SOTOTPI(so)->sti_mtime = now;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700199 mutex_exit(&so->so_lock);
200}
201
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800202extern so_create_func_t sock_comm_create_function;
203extern so_destroy_func_t sock_comm_destroy_function;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700204/*
205 * Init function called when sockfs is loaded.
206 */
207int
208sockinit(int fstype, char *name)
209{
210 static const fs_operation_def_t sock_vfsops_template[] = {
211 NULL, NULL
212 };
213 int error;
214 major_t dev;
215 char *err_str;
216
217 error = vfs_setfsops(fstype, sock_vfsops_template, NULL);
218 if (error != 0) {
rshoaib2caf0dc2006-03-05 18:00:39 -0800219 zcmn_err(GLOBAL_ZONEID, CE_WARN,
220 "sockinit: bad vfs ops template");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700221 return (error);
222 }
223
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800224 error = vn_make_ops(name, socket_vnodeops_template,
225 &socket_vnodeops);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700226 if (error != 0) {
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800227 err_str = "sockinit: bad socket vnode ops template";
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700228 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800229 socket_vnodeops = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700230 goto failure;
231 }
232
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800233 socket_cache = kmem_cache_create("socket_cache",
234 sizeof (struct sonode), 0, sonode_constructor,
235 sonode_destructor, NULL, NULL, NULL, 0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700236
Anders Persson3e95bd42010-06-17 17:22:09 -0700237 rw_init(&sockconf_lock, NULL, RW_DEFAULT, NULL);
238
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800239 error = socktpi_init();
nh14500274e20cf2007-01-15 11:53:51 -0800240 if (error != 0) {
241 err_str = NULL;
242 goto failure;
243 }
244
Anders Perssonbbc000e2009-04-28 12:10:59 -0700245 error = sod_init();
brutus17169042008-05-23 20:14:10 -0700246 if (error != 0) {
247 err_str = NULL;
248 goto failure;
249 }
250
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700251 /*
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800252 * Set up the default create and destroy functions
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700253 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800254 sock_comm_create_function = socket_sonode_create;
255 sock_comm_destroy_function = socket_sonode_destroy;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700256
257 /*
258 * Build initial list mapping socket parameters to vnode.
259 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800260 smod_init();
261 smod_add(sotpi_smod_create());
262
263 sockparams_init();
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700264
265 /*
266 * If sockets are needed before init runs /sbin/soconfig
267 * it is possible to preload the sockparams list here using
268 * calls like:
269 * sockconfig(1,2,3, "/dev/tcp", 0);
270 */
271
272 /*
273 * Create a unique dev_t for use in so_fsid.
274 */
275
276 if ((dev = getudev()) == (major_t)-1)
277 dev = 0;
278 sockdev = makedevice(dev, 0);
279
280 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700281 sendfile_init();
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700282
Anders Persson3e95bd42010-06-17 17:22:09 -0700283 /* Initialize socket filters */
284 sof_init();
285
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700286 return (0);
287
288failure:
289 (void) vfs_freevfsops_by_type(fstype);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800290 if (socket_vnodeops != NULL)
291 vn_freevnodeops(socket_vnodeops);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700292 if (err_str != NULL)
rshoaib2caf0dc2006-03-05 18:00:39 -0800293 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700294 return (error);
295}
296
297/*
298 * Caller must hold the mutex. Used to set SOLOCKED.
299 */
300void
301so_lock_single(struct sonode *so)
302{
303 ASSERT(MUTEX_HELD(&so->so_lock));
304
305 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) {
Anders Persson6a571a22010-01-15 20:20:54 -0800306 cv_wait_stop(&so->so_single_cv, &so->so_lock,
gwwd3e55dc2007-12-27 11:07:48 -0800307 SO_LOCK_WAKEUP_TIME);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700308 }
309 so->so_flag |= SOLOCKED;
310}
311
312/*
313 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND.
314 * Used to clear SOLOCKED or SOASYNC_UNBIND.
315 */
316void
317so_unlock_single(struct sonode *so, int flag)
318{
319 ASSERT(MUTEX_HELD(&so->so_lock));
320 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND));
321 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0);
322 ASSERT(so->so_flag & flag);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700323 /*
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800324 * Process the T_DISCON_IND on sti_discon_ind_mp.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700325 *
326 * Call to so_drain_discon_ind will result in so_lock
327 * being dropped and re-acquired later.
328 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800329 if (!SOCK_IS_NONSTR(so)) {
330 sotpi_info_t *sti = SOTOTPI(so);
331
332 if (sti->sti_discon_ind_mp != NULL)
333 so_drain_discon_ind(so);
334 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700335
Anders Persson6a571a22010-01-15 20:20:54 -0800336 cv_signal(&so->so_single_cv);
337 so->so_flag &= ~flag;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700338}
339
340/*
341 * Caller must hold the mutex. Used to set SOREADLOCKED.
342 * If the caller wants nonblocking behavior it should set fmode.
343 */
344int
345so_lock_read(struct sonode *so, int fmode)
346{
347 ASSERT(MUTEX_HELD(&so->so_lock));
348
349 while (so->so_flag & SOREADLOCKED) {
350 if (fmode & (FNDELAY|FNONBLOCK))
351 return (EWOULDBLOCK);
Anders Persson6a571a22010-01-15 20:20:54 -0800352 cv_wait_stop(&so->so_read_cv, &so->so_lock,
gwwd3e55dc2007-12-27 11:07:48 -0800353 SO_LOCK_WAKEUP_TIME);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700354 }
355 so->so_flag |= SOREADLOCKED;
356 return (0);
357}
358
359/*
360 * Like so_lock_read above but allows signals.
361 */
362int
363so_lock_read_intr(struct sonode *so, int fmode)
364{
365 ASSERT(MUTEX_HELD(&so->so_lock));
366
367 while (so->so_flag & SOREADLOCKED) {
368 if (fmode & (FNDELAY|FNONBLOCK))
369 return (EWOULDBLOCK);
Anders Persson6a571a22010-01-15 20:20:54 -0800370 if (!cv_wait_sig(&so->so_read_cv, &so->so_lock))
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700371 return (EINTR);
372 }
373 so->so_flag |= SOREADLOCKED;
374 return (0);
375}
376
377/*
378 * Caller must hold the mutex. Used to clear SOREADLOCKED,
379 * set in so_lock_read() or so_lock_read_intr().
380 */
381void
382so_unlock_read(struct sonode *so)
383{
384 ASSERT(MUTEX_HELD(&so->so_lock));
385 ASSERT(so->so_flag & SOREADLOCKED);
386
Anders Persson6a571a22010-01-15 20:20:54 -0800387 cv_signal(&so->so_read_cv);
388 so->so_flag &= ~SOREADLOCKED;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700389}
390
391/*
392 * Verify that the specified offset falls within the mblk and
393 * that the resulting pointer is aligned.
394 * Returns NULL if not.
395 */
396void *
397sogetoff(mblk_t *mp, t_uscalar_t offset,
398 t_uscalar_t length, uint_t align_size)
399{
400 uintptr_t ptr1, ptr2;
401
402 ASSERT(mp && mp->b_wptr >= mp->b_rptr);
403 ptr1 = (uintptr_t)mp->b_rptr + offset;
404 ptr2 = (uintptr_t)ptr1 + length;
405 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) {
406 eprintline(0);
407 return (NULL);
408 }
409 if ((ptr1 & (align_size - 1)) != 0) {
410 eprintline(0);
411 return (NULL);
412 }
413 return ((void *)ptr1);
414}
415
416/*
417 * Return the AF_UNIX underlying filesystem vnode matching a given name.
418 * Makes sure the sending and the destination sonodes are compatible.
419 * The vnode is returned held.
420 *
421 * The underlying filesystem VSOCK vnode has a v_stream pointer that
422 * references the actual stream head (hence indirectly the actual sonode).
423 */
424static int
425so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess,
Gordon Rossf012ee02016-11-17 22:13:10 -0500426 vnode_t **vpp)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700427{
428 vnode_t *vp; /* Underlying filesystem vnode */
Ric Aleshire9bf93552008-08-26 16:37:19 -0700429 vnode_t *rvp; /* real vnode */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700430 vnode_t *svp; /* sockfs vnode */
431 struct sonode *so2;
432 int error;
433
rh871073eceedb2008-07-31 15:42:10 -0700434 dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so,
435 soun->sun_path));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700436
437 error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
438 if (error) {
439 eprintsoline(so, error);
440 return (error);
441 }
Ric Aleshire9bf93552008-08-26 16:37:19 -0700442
443 /*
444 * Traverse lofs mounts get the real vnode
445 */
446 if (VOP_REALVP(vp, &rvp, NULL) == 0) {
447 VN_HOLD(rvp); /* hold the real vnode */
448 VN_RELE(vp); /* release hold from lookup */
449 vp = rvp;
450 }
451
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700452 if (vp->v_type != VSOCK) {
453 error = ENOTSOCK;
454 eprintsoline(so, error);
455 goto done2;
456 }
457
458 if (checkaccess) {
459 /*
460 * Check that we have permissions to access the destination
461 * vnode. This check is not done in BSD but it is required
462 * by X/Open.
463 */
amwda6c28a2007-10-25 16:34:29 -0700464 if (error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL)) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700465 eprintsoline(so, error);
466 goto done2;
467 }
468 }
469
470 /*
471 * Check if the remote socket has been closed.
472 *
473 * Synchronize with vn_rele_stream by holding v_lock while traversing
474 * v_stream->sd_vnode.
475 */
476 mutex_enter(&vp->v_lock);
477 if (vp->v_stream == NULL) {
478 mutex_exit(&vp->v_lock);
479 if (so->so_type == SOCK_DGRAM)
480 error = EDESTADDRREQ;
481 else
482 error = ECONNREFUSED;
483
484 eprintsoline(so, error);
485 goto done2;
486 }
487 ASSERT(vp->v_stream->sd_vnode);
488 svp = vp->v_stream->sd_vnode;
489 /*
490 * holding v_lock on underlying filesystem vnode and acquiring
491 * it on sockfs vnode. Assumes that no code ever attempts to
492 * acquire these locks in the reverse order.
493 */
494 VN_HOLD(svp);
495 mutex_exit(&vp->v_lock);
496
497 if (svp->v_type != VSOCK) {
498 error = ENOTSOCK;
499 eprintsoline(so, error);
500 goto done;
501 }
502
503 so2 = VTOSO(svp);
504
505 if (so->so_type != so2->so_type) {
506 error = EPROTOTYPE;
507 eprintsoline(so, error);
508 goto done;
509 }
510
511 VN_RELE(svp);
512 *vpp = vp;
513 return (0);
514
515done:
516 VN_RELE(svp);
517done2:
518 VN_RELE(vp);
519 return (error);
520}
521
522/*
523 * Verify peer address for connect and sendto/sendmsg.
524 * Since sendto/sendmsg would not get synchronous errors from the transport
525 * provider we have to do these ugly checks in the socket layer to
526 * preserve compatibility with SunOS 4.X.
527 */
528int
529so_addr_verify(struct sonode *so, const struct sockaddr *name,
530 socklen_t namelen)
531{
532 int family;
533
rh87107903a11e2008-07-31 15:02:18 -0700534 dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n",
535 (void *)so, (void *)name, namelen));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700536
537 ASSERT(name != NULL);
538
539 family = so->so_family;
540 switch (family) {
541 case AF_INET:
542 if (name->sa_family != family) {
543 eprintsoline(so, EAFNOSUPPORT);
544 return (EAFNOSUPPORT);
545 }
546 if (namelen != (socklen_t)sizeof (struct sockaddr_in)) {
547 eprintsoline(so, EINVAL);
548 return (EINVAL);
549 }
550 break;
551 case AF_INET6: {
552#ifdef DEBUG
553 struct sockaddr_in6 *sin6;
554#endif /* DEBUG */
555
556 if (name->sa_family != family) {
557 eprintsoline(so, EAFNOSUPPORT);
558 return (EAFNOSUPPORT);
559 }
560 if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) {
561 eprintsoline(so, EINVAL);
562 return (EINVAL);
563 }
564#ifdef DEBUG
565 /* Verify that apps don't forget to clear sin6_scope_id etc */
566 sin6 = (struct sockaddr_in6 *)name;
567 if (sin6->sin6_scope_id != 0 &&
568 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
rshoaib2caf0dc2006-03-05 18:00:39 -0800569 zcmn_err(getzoneid(), CE_WARN,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700570 "connect/send* with uninitialized sin6_scope_id "
571 "(%d) on socket. Pid = %d\n",
572 (int)sin6->sin6_scope_id, (int)curproc->p_pid);
573 }
574#endif /* DEBUG */
575 break;
576 }
577 case AF_UNIX:
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800578 if (SOTOTPI(so)->sti_faddr_noxlate) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700579 return (0);
580 }
581 if (namelen < (socklen_t)sizeof (short)) {
582 eprintsoline(so, ENOENT);
583 return (ENOENT);
584 }
585 if (name->sa_family != family) {
586 eprintsoline(so, EAFNOSUPPORT);
587 return (EAFNOSUPPORT);
588 }
589 /* MAXPATHLEN + soun_family + nul termination */
590 if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
591 eprintsoline(so, ENAMETOOLONG);
592 return (ENAMETOOLONG);
593 }
594
595 break;
596
597 default:
598 /*
599 * Default is don't do any length or sa_family check
600 * to allow non-sockaddr style addresses.
601 */
602 break;
603 }
604
605 return (0);
606}
607
608
609/*
610 * Translate an AF_UNIX sockaddr_un to the transport internal name.
Gordon Rossf012ee02016-11-17 22:13:10 -0500611 * Assumes caller has called so_addr_verify first. The translated
612 * (internal form) address is stored in sti->sti_ux_taddr.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700613 */
614/*ARGSUSED*/
615int
616so_ux_addr_xlate(struct sonode *so, struct sockaddr *name,
617 socklen_t namelen, int checkaccess,
618 void **addrp, socklen_t *addrlenp)
619{
620 int error;
621 struct sockaddr_un *soun;
622 vnode_t *vp;
623 void *addr;
624 socklen_t addrlen;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800625 sotpi_info_t *sti = SOTOTPI(so);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700626
627 dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n",
rh87107903a11e2008-07-31 15:02:18 -0700628 (void *)so, (void *)name, namelen, checkaccess));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700629
630 ASSERT(name != NULL);
631 ASSERT(so->so_family == AF_UNIX);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800632 ASSERT(!sti->sti_faddr_noxlate);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700633 ASSERT(namelen >= (socklen_t)sizeof (short));
634 ASSERT(name->sa_family == AF_UNIX);
635 soun = (struct sockaddr_un *)name;
636 /*
637 * Lookup vnode for the specified path name and verify that
638 * it is a socket.
639 */
640 error = so_ux_lookup(so, soun, checkaccess, &vp);
641 if (error) {
642 eprintsoline(so, error);
643 return (error);
644 }
645 /*
646 * Use the address of the peer vnode as the address to send
647 * to. We release the peer vnode here. In case it has been
Jerry Jelinekd28d4712015-11-23 16:00:48 +0000648 * closed by the time the T_CONN_REQ or T_UNITDATA_REQ reaches the
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700649 * transport the message will get an error or be dropped.
Gordon Rossf012ee02016-11-17 22:13:10 -0500650 * Note that that soua_vp is never dereferenced; it's just a
651 * convenient value by which we can identify the peer.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700652 */
Gordon Rossf012ee02016-11-17 22:13:10 -0500653 sti->sti_ux_taddr.soua_vp = vp;
654 sti->sti_ux_taddr.soua_magic = SOU_MAGIC_EXPLICIT;
655 addr = &sti->sti_ux_taddr;
656 addrlen = (socklen_t)sizeof (sti->sti_ux_taddr);
rh87107903a11e2008-07-31 15:02:18 -0700657 dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n",
658 addrlen, (void *)vp));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700659 VN_RELE(vp);
660 *addrp = addr;
661 *addrlenp = (socklen_t)addrlen;
662 return (0);
663}
664
665/*
666 * Esballoc free function for messages that contain SO_FILEP option.
667 * Decrement the reference count on the file pointers using closef.
668 */
669void
670fdbuf_free(struct fdbuf *fdbuf)
671{
672 int i;
673 struct file *fp;
674
675 dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd));
676 for (i = 0; i < fdbuf->fd_numfd; i++) {
677 /*
678 * We need pointer size alignment for fd_fds. On a LP64
679 * kernel, the required alignment is 8 bytes while
680 * the option headers and values are only 4 bytes
681 * aligned. So its safer to do a bcopy compared to
682 * assigning fdbuf->fd_fds[i] to fp.
683 */
684 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
rh87107903a11e2008-07-31 15:02:18 -0700685 dprint(1, ("fdbuf_free: [%d] = %p\n", i, (void *)fp));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700686 (void) closef(fp);
687 }
688 if (fdbuf->fd_ebuf != NULL)
689 kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen);
690 kmem_free(fdbuf, fdbuf->fd_size);
691}
692
693/*
meembd118332005-08-30 13:07:43 -0700694 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing.
695 * Waits if memory is not available.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700696 */
697mblk_t *
698fdbuf_allocmsg(int size, struct fdbuf *fdbuf)
699{
meembd118332005-08-30 13:07:43 -0700700 uchar_t *buf;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700701 mblk_t *mp;
702
703 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd));
704 buf = kmem_alloc(size, KM_SLEEP);
705 fdbuf->fd_ebuf = (caddr_t)buf;
706 fdbuf->fd_ebuflen = size;
707 fdbuf->fd_frtn.free_func = fdbuf_free;
708 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf;
709
meembd118332005-08-30 13:07:43 -0700710 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700711 mp->b_datap->db_type = M_PROTO;
712 return (mp);
713}
714
715/*
716 * Extract file descriptors from a fdbuf.
717 * Return list in rights/rightslen.
718 */
719/*ARGSUSED*/
720static int
721fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen)
722{
723 int i, fd;
724 int *rp;
725 struct file *fp;
726 int numfd;
727
728 dprint(1, ("fdbuf_extract: %d fds, len %d\n",
gwwd3e55dc2007-12-27 11:07:48 -0800729 fdbuf->fd_numfd, rightslen));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700730
731 numfd = fdbuf->fd_numfd;
732 ASSERT(rightslen == numfd * (int)sizeof (int));
733
734 /*
735 * Allocate a file descriptor and increment the f_count.
736 * The latter is needed since we always call fdbuf_free
737 * which performs a closef.
738 */
739 rp = (int *)rights;
740 for (i = 0; i < numfd; i++) {
741 if ((fd = ufalloc(0)) == -1)
742 goto cleanup;
743 /*
744 * We need pointer size alignment for fd_fds. On a LP64
745 * kernel, the required alignment is 8 bytes while
746 * the option headers and values are only 4 bytes
747 * aligned. So its safer to do a bcopy compared to
748 * assigning fdbuf->fd_fds[i] to fp.
749 */
750 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
751 mutex_enter(&fp->f_tlock);
752 fp->f_count++;
753 mutex_exit(&fp->f_tlock);
754 setf(fd, fp);
755 *rp++ = fd;
Marek Pospisil005d3fe2010-03-05 13:16:08 -0800756 if (AU_AUDITING())
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700757 audit_fdrecv(fd, fp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700758 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n",
rh87107903a11e2008-07-31 15:02:18 -0700759 i, fd, (void *)fp, fp->f_count));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700760 }
761 return (0);
762
763cleanup:
764 /*
765 * Undo whatever partial work the loop above has done.
766 */
767 {
768 int j;
769
770 rp = (int *)rights;
771 for (j = 0; j < i; j++) {
772 dprint(0,
773 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp));
774 (void) closeandsetf(*rp++, NULL);
775 }
776 }
777
778 return (EMFILE);
779}
780
781/*
782 * Insert file descriptors into an fdbuf.
783 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed
784 * by calling fdbuf_free().
785 */
786int
787fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp)
788{
789 int numfd, i;
790 int *fds;
791 struct file *fp;
792 struct fdbuf *fdbuf;
793 int fdbufsize;
794
795 dprint(1, ("fdbuf_create: len %d\n", rightslen));
796
797 numfd = rightslen / (int)sizeof (int);
798
799 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *));
800 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP);
801 fdbuf->fd_size = fdbufsize;
802 fdbuf->fd_numfd = 0;
803 fdbuf->fd_ebuf = NULL;
804 fdbuf->fd_ebuflen = 0;
805 fds = (int *)rights;
806 for (i = 0; i < numfd; i++) {
807 if ((fp = getf(fds[i])) == NULL) {
808 fdbuf_free(fdbuf);
809 return (EBADF);
810 }
811 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n",
rh87107903a11e2008-07-31 15:02:18 -0700812 i, fds[i], (void *)fp, fp->f_count));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700813 mutex_enter(&fp->f_tlock);
814 fp->f_count++;
815 mutex_exit(&fp->f_tlock);
816 /*
817 * The maximum alignment for fdbuf (or any option header
818 * and its value) it 4 bytes. On a LP64 kernel, the alignment
819 * is not sufficient for pointers (fd_fds in this case). Since
820 * we just did a kmem_alloc (we get a double word alignment),
821 * we don't need to do anything on the send side (we loose
822 * the double word alignment because fdbuf goes after an
823 * option header (eg T_unitdata_req) which is only 4 byte
824 * aligned). We take care of this when we extract the file
825 * descriptor in fdbuf_extract or fdbuf_free.
826 */
827 fdbuf->fd_fds[i] = fp;
828 fdbuf->fd_numfd++;
829 releasef(fds[i]);
Marek Pospisil005d3fe2010-03-05 13:16:08 -0800830 if (AU_AUDITING())
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700831 audit_fdsend(fds[i], fp, 0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700832 }
833 *fdbufp = fdbuf;
834 return (0);
835}
836
837static int
838fdbuf_optlen(int rightslen)
839{
840 int numfd;
841
842 numfd = rightslen / (int)sizeof (int);
843
844 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)));
845}
846
847static t_uscalar_t
848fdbuf_cmsglen(int fdbuflen)
849{
850 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) /
851 (int)sizeof (struct file *) * (int)sizeof (int));
852}
853
854
855/*
856 * Return non-zero if the mblk and fdbuf are consistent.
857 */
858static int
859fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen)
860{
861 if (fdbuflen >= FDBUF_HDRSIZE &&
862 fdbuflen == fdbuf->fd_size) {
863 frtn_t *frp = mp->b_datap->db_frtnp;
864 /*
865 * Check that the SO_FILEP portion of the
866 * message has not been modified by
867 * the loopback transport. The sending sockfs generates
868 * a message that is esballoc'ed with the free function
869 * being fdbuf_free() and where free_arg contains the
870 * identical information as the SO_FILEP content.
871 *
872 * If any of these constraints are not satisfied we
873 * silently ignore the option.
874 */
875 ASSERT(mp);
876 if (frp != NULL &&
877 frp->free_func == fdbuf_free &&
878 frp->free_arg != NULL &&
879 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) {
880 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n",
rh87107903a11e2008-07-31 15:02:18 -0700881 (void *)fdbuf, fdbuflen));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700882 return (1);
883 } else {
rshoaib2caf0dc2006-03-05 18:00:39 -0800884 zcmn_err(getzoneid(), CE_WARN,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700885 "sockfs: mismatched fdbuf content (%p)",
886 (void *)mp);
887 return (0);
888 }
889 } else {
rshoaib2caf0dc2006-03-05 18:00:39 -0800890 zcmn_err(getzoneid(), CE_WARN,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700891 "sockfs: mismatched fdbuf len %d, %d\n",
892 fdbuflen, fdbuf->fd_size);
893 return (0);
894 }
895}
896
897/*
898 * When the file descriptors returned by sorecvmsg can not be passed
899 * to the application this routine will cleanup the references on
900 * the files. Start at startoff bytes into the buffer.
901 */
902static void
903close_fds(void *fdbuf, int fdbuflen, int startoff)
904{
905 int *fds = (int *)fdbuf;
906 int numfd = fdbuflen / (int)sizeof (int);
907 int i;
908
909 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff));
910
911 for (i = 0; i < numfd; i++) {
912 if (startoff < 0)
913 startoff = 0;
914 if (startoff < (int)sizeof (int)) {
915 /*
916 * This file descriptor is partially or fully after
917 * the offset
918 */
919 dprint(0,
920 ("close_fds: cleanup[%d] = %d\n", i, fds[i]));
921 (void) closeandsetf(fds[i], NULL);
922 }
923 startoff -= (int)sizeof (int);
924 }
925}
926
927/*
928 * Close all file descriptors contained in the control part starting at
929 * the startoffset.
930 */
931void
932so_closefds(void *control, t_uscalar_t controllen, int oldflg,
933 int startoff)
934{
935 struct cmsghdr *cmsg;
936
937 if (control == NULL)
938 return;
939
940 if (oldflg) {
941 close_fds(control, controllen, startoff);
942 return;
943 }
944 /* Scan control part for file descriptors. */
945 for (cmsg = (struct cmsghdr *)control;
946 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
947 cmsg = CMSG_NEXT(cmsg)) {
948 if (cmsg->cmsg_level == SOL_SOCKET &&
949 cmsg->cmsg_type == SCM_RIGHTS) {
950 close_fds(CMSG_CONTENT(cmsg),
951 (int)CMSG_CONTENTLEN(cmsg),
952 startoff - (int)sizeof (struct cmsghdr));
953 }
Andy Fiddamand865fc92020-06-05 14:22:45 +0000954 startoff -= ROUNDUP_cmsglen(cmsg->cmsg_len);
955 }
956}
957
958/*
959 * Handle truncation of a cmsg when the receive buffer is not big enough.
960 * Adjust the cmsg_len header field in the last cmsg that will be included in
961 * the buffer to reflect the number of bytes included.
962 */
963void
964so_truncatecmsg(void *control, t_uscalar_t controllen, uint_t maxlen)
965{
966 struct cmsghdr *cmsg;
967 uint_t len = 0;
968
969 if (control == NULL)
970 return;
971
972 for (cmsg = control;
973 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
974 cmsg = CMSG_NEXT(cmsg)) {
975
976 len += ROUNDUP_cmsglen(cmsg->cmsg_len);
977
978 if (len > maxlen) {
979 /*
980 * This cmsg is the last one that will be included in
981 * the truncated buffer.
982 */
983 socklen_t diff = len - maxlen;
984
985 if (diff < CMSG_CONTENTLEN(cmsg)) {
986 dprint(1, ("so_truncatecmsg: %d -> %d\n",
987 cmsg->cmsg_len, cmsg->cmsg_len - diff));
988 cmsg->cmsg_len -= diff;
989 } else {
990 cmsg->cmsg_len = sizeof (struct cmsghdr);
991 }
992 break;
993 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700994 }
995}
996
997/*
998 * Returns a pointer/length for the file descriptors contained
999 * in the control buffer. Returns with *fdlenp == -1 if there are no
1000 * file descriptor options present. This is different than there being
1001 * a zero-length file descriptor option.
1002 * Fail if there are multiple SCM_RIGHT cmsgs.
1003 */
1004int
1005so_getfdopt(void *control, t_uscalar_t controllen, int oldflg,
1006 void **fdsp, int *fdlenp)
1007{
1008 struct cmsghdr *cmsg;
1009 void *fds;
1010 int fdlen;
1011
1012 if (control == NULL) {
1013 *fdsp = NULL;
1014 *fdlenp = -1;
1015 return (0);
1016 }
1017
1018 if (oldflg) {
1019 *fdsp = control;
1020 if (controllen == 0)
1021 *fdlenp = -1;
1022 else
1023 *fdlenp = controllen;
1024 dprint(1, ("so_getfdopt: old %d\n", *fdlenp));
1025 return (0);
1026 }
1027
1028 fds = NULL;
1029 fdlen = 0;
1030
1031 for (cmsg = (struct cmsghdr *)control;
1032 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1033 cmsg = CMSG_NEXT(cmsg)) {
1034 if (cmsg->cmsg_level == SOL_SOCKET &&
1035 cmsg->cmsg_type == SCM_RIGHTS) {
1036 if (fds != NULL)
1037 return (EINVAL);
1038 fds = CMSG_CONTENT(cmsg);
1039 fdlen = (int)CMSG_CONTENTLEN(cmsg);
krgopi1e0267d2005-08-23 19:50:49 -07001040 dprint(1, ("so_getfdopt: new %lu\n",
gwwd3e55dc2007-12-27 11:07:48 -08001041 (size_t)CMSG_CONTENTLEN(cmsg)));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001042 }
1043 }
1044 if (fds == NULL) {
1045 dprint(1, ("so_getfdopt: NONE\n"));
1046 *fdlenp = -1;
1047 } else
1048 *fdlenp = fdlen;
1049 *fdsp = fds;
1050 return (0);
1051}
1052
1053/*
1054 * Return the length of the options including any file descriptor options.
1055 */
1056t_uscalar_t
1057so_optlen(void *control, t_uscalar_t controllen, int oldflg)
1058{
1059 struct cmsghdr *cmsg;
1060 t_uscalar_t optlen = 0;
1061 t_uscalar_t len;
1062
1063 if (control == NULL)
1064 return (0);
1065
1066 if (oldflg)
1067 return ((t_uscalar_t)(sizeof (struct T_opthdr) +
1068 fdbuf_optlen(controllen)));
1069
1070 for (cmsg = (struct cmsghdr *)control;
1071 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1072 cmsg = CMSG_NEXT(cmsg)) {
1073 if (cmsg->cmsg_level == SOL_SOCKET &&
1074 cmsg->cmsg_type == SCM_RIGHTS) {
1075 len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg));
1076 } else {
1077 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
1078 }
1079 optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) +
1080 sizeof (struct T_opthdr));
1081 }
1082 dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n",
gwwd3e55dc2007-12-27 11:07:48 -08001083 controllen, oldflg, optlen));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001084 return (optlen);
1085}
1086
1087/*
1088 * Copy options from control to the mblk. Skip any file descriptor options.
1089 */
1090void
1091so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp)
1092{
1093 struct T_opthdr toh;
1094 struct cmsghdr *cmsg;
1095
1096 if (control == NULL)
1097 return;
1098
1099 if (oldflg) {
1100 /* No real options - caller has handled file descriptors */
1101 return;
1102 }
1103 for (cmsg = (struct cmsghdr *)control;
1104 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1105 cmsg = CMSG_NEXT(cmsg)) {
1106 /*
1107 * Note: The caller handles file descriptors prior
1108 * to calling this function.
1109 */
1110 t_uscalar_t len;
1111
1112 if (cmsg->cmsg_level == SOL_SOCKET &&
1113 cmsg->cmsg_type == SCM_RIGHTS)
1114 continue;
1115
1116 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
1117 toh.level = cmsg->cmsg_level;
1118 toh.name = cmsg->cmsg_type;
1119 toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr);
1120 toh.status = 0;
1121
1122 soappendmsg(mp, &toh, sizeof (toh));
1123 soappendmsg(mp, CMSG_CONTENT(cmsg), len);
1124 mp->b_wptr += _TPI_ALIGN_TOPT(len) - len;
1125 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1126 }
1127}
1128
1129/*
1130 * Return the length of the control message derived from the options.
1131 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP.
1132 * When oldflg is set only include SO_FILEP.
gt1456700d204002006-06-26 11:20:08 -07001133 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1134 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1135 * also be checked for any possible impacts.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001136 */
1137t_uscalar_t
1138so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg)
1139{
1140 t_uscalar_t cmsglen = 0;
1141 struct T_opthdr *tohp;
1142 t_uscalar_t len;
1143 t_uscalar_t last_roundup = 0;
1144
1145 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1146
1147 for (tohp = (struct T_opthdr *)opt;
1148 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1149 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1150 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n",
gwwd3e55dc2007-12-27 11:07:48 -08001151 tohp->level, tohp->name, tohp->len));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001152 if (tohp->level == SOL_SOCKET &&
1153 (tohp->name == SO_SRCADDR ||
1154 tohp->name == SO_UNIX_CLOSE)) {
1155 continue;
1156 }
1157 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
1158 struct fdbuf *fdbuf;
1159 int fdbuflen;
1160
1161 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
1162 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);
1163
1164 if (!fdbuf_verify(mp, fdbuf, fdbuflen))
1165 continue;
1166 if (oldflg) {
1167 cmsglen += fdbuf_cmsglen(fdbuflen);
1168 continue;
1169 }
1170 len = fdbuf_cmsglen(fdbuflen);
gt1456700d204002006-06-26 11:20:08 -07001171 } else if (tohp->level == SOL_SOCKET &&
1172 tohp->name == SCM_TIMESTAMP) {
1173 if (oldflg)
1174 continue;
1175
1176 if (get_udatamodel() == DATAMODEL_NATIVE) {
1177 len = sizeof (struct timeval);
1178 } else {
1179 len = sizeof (struct timeval32);
1180 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001181 } else {
1182 if (oldflg)
1183 continue;
1184 len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
1185 }
1186 /*
gt1456700d204002006-06-26 11:20:08 -07001187 * Exclude roundup for last option to not set
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001188 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit.
1189 */
1190 last_roundup = (t_uscalar_t)
1191 (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) -
1192 (len + (int)sizeof (struct cmsghdr)));
1193 cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) +
1194 last_roundup;
1195 }
1196 cmsglen -= last_roundup;
1197 dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n",
gwwd3e55dc2007-12-27 11:07:48 -08001198 optlen, oldflg, cmsglen));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001199 return (cmsglen);
1200}
1201
1202/*
1203 * Copy options from options to the control. Convert SO_FILEP to
1204 * file descriptors.
1205 * Returns errno or zero.
gt1456700d204002006-06-26 11:20:08 -07001206 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1207 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1208 * also be checked for any possible impacts.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001209 */
1210int
1211so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg,
1212 void *control, t_uscalar_t controllen)
1213{
1214 struct T_opthdr *tohp;
1215 struct cmsghdr *cmsg;
1216 struct fdbuf *fdbuf;
1217 int fdbuflen;
1218 int error;
gt1456700d204002006-06-26 11:20:08 -07001219#if defined(DEBUG) || defined(__lint)
1220 struct cmsghdr *cend = (struct cmsghdr *)
1221 (((uint8_t *)control) + ROUNDUP_cmsglen(controllen));
1222#endif
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001223 cmsg = (struct cmsghdr *)control;
1224
1225 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1226
1227 for (tohp = (struct T_opthdr *)opt;
1228 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1229 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1230 dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
gwwd3e55dc2007-12-27 11:07:48 -08001231 tohp->level, tohp->name, tohp->len));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001232
1233 if (tohp->level == SOL_SOCKET &&
1234 (tohp->name == SO_SRCADDR ||
1235 tohp->name == SO_UNIX_CLOSE)) {
1236 continue;
1237 }
1238 ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen);
1239 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
1240 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
1241 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);
1242
1243 if (!fdbuf_verify(mp, fdbuf, fdbuflen))
1244 return (EPROTO);
1245 if (oldflg) {
1246 error = fdbuf_extract(fdbuf, control,
1247 (int)controllen);
1248 if (error != 0)
1249 return (error);
1250 continue;
1251 } else {
1252 int fdlen;
1253
1254 fdlen = (int)fdbuf_cmsglen(
1255 (int)_TPI_TOPT_DATALEN(tohp));
1256
1257 cmsg->cmsg_level = tohp->level;
1258 cmsg->cmsg_type = SCM_RIGHTS;
1259 cmsg->cmsg_len = (socklen_t)(fdlen +
gwwd3e55dc2007-12-27 11:07:48 -08001260 sizeof (struct cmsghdr));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001261
1262 error = fdbuf_extract(fdbuf,
gwwd3e55dc2007-12-27 11:07:48 -08001263 CMSG_CONTENT(cmsg), fdlen);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001264 if (error != 0)
1265 return (error);
1266 }
gt145670e4f35db2006-03-23 19:03:11 -08001267 } else if (tohp->level == SOL_SOCKET &&
1268 tohp->name == SCM_TIMESTAMP) {
1269 timestruc_t *timestamp;
1270
1271 if (oldflg)
1272 continue;
1273
1274 cmsg->cmsg_level = tohp->level;
1275 cmsg->cmsg_type = tohp->name;
1276
1277 timestamp =
1278 (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1],
1279 sizeof (intptr_t));
1280
1281 if (get_udatamodel() == DATAMODEL_NATIVE) {
gt1456700d204002006-06-26 11:20:08 -07001282 struct timeval tv;
gt145670e4f35db2006-03-23 19:03:11 -08001283
1284 cmsg->cmsg_len = sizeof (struct timeval) +
1285 sizeof (struct cmsghdr);
gt1456700d204002006-06-26 11:20:08 -07001286 tv.tv_sec = timestamp->tv_sec;
1287 tv.tv_usec = timestamp->tv_nsec /
1288 (NANOSEC / MICROSEC);
1289 /*
1290 * on LP64 systems, the struct timeval in
1291 * the destination will not be 8-byte aligned,
1292 * so use bcopy to avoid alignment trouble
1293 */
1294 bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv));
gt145670e4f35db2006-03-23 19:03:11 -08001295 } else {
1296 struct timeval32 *time32;
1297
1298 cmsg->cmsg_len = sizeof (struct timeval32) +
1299 sizeof (struct cmsghdr);
1300 time32 = (struct timeval32 *)CMSG_CONTENT(cmsg);
1301 time32->tv_sec = (time32_t)timestamp->tv_sec;
1302 time32->tv_usec =
1303 (int32_t)(timestamp->tv_nsec /
1304 (NANOSEC / MICROSEC));
1305 }
1306
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001307 } else {
1308 if (oldflg)
1309 continue;
1310
1311 cmsg->cmsg_level = tohp->level;
1312 cmsg->cmsg_type = tohp->name;
Andy Fiddaman221e47f2020-09-18 20:04:57 +00001313 cmsg->cmsg_len = (socklen_t)sizeof (struct cmsghdr);
1314 if (tohp->level == IPPROTO_IP &&
1315 (tohp->name == IP_RECVTOS ||
1316 tohp->name == IP_RECVTTL)) {
1317 /*
1318 * The data for these is a uint8_t but, in
1319 * order to maintain alignment for any
1320 * following TPI primitives in the message,
1321 * there will be some trailing padding bytes
1322 * which are included in the TPI_TOPT_DATALEN.
1323 * For these types, we set the cmsg_len
1324 * explicitly to the correct value.
1325 */
1326 cmsg->cmsg_len += (socklen_t)sizeof (uint8_t);
1327 } else {
1328 cmsg->cmsg_len +=
1329 (socklen_t)(_TPI_TOPT_DATALEN(tohp));
1330 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001331
1332 /* copy content to control data part */
1333 bcopy(&tohp[1], CMSG_CONTENT(cmsg),
gwwd3e55dc2007-12-27 11:07:48 -08001334 CMSG_CONTENTLEN(cmsg));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001335 }
1336 /* move to next CMSG structure! */
1337 cmsg = CMSG_NEXT(cmsg);
1338 }
gt1456700d204002006-06-26 11:20:08 -07001339 dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
rh87107903a11e2008-07-31 15:02:18 -07001340 control, controllen, (void *)cend, (void *)cmsg));
gt1456700d204002006-06-26 11:20:08 -07001341 ASSERT(cmsg <= cend);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001342 return (0);
1343}
1344
1345/*
1346 * Extract the SO_SRCADDR option value if present.
1347 */
1348void
1349so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp,
1350 t_uscalar_t *srclenp)
1351{
1352 struct T_opthdr *tohp;
1353
1354 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1355
1356 ASSERT(srcp != NULL && srclenp != NULL);
1357 *srcp = NULL;
1358 *srclenp = 0;
1359
1360 for (tohp = (struct T_opthdr *)opt;
1361 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1362 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1363 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n",
gwwd3e55dc2007-12-27 11:07:48 -08001364 tohp->level, tohp->name, tohp->len));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001365 if (tohp->level == SOL_SOCKET &&
1366 tohp->name == SO_SRCADDR) {
1367 *srcp = _TPI_TOPT_DATA(tohp);
1368 *srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
1369 }
1370 }
1371}
1372
1373/*
1374 * Verify if the SO_UNIX_CLOSE option is present.
1375 */
1376int
1377so_getopt_unix_close(void *opt, t_uscalar_t optlen)
1378{
1379 struct T_opthdr *tohp;
1380
1381 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1382
1383 for (tohp = (struct T_opthdr *)opt;
1384 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1385 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1386 dprint(1,
gwwd3e55dc2007-12-27 11:07:48 -08001387 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n",
1388 tohp->level, tohp->name, tohp->len));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001389 if (tohp->level == SOL_SOCKET &&
1390 tohp->name == SO_UNIX_CLOSE)
1391 return (1);
1392 }
1393 return (0);
1394}
1395
1396/*
1397 * Allocate an M_PROTO message.
1398 *
1399 * If allocation fails the behavior depends on sleepflg:
1400 * _ALLOC_NOSLEEP fail immediately
1401 * _ALLOC_INTR sleep for memory until a signal is caught
1402 * _ALLOC_SLEEP sleep forever. Don't return NULL.
1403 */
1404mblk_t *
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001405soallocproto(size_t size, int sleepflg, cred_t *cr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001406{
1407 mblk_t *mp;
1408
1409 /* Round up size for reuse */
1410 size = MAX(size, 64);
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001411 if (cr != NULL)
1412 mp = allocb_cred(size, cr, curproc->p_pid);
1413 else
1414 mp = allocb(size, BPRI_MED);
1415
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001416 if (mp == NULL) {
1417 int error; /* Dummy - error not returned to caller */
1418
1419 switch (sleepflg) {
1420 case _ALLOC_SLEEP:
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001421 if (cr != NULL) {
1422 mp = allocb_cred_wait(size, STR_NOSIG, &error,
1423 cr, curproc->p_pid);
1424 } else {
1425 mp = allocb_wait(size, BPRI_MED, STR_NOSIG,
1426 &error);
1427 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001428 ASSERT(mp);
1429 break;
1430 case _ALLOC_INTR:
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001431 if (cr != NULL) {
1432 mp = allocb_cred_wait(size, 0, &error, cr,
1433 curproc->p_pid);
1434 } else {
1435 mp = allocb_wait(size, BPRI_MED, 0, &error);
1436 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001437 if (mp == NULL) {
1438 /* Caught signal while sleeping for memory */
1439 eprintline(ENOBUFS);
1440 return (NULL);
1441 }
1442 break;
1443 case _ALLOC_NOSLEEP:
1444 default:
1445 eprintline(ENOBUFS);
1446 return (NULL);
1447 }
1448 }
1449 DB_TYPE(mp) = M_PROTO;
1450 return (mp);
1451}
1452
1453/*
1454 * Allocate an M_PROTO message with a single component.
1455 * len is the length of buf. size is the amount to allocate.
1456 *
1457 * buf can be NULL with a non-zero len.
1458 * This results in a bzero'ed chunk being placed the message.
1459 */
1460mblk_t *
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001461soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg,
1462 cred_t *cr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001463{
1464 mblk_t *mp;
1465
1466 if (size == 0)
1467 size = len;
1468
1469 ASSERT(size >= len);
1470 /* Round up size for reuse */
1471 size = MAX(size, 64);
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001472 mp = soallocproto(size, sleepflg, cr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001473 if (mp == NULL)
1474 return (NULL);
1475 mp->b_datap->db_type = M_PROTO;
1476 if (len != 0) {
1477 if (buf != NULL)
1478 bcopy(buf, mp->b_wptr, len);
1479 else
1480 bzero(mp->b_wptr, len);
1481 mp->b_wptr += len;
1482 }
1483 return (mp);
1484}
1485
1486/*
1487 * Append buf/len to mp.
1488 * The caller has to ensure that there is enough room in the mblk.
1489 *
1490 * buf can be NULL with a non-zero len.
1491 * This results in a bzero'ed chunk being placed the message.
1492 */
1493void
1494soappendmsg(mblk_t *mp, const void *buf, ssize_t len)
1495{
1496 ASSERT(mp);
1497
1498 if (len != 0) {
1499 /* Assert for room left */
1500 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len);
1501 if (buf != NULL)
1502 bcopy(buf, mp->b_wptr, len);
1503 else
1504 bzero(mp->b_wptr, len);
1505 }
1506 mp->b_wptr += len;
1507}
1508
1509/*
1510 * Create a message using two kernel buffers.
1511 * If size is set that will determine the allocation size (e.g. for future
1512 * soappendmsg calls). If size is zero it is derived from the buffer
1513 * lengths.
1514 */
1515mblk_t *
1516soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001517 ssize_t size, int sleepflg, cred_t *cr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001518{
1519 mblk_t *mp;
1520
1521 if (size == 0)
1522 size = len1 + len2;
1523 ASSERT(size >= len1 + len2);
1524
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001525 mp = soallocproto1(buf1, len1, size, sleepflg, cr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001526 if (mp)
1527 soappendmsg(mp, buf2, len2);
1528 return (mp);
1529}
1530
1531/*
1532 * Create a message using three kernel buffers.
1533 * If size is set that will determine the allocation size (for future
1534 * soappendmsg calls). If size is zero it is derived from the buffer
1535 * lengths.
1536 */
1537mblk_t *
1538soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001539 const void *buf3, ssize_t len3, ssize_t size, int sleepflg, cred_t *cr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001540{
1541 mblk_t *mp;
1542
1543 if (size == 0)
1544 size = len1 + len2 +len3;
1545 ASSERT(size >= len1 + len2 + len3);
1546
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08001547 mp = soallocproto1(buf1, len1, size, sleepflg, cr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001548 if (mp != NULL) {
1549 soappendmsg(mp, buf2, len2);
1550 soappendmsg(mp, buf3, len3);
1551 }
1552 return (mp);
1553}
1554
1555#ifdef DEBUG
1556char *
1557pr_state(uint_t state, uint_t mode)
1558{
1559 static char buf[1024];
1560
1561 buf[0] = 0;
1562 if (state & SS_ISCONNECTED)
rh87107903a11e2008-07-31 15:02:18 -07001563 (void) strcat(buf, "ISCONNECTED ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001564 if (state & SS_ISCONNECTING)
rh87107903a11e2008-07-31 15:02:18 -07001565 (void) strcat(buf, "ISCONNECTING ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001566 if (state & SS_ISDISCONNECTING)
rh87107903a11e2008-07-31 15:02:18 -07001567 (void) strcat(buf, "ISDISCONNECTING ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001568 if (state & SS_CANTSENDMORE)
rh87107903a11e2008-07-31 15:02:18 -07001569 (void) strcat(buf, "CANTSENDMORE ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001570
1571 if (state & SS_CANTRCVMORE)
rh87107903a11e2008-07-31 15:02:18 -07001572 (void) strcat(buf, "CANTRCVMORE ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001573 if (state & SS_ISBOUND)
rh87107903a11e2008-07-31 15:02:18 -07001574 (void) strcat(buf, "ISBOUND ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001575 if (state & SS_NDELAY)
rh87107903a11e2008-07-31 15:02:18 -07001576 (void) strcat(buf, "NDELAY ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001577 if (state & SS_NONBLOCK)
rh87107903a11e2008-07-31 15:02:18 -07001578 (void) strcat(buf, "NONBLOCK ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001579
1580 if (state & SS_ASYNC)
rh87107903a11e2008-07-31 15:02:18 -07001581 (void) strcat(buf, "ASYNC ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001582 if (state & SS_ACCEPTCONN)
rh87107903a11e2008-07-31 15:02:18 -07001583 (void) strcat(buf, "ACCEPTCONN ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001584 if (state & SS_SAVEDEOR)
rh87107903a11e2008-07-31 15:02:18 -07001585 (void) strcat(buf, "SAVEDEOR ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001586
1587 if (state & SS_RCVATMARK)
rh87107903a11e2008-07-31 15:02:18 -07001588 (void) strcat(buf, "RCVATMARK ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001589 if (state & SS_OOBPEND)
rh87107903a11e2008-07-31 15:02:18 -07001590 (void) strcat(buf, "OOBPEND ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001591 if (state & SS_HAVEOOBDATA)
rh87107903a11e2008-07-31 15:02:18 -07001592 (void) strcat(buf, "HAVEOOBDATA ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001593 if (state & SS_HADOOBDATA)
rh87107903a11e2008-07-31 15:02:18 -07001594 (void) strcat(buf, "HADOOBDATA ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001595
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001596 if (mode & SM_PRIV)
rh87107903a11e2008-07-31 15:02:18 -07001597 (void) strcat(buf, "PRIV ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001598 if (mode & SM_ATOMIC)
rh87107903a11e2008-07-31 15:02:18 -07001599 (void) strcat(buf, "ATOMIC ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001600 if (mode & SM_ADDR)
rh87107903a11e2008-07-31 15:02:18 -07001601 (void) strcat(buf, "ADDR ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001602 if (mode & SM_CONNREQUIRED)
rh87107903a11e2008-07-31 15:02:18 -07001603 (void) strcat(buf, "CONNREQUIRED ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001604
1605 if (mode & SM_FDPASSING)
rh87107903a11e2008-07-31 15:02:18 -07001606 (void) strcat(buf, "FDPASSING ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001607 if (mode & SM_EXDATA)
rh87107903a11e2008-07-31 15:02:18 -07001608 (void) strcat(buf, "EXDATA ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001609 if (mode & SM_OPTDATA)
rh87107903a11e2008-07-31 15:02:18 -07001610 (void) strcat(buf, "OPTDATA ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001611 if (mode & SM_BYTESTREAM)
rh87107903a11e2008-07-31 15:02:18 -07001612 (void) strcat(buf, "BYTESTREAM ");
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001613 return (buf);
1614}
1615
1616char *
1617pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen)
1618{
1619 static char buf[1024];
1620
1621 if (addr == NULL || addrlen == 0) {
rh87107903a11e2008-07-31 15:02:18 -07001622 (void) sprintf(buf, "(len %d) %p", addrlen, (void *)addr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001623 return (buf);
1624 }
1625 switch (family) {
1626 case AF_INET: {
1627 struct sockaddr_in sin;
1628
1629 bcopy(addr, &sin, sizeof (sin));
1630
1631 (void) sprintf(buf, "(len %d) %x/%d",
tomeeb5fca8f2008-05-26 17:53:26 -07001632 addrlen, ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001633 break;
1634 }
1635 case AF_INET6: {
1636 struct sockaddr_in6 sin6;
1637 uint16_t *piece = (uint16_t *)&sin6.sin6_addr;
1638
1639 bcopy((char *)addr, (char *)&sin6, sizeof (sin6));
rh87107903a11e2008-07-31 15:02:18 -07001640 (void) sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d",
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001641 addrlen,
1642 ntohs(piece[0]), ntohs(piece[1]),
1643 ntohs(piece[2]), ntohs(piece[3]),
1644 ntohs(piece[4]), ntohs(piece[5]),
1645 ntohs(piece[6]), ntohs(piece[7]),
1646 ntohs(sin6.sin6_port));
1647 break;
1648 }
1649 case AF_UNIX: {
1650 struct sockaddr_un *soun = (struct sockaddr_un *)addr;
1651
tomeeb5fca8f2008-05-26 17:53:26 -07001652 (void) sprintf(buf, "(len %d) %s", addrlen,
gwwd3e55dc2007-12-27 11:07:48 -08001653 (soun == NULL) ? "(none)" : soun->sun_path);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001654 break;
1655 }
1656 default:
1657 (void) sprintf(buf, "(unknown af %d)", family);
1658 break;
1659 }
1660 return (buf);
1661}
1662
/*
 * The logical equivalence operator (a if-and-only-if b).
 *
 * Evaluates to 1 when a and b are both non-zero or both zero, and to 0
 * otherwise.  Each argument is normalized with ! and evaluated exactly
 * once, so arguments with side effects are safe.
 */
#define	EQUIVALENT(a, b)	(!(a) == !(b))
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001665
1666/*
1667 * Verify limitations and invariants on oob state.
1668 * Return 1 if OK, otherwise 0 so that it can be used as
1669 * ASSERT(verify_oobstate(so));
1670 */
1671int
1672so_verify_oobstate(struct sonode *so)
1673{
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001674 boolean_t havemark;
1675
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001676 ASSERT(MUTEX_HELD(&so->so_lock));
1677
1678 /*
1679 * The possible state combinations are:
1680 * 0
1681 * SS_OOBPEND
1682 * SS_OOBPEND|SS_HAVEOOBDATA
1683 * SS_OOBPEND|SS_HADOOBDATA
1684 * SS_HADOOBDATA
1685 */
1686 switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) {
1687 case 0:
1688 case SS_OOBPEND:
1689 case SS_OOBPEND|SS_HAVEOOBDATA:
1690 case SS_OOBPEND|SS_HADOOBDATA:
1691 case SS_HADOOBDATA:
1692 break;
1693 default:
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001694 printf("Bad oob state 1 (%p): state %s\n",
1695 (void *)so, pr_state(so->so_state, so->so_mode));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001696 return (0);
1697 }
1698
1699 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */
1700 if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) {
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001701 printf("Bad oob state 2 (%p): state %s\n",
1702 (void *)so, pr_state(so->so_state, so->so_mode));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001703 return (0);
1704 }
1705
1706 /*
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001707 * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND
1708 * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001709 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001710 havemark = (SOCK_IS_NONSTR(so)) ? so->so_oobmark > 0 :
1711 SOTOTPI(so)->sti_oobsigcnt > 0;
1712
Jonathan Adams56f33202010-01-12 17:06:34 -08001713 if (!EQUIVALENT(havemark || (so->so_state & SS_RCVATMARK),
gwwd3e55dc2007-12-27 11:07:48 -08001714 so->so_state & SS_OOBPEND)) {
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001715 printf("Bad oob state 3 (%p): state %s\n",
1716 (void *)so, pr_state(so->so_state, so->so_mode));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001717 return (0);
1718 }
1719
1720 /*
1721 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA
1722 */
1723 if (!(so->so_options & SO_OOBINLINE) &&
Jonathan Adams56f33202010-01-12 17:06:34 -08001724 !EQUIVALENT(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) {
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001725 printf("Bad oob state 4 (%p): state %s\n",
1726 (void *)so, pr_state(so->so_state, so->so_mode));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001727 return (0);
1728 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001729
1730 if (!SOCK_IS_NONSTR(so) &&
1731 SOTOTPI(so)->sti_oobsigcnt < SOTOTPI(so)->sti_oobcnt) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001732 printf("Bad oob state 5 (%p): counts %d/%d state %s\n",
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001733 (void *)so, SOTOTPI(so)->sti_oobsigcnt,
1734 SOTOTPI(so)->sti_oobcnt,
1735 pr_state(so->so_state, so->so_mode));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001736 return (0);
1737 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001738
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001739 return (1);
1740}
Jonathan Adams56f33202010-01-12 17:06:34 -08001741#undef EQUIVALENT
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001742#endif /* DEBUG */
1743
1744/* initialize sockfs zone specific kstat related items */
1745void *
1746sock_kstat_init(zoneid_t zoneid)
1747{
1748 kstat_t *ksp;
1749
1750 ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc",
1751 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid);
1752
1753 if (ksp != NULL) {
1754 ksp->ks_update = sockfs_update;
1755 ksp->ks_snapshot = sockfs_snapshot;
1756 ksp->ks_lock = &socklist.sl_lock;
1757 ksp->ks_private = (void *)(uintptr_t)zoneid;
1758 kstat_install(ksp);
1759 }
1760
1761 return (ksp);
1762}
1763
1764/* tear down sockfs zone specific kstat related items */
1765/*ARGSUSED*/
1766void
1767sock_kstat_fini(zoneid_t zoneid, void *arg)
1768{
1769 kstat_t *ksp = (kstat_t *)arg;
1770
1771 if (ksp != NULL) {
1772 ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private);
1773 kstat_delete(ksp);
1774 }
1775}
1776
1777/*
1778 * Zones:
1779 * Note that nactive is going to be different for each zone.
1780 * This means we require kstat to call sockfs_update and then sockfs_snapshot
1781 * for the same zone, or sockfs_snapshot will be taken into the wrong size
1782 * buffer. This is safe, but if the buffer is too small, user will not be
1783 * given details of all sockets. However, as this kstat has a ks_lock, kstat
1784 * driver will keep it locked between the update and the snapshot, so no
1785 * other process (zone) can currently get inbetween resulting in a wrong size
1786 * buffer allocation.
1787 */
1788static int
1789sockfs_update(kstat_t *ksp, int rw)
1790{
1791 uint_t nactive = 0; /* # of active AF_UNIX sockets */
1792 struct sonode *so; /* current sonode on socklist */
1793 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
1794
1795 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
1796
1797 if (rw == KSTAT_WRITE) { /* bounce all writes */
1798 return (EACCES);
1799 }
1800
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001801 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
1802 if (so->so_count != 0 && so->so_zoneid == myzoneid) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001803 nactive++;
1804 }
1805 }
1806 ksp->ks_ndata = nactive;
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001807 ksp->ks_data_size = nactive * sizeof (struct sockinfo);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001808
1809 return (0);
1810}
1811
1812static int
1813sockfs_snapshot(kstat_t *ksp, void *buf, int rw)
1814{
1815 int ns; /* # of sonodes we've copied */
1816 struct sonode *so; /* current sonode on socklist */
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001817 struct sockinfo *psi; /* where we put sockinfo data */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001818 t_uscalar_t sn_len; /* soa_len */
1819 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001820 sotpi_info_t *sti;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001821
1822 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
1823
1824 ksp->ks_snaptime = gethrtime();
1825
1826 if (rw == KSTAT_WRITE) { /* bounce all writes */
1827 return (EACCES);
1828 }
1829
1830 /*
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001831 * For each sonode on the socklist, we massage the important
1832 * info into buf, in sockinfo format.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001833 */
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001834 psi = (struct sockinfo *)buf;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001835 ns = 0;
1836 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001837 vattr_t attr;
1838
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001839 /* only stuff active sonodes and the same zone: */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001840 if (so->so_count == 0 || so->so_zoneid != myzoneid) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001841 continue;
1842 }
1843
1844 /*
1845 * If the sonode was activated between the update and the
1846 * snapshot, we're done - as this is only a snapshot.
1847 */
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001848 if ((caddr_t)(psi) >= (caddr_t)buf + ksp->ks_data_size) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001849 break;
1850 }
1851
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001852 sti = SOTOTPI(so);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001853 /* copy important info into buf: */
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001854 psi->si_size = sizeof (struct sockinfo);
1855 psi->si_family = so->so_family;
1856 psi->si_type = so->so_type;
1857 psi->si_flag = so->so_flag;
1858 psi->si_state = so->so_state;
1859 psi->si_serv_type = sti->sti_serv_type;
1860 psi->si_ux_laddr_sou_magic = sti->sti_ux_laddr.soua_magic;
1861 psi->si_ux_faddr_sou_magic = sti->sti_ux_faddr.soua_magic;
1862 psi->si_laddr_soa_len = sti->sti_laddr.soa_len;
1863 psi->si_faddr_soa_len = sti->sti_faddr.soa_len;
1864 psi->si_szoneid = so->so_zoneid;
1865 psi->si_faddr_noxlate = sti->sti_faddr_noxlate;
1866
1867 /*
1868 * Grab the inode, if possible.
1869 * This must be done before entering so_lock as VOP_GETATTR
1870 * will acquire it.
1871 */
1872 if (so->so_vnode == NULL ||
1873 VOP_GETATTR(so->so_vnode, &attr, 0, CRED(), NULL) != 0)
1874 attr.va_nodeid = 0;
1875
1876 psi->si_inode = attr.va_nodeid;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001877
1878 mutex_enter(&so->so_lock);
1879
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001880 if (sti->sti_laddr_sa != NULL) {
1881 ASSERT(sti->sti_laddr_sa->sa_data != NULL);
1882 sn_len = sti->sti_laddr_len;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001883 ASSERT(sn_len <= sizeof (short) +
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001884 sizeof (psi->si_laddr_sun_path));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001885
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001886 psi->si_laddr_family =
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001887 sti->sti_laddr_sa->sa_family;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001888 if (sn_len != 0) {
1889 /* AF_UNIX socket names are NULL terminated */
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001890 (void) strncpy(psi->si_laddr_sun_path,
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001891 sti->sti_laddr_sa->sa_data,
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001892 sizeof (psi->si_laddr_sun_path));
1893 sn_len = strlen(psi->si_laddr_sun_path);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001894 }
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001895 psi->si_laddr_sun_path[sn_len] = 0;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001896 }
1897
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001898 if (sti->sti_faddr_sa != NULL) {
1899 ASSERT(sti->sti_faddr_sa->sa_data != NULL);
1900 sn_len = sti->sti_faddr_len;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001901 ASSERT(sn_len <= sizeof (short) +
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001902 sizeof (psi->si_faddr_sun_path));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001903
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001904 psi->si_faddr_family =
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001905 sti->sti_faddr_sa->sa_family;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001906 if (sn_len != 0) {
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001907 (void) strncpy(psi->si_faddr_sun_path,
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001908 sti->sti_faddr_sa->sa_data,
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001909 sizeof (psi->si_faddr_sun_path));
1910 sn_len = strlen(psi->si_faddr_sun_path);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001911 }
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001912 psi->si_faddr_sun_path[sn_len] = 0;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001913 }
1914
1915 mutex_exit(&so->so_lock);
1916
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001917 (void) snprintf(psi->si_son_straddr,
1918 sizeof (psi->si_son_straddr), "%p", (void *)so);
1919 (void) snprintf(psi->si_lvn_straddr,
1920 sizeof (psi->si_lvn_straddr), "%p",
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001921 (void *)sti->sti_ux_laddr.soua_vp);
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001922 (void) snprintf(psi->si_fvn_straddr,
1923 sizeof (psi->si_fvn_straddr), "%p",
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001924 (void *)sti->sti_ux_faddr.soua_vp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001925
1926 ns++;
Andy Fiddaman78a2e112019-08-22 14:59:11 +00001927 psi++;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001928 }
1929
1930 ksp->ks_ndata = ns;
1931 return (0);
1932}
1933
1934ssize_t
1935soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size)
1936{
1937 struct uio auio;
Bryan Cantrill8e935252014-10-04 09:02:58 +00001938 struct iovec aiov[1];
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001939 register vnode_t *vp;
1940 int ioflag, rwflag;
1941 ssize_t cnt;
1942 int error = 0;
1943 int iovcnt = 0;
1944 short fflag;
1945
1946 vp = fp->f_vnode;
1947 fflag = fp->f_flag;
1948
1949 rwflag = 0;
1950 aiov[0].iov_base = (caddr_t)buf;
1951 aiov[0].iov_len = size;
1952 iovcnt = 1;
1953 cnt = (ssize_t)size;
1954 (void) VOP_RWLOCK(vp, rwflag, NULL);
1955
1956 auio.uio_loffset = fileoff;
1957 auio.uio_iov = aiov;
1958 auio.uio_iovcnt = iovcnt;
1959 auio.uio_resid = cnt;
1960 auio.uio_segflg = UIO_SYSSPACE;
1961 auio.uio_llimit = MAXOFFSET_T;
1962 auio.uio_fmode = fflag;
1963 auio.uio_extflg = UIO_COPY_CACHED;
1964
1965 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1966
1967 /* If read sync is not asked for, filter sync flags */
1968 if ((ioflag & FRSYNC) == 0)
1969 ioflag &= ~(FSYNC|FDSYNC);
1970 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1971 cnt -= auio.uio_resid;
1972
1973 VOP_RWUNLOCK(vp, rwflag, NULL);
1974
1975 if (error == EINTR && cnt != 0)
1976 error = 0;
1977out:
1978 if (error != 0) {
1979 *err = error;
1980 return (0);
1981 } else {
1982 *err = 0;
1983 return (cnt);
1984 }
1985}
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001986
/*
 * Copy `size' bytes from `from' to `to'.
 *
 * When `fromkernel' is non-zero the source is copied with bcopy() and 0 is
 * returned; otherwise the copy is done with xcopyin() and its result is
 * returned.
 */
int
so_copyin(const void *from, void *to, size_t size, int fromkernel)
{
	if (!fromkernel)
		return (xcopyin(from, to, size));

	bcopy(from, to, size);
	return (0);
}
1996
/*
 * Copy `size' bytes from `from' to `to'.
 *
 * When `tokernel' is non-zero the destination is written with bcopy() and 0
 * is returned; otherwise the copy is done with xcopyout() and its result is
 * returned.
 */
int
so_copyout(const void *from, void *to, size_t size, int tokernel)
{
	if (!tokernel)
		return (xcopyout(from, to, size));

	bcopy(from, to, size);
	return (0);
}