PSARC 2007/587 Volo -- Low Latency Socket Framework
PSARC 2008/694 Volo Interfaces Amendment
6765829 Integration of project Volo PSARC/2007/587
6644935 mblk cred_t reference counting limits scalability
6693633 TCP receive does not scale because of heavy refcounting of cred structures
4764841 connect/accept is slow on Solaris when compared to Linux
5105708 socket creation retains hold on accessvp
4764836 setsockopt is slow on Solaris when compared to Linux
4772191 socket close(2) is slow on Solaris when compared to Linux
--HG--
rename : usr/src/uts/common/fs/sockfs/socksctp.c => usr/src/uts/common/inet/sockmods/socksctp.c
rename : usr/src/uts/common/fs/sockfs/socksctp.h => usr/src/uts/common/inet/sockmods/socksctp.h
rename : usr/src/uts/common/fs/sockfs/socksctpsubr.c => usr/src/uts/common/inet/sockmods/socksctpsubr.c
rename : usr/src/uts/common/fs/sockfs/socksdp.c => usr/src/uts/common/inet/sockmods/socksdp.c
rename : usr/src/uts/common/fs/sockfs/socksdp.h => usr/src/uts/common/inet/sockmods/socksdp.h
diff --git a/usr/src/uts/common/sys/socket_proto.h b/usr/src/uts/common/sys/socket_proto.h
new file mode 100644
index 0000000..8f60ea9
--- /dev/null
+++ b/usr/src/uts/common/sys/socket_proto.h
@@ -0,0 +1,182 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_SOCKET_PROTO_H_
+#define _SYS_SOCKET_PROTO_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/socket.h>
+
+/*
+ * Generation count
+ */
+typedef uint64_t sock_connid_t;
+
+#define SOCK_CONNID_INIT(id) { \
+ (id) = 0; \
+}
+#define SOCK_CONNID_BUMP(id) (++(id))
+#define SOCK_CONNID_LT(id1, id2) ((int64_t)((id1)-(id2)) < 0)
+
+/* Socket protocol properties */
+struct sock_proto_props {
+ uint_t sopp_flags; /* options to set */
+ ushort_t sopp_wroff; /* write offset */
+ ssize_t sopp_txhiwat; /* tx hi water mark */
+ ssize_t sopp_txlowat; /* tx lo water mark */
+ ssize_t sopp_rxhiwat; /* recv high water mark */
+ ssize_t sopp_rxlowat; /* recv low water mark */
+ ssize_t sopp_maxblk; /* maximum message block size */
+ ssize_t sopp_maxpsz; /* maximum packet size */
+ ssize_t sopp_minpsz; /* minimum packet size */
+ ushort_t sopp_tail; /* space available at the end */
+ uint_t sopp_zcopyflag; /* zero copy flag */
+ boolean_t sopp_oobinline; /* OOB inline */
+ uint_t sopp_rcvtimer; /* delayed recv notification (time) */
+ uint32_t sopp_rcvthresh; /* delayed recv notification (bytes) */
+ socklen_t sopp_maxaddrlen; /* maximum size of protocol address */
+};
+
+/* flags to determine which socket options are set */
+#define SOCKOPT_WROFF 0x0001 /* set write offset */
+#define SOCKOPT_RCVHIWAT 0x0002 /* set read side high water */
+#define SOCKOPT_RCVLOWAT 0x0004 /* set read side high water */
+#define SOCKOPT_MAXBLK 0x0008 /* set maximum message block size */
+#define SOCKOPT_TAIL 0x0010 /* set the extra allocated space */
+#define SOCKOPT_ZCOPY 0x0020 /* set/unset zero copy for sendfile */
+#define SOCKOPT_MAXPSZ 0x0040 /* set maxpsz for protocols */
+#define SOCKOPT_OOBINLINE 0x0080 /* set oob inline processing */
+#define SOCKOPT_RCVTIMER 0x0100
+#define SOCKOPT_RCVTHRESH 0x0200
+#define SOCKOPT_MAXADDRLEN 0x0400 /* set max address length */
+#define SOCKOPT_MINPSZ 0x0800 /* set minpsz for protocols */
+
+#define IS_SO_OOB_INLINE(so) ((so)->so_proto_props.sopp_oobinline)
+
+#ifdef _KERNEL
+
+struct T_capability_ack;
+
+typedef struct sock_upcalls_s sock_upcalls_t;
+typedef struct sock_downcalls_s sock_downcalls_t;
+
+/*
+ * Upcall and downcall handle for sockfs and transport layer.
+ */
+typedef struct __sock_upper_handle *sock_upper_handle_t;
+typedef struct __sock_lower_handle *sock_lower_handle_t;
+
+struct sock_downcalls_s {
+ void (*sd_activate)(sock_lower_handle_t, sock_upper_handle_t,
+ sock_upcalls_t *, int, cred_t *);
+ int (*sd_accept)(sock_lower_handle_t, sock_lower_handle_t,
+ sock_upper_handle_t, cred_t *);
+ int (*sd_bind)(sock_lower_handle_t, struct sockaddr *, socklen_t,
+ cred_t *);
+ int (*sd_listen)(sock_lower_handle_t, int, cred_t *);
+ int (*sd_connect)(sock_lower_handle_t, const struct sockaddr *,
+ socklen_t, sock_connid_t *, cred_t *);
+ int (*sd_getpeername)(sock_lower_handle_t, struct sockaddr *,
+ socklen_t *, cred_t *);
+ int (*sd_getsockname)(sock_lower_handle_t, struct sockaddr *,
+ socklen_t *, cred_t *);
+ int (*sd_getsockopt)(sock_lower_handle_t, int, int, void *,
+ socklen_t *, cred_t *);
+ int (*sd_setsockopt)(sock_lower_handle_t, int, int, const void *,
+ socklen_t, cred_t *);
+ int (*sd_send)(sock_lower_handle_t, mblk_t *, struct nmsghdr *,
+ cred_t *);
+ int (*sd_send_uio)(sock_lower_handle_t, uio_t *, struct nmsghdr *,
+ cred_t *);
+ int (*sd_recv_uio)(sock_lower_handle_t, uio_t *, struct nmsghdr *,
+ cred_t *);
+ short (*sd_poll)(sock_lower_handle_t, short, int, cred_t *);
+ int (*sd_shutdown)(sock_lower_handle_t, int, cred_t *);
+ void (*sd_clr_flowctrl)(sock_lower_handle_t);
+ int (*sd_ioctl)(sock_lower_handle_t, int, intptr_t, int,
+ int32_t *, cred_t *);
+ int (*sd_close)(sock_lower_handle_t, int, cred_t *);
+};
+
+typedef sock_lower_handle_t (*so_proto_create_func_t)(int, int, int,
+ sock_downcalls_t **, uint_t *, int *, int, cred_t *);
+
+typedef void (*so_proto_quiesced_cb_t)(sock_upper_handle_t, queue_t *,
+ struct T_capability_ack *, struct sockaddr *, socklen_t,
+ struct sockaddr *, socklen_t, short);
+typedef void (*so_proto_fallback_func_t)(sock_lower_handle_t, queue_t *,
+ boolean_t, so_proto_quiesced_cb_t);
+
+/*
+ * Upcalls and related information
+ */
+
+/*
+ * su_opctl() actions
+ */
+typedef enum sock_opctl_action {
+ SOCK_OPCTL_ENAB_ACCEPT = 0,
+ SOCK_OPCTL_SHUT_SEND,
+ SOCK_OPCTL_SHUT_RECV
+} sock_opctl_action_t;
+
+struct sock_upcalls_s {
+ sock_upper_handle_t (*su_newconn)(sock_upper_handle_t,
+ sock_lower_handle_t, sock_downcalls_t *, cred_t *, pid_t,
+ sock_upcalls_t **);
+ void (*su_connected)(sock_upper_handle_t, sock_connid_t, cred_t *,
+ pid_t);
+ int (*su_disconnected)(sock_upper_handle_t, sock_connid_t, int);
+ void (*su_opctl)(sock_upper_handle_t, sock_opctl_action_t,
+ uintptr_t);
+ ssize_t (*su_recv)(sock_upper_handle_t, mblk_t *, size_t, int,
+ int *, boolean_t *);
+ void (*su_set_proto_props)(sock_upper_handle_t,
+ struct sock_proto_props *);
+ void (*su_txq_full)(sock_upper_handle_t, boolean_t);
+ void (*su_signal_oob)(sock_upper_handle_t, ssize_t);
+ void (*su_zcopy_notify)(sock_upper_handle_t);
+ void (*su_set_error)(sock_upper_handle_t, int);
+};
+
+#define SOCK_UC_VERSION sizeof (sock_upcalls_t)
+#define SOCK_DC_VERSION sizeof (sock_downcalls_t)
+
+#define SOCKET_RECVHIWATER (48 * 1024)
+#define SOCKET_RECVLOWATER 1024
+
+#define SOCKET_NO_RCVTIMER 0
+#define SOCKET_TIMER_INTERVAL 50
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SOCKET_PROTO_H_ */