| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. |
| * Copyright 2013 Nexenta Systems, Inc. All rights reserved. |
| * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. |
| */ |
| /* Copyright (c) 1990 Mentat Inc. */ |
| |
| #include <sys/sysmacros.h> |
| #include <sys/types.h> |
| #include <sys/stream.h> |
| #include <sys/stropts.h> |
| #include <sys/strlog.h> |
| #include <sys/strsun.h> |
| #define _SUN_TPI_VERSION 2 |
| #include <sys/tihdr.h> |
| #include <sys/timod.h> |
| #include <sys/ddi.h> |
| #include <sys/sunddi.h> |
| #include <sys/strsubr.h> |
| #include <sys/suntpi.h> |
| #include <sys/xti_inet.h> |
| #include <sys/kmem.h> |
| #include <sys/cred_impl.h> |
| #include <sys/policy.h> |
| #include <sys/priv.h> |
| #include <sys/ucred.h> |
| #include <sys/zone.h> |
| |
| #include <sys/socket.h> |
| #include <sys/socketvar.h> |
| #include <sys/sockio.h> |
| #include <sys/vtrace.h> |
| #include <sys/sdt.h> |
| #include <sys/debug.h> |
| #include <sys/isa_defs.h> |
| #include <sys/random.h> |
| #include <netinet/in.h> |
| #include <netinet/ip6.h> |
| #include <netinet/icmp6.h> |
| #include <netinet/udp.h> |
| |
| #include <inet/common.h> |
| #include <inet/ip.h> |
| #include <inet/ip_impl.h> |
| #include <inet/ipsec_impl.h> |
| #include <inet/ip6.h> |
| #include <inet/ip_ire.h> |
| #include <inet/ip_if.h> |
| #include <inet/ip_multi.h> |
| #include <inet/ip_ndp.h> |
| #include <inet/proto_set.h> |
| #include <inet/mib2.h> |
| #include <inet/optcom.h> |
| #include <inet/snmpcom.h> |
| #include <inet/kstatcom.h> |
| #include <inet/ipclassifier.h> |
| #include <sys/squeue_impl.h> |
| #include <inet/ipnet.h> |
| #include <sys/ethernet.h> |
| |
| #include <sys/tsol/label.h> |
| #include <sys/tsol/tnet.h> |
| #include <rpc/pmap_prot.h> |
| |
| #include <inet/udp_impl.h> |
| |
| /* |
| * Synchronization notes: |
| * |
| * UDP is MT and uses the usual kernel synchronization primitives. There are |
| * two locks: the fanout lock (uf_lock) and the per-connection conn_lock. |
| * conn_lock protects the contents of the udp_t. uf_lock protects the |
| * address and the fanout information. |
| * The lock order is conn_lock -> uf_lock. |
| * |
| * The fanout lock uf_lock: |
| * When a UDP endpoint is bound to a local port, it is inserted into |
| * a bind hash list. The list consists of an array of udp_fanout_t buckets. |
| * The size of the array is controlled by the udp_bind_fanout_size variable. |
| * This variable can be changed in /etc/system if the default value is |
| * not large enough. Each bind hash bucket is protected by a per bucket |
| * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t |
| * structure and a few other fields in the udp_t. A UDP endpoint is removed |
| * from the bind hash list only when it is being unbound or being closed. |
| * The per bucket lock also protects a UDP endpoint's state changes. |
| * |
| * Plumbing notes: |
| * UDP is always a device driver. For compatibility with mibopen() code |
| * it is possible to I_PUSH "udp", but that results in pushing a passthrough |
| * dummy module. |
| * |
| * The above implies that we don't support any intermediate module to |
| * reside in between /dev/ip and udp -- in fact, we have never supported |
| * such a scenario, as the inter-layer communication semantics have |
| * always been private. |
| */ |
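| |
| /* |
| * Lock order sketch (illustrative only, not an actual code path): when |
| * both locks are needed they are taken in the documented order, e.g. |
| * |
| * mutex_enter(&connp->conn_lock); |
| * mutex_enter(&uf->uf_lock); |
| * ... update udp_t state and bind hash linkage ... |
| * mutex_exit(&uf->uf_lock); |
| * mutex_exit(&connp->conn_lock); |
| * |
| * where "connp" is the conn_t and "uf" is its udp_fanout_t bucket. |
| */ |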
| |
| /* For /etc/system control */ |
| uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; |
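| |
| /* |
| * For example, a line like the following in /etc/system would enlarge the |
| * bind fanout (illustrative only; the module name depends on how UDP is |
| * delivered in the running kernel, and a reboot is needed for /etc/system |
| * changes to take effect): |
| * |
| * set ip:udp_bind_fanout_size = 1024 |
| */ |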
| |
| static void udp_addr_req(queue_t *q, mblk_t *mp); |
| static void udp_tpi_bind(queue_t *q, mblk_t *mp); |
| static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); |
| static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); |
| static int udp_build_hdr_template(conn_t *, const in6_addr_t *, |
| const in6_addr_t *, in_port_t, uint32_t); |
| static void udp_capability_req(queue_t *q, mblk_t *mp); |
| static int udp_tpi_close(queue_t *q, int flags, cred_t *); |
| static void udp_close_free(conn_t *); |
| static void udp_tpi_connect(queue_t *q, mblk_t *mp); |
| static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); |
| static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, |
| int sys_error); |
| static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, |
| t_scalar_t tlierr, int sys_error); |
| static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, |
| cred_t *cr); |
| static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, |
| char *value, caddr_t cp, cred_t *cr); |
| static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, |
| char *value, caddr_t cp, cred_t *cr); |
| static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); |
| static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, |
| ip_recv_attr_t *ira); |
| static void udp_info_req(queue_t *q, mblk_t *mp); |
| static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); |
| static int udp_lrput(queue_t *, mblk_t *); |
| static int udp_lwput(queue_t *, mblk_t *); |
| static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, |
| cred_t *credp, boolean_t isv6); |
| static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, |
| cred_t *credp); |
| static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, |
| cred_t *credp); |
| static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); |
| int udp_opt_set(conn_t *connp, uint_t optset_context, |
| int level, int name, uint_t inlen, |
| uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, |
| void *thisdg_attrs, cred_t *cr); |
| int udp_opt_get(conn_t *connp, int level, int name, |
| uchar_t *ptr); |
| static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, |
| pid_t pid); |
| static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, |
| pid_t pid, ip_xmit_attr_t *ixa); |
| static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, |
| sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, |
| ip_xmit_attr_t *ixa); |
| static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, |
| const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, |
| int *); |
| static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, |
| mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); |
| static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); |
| static void udp_ud_err_connected(conn_t *, t_scalar_t); |
| static void udp_tpi_unbind(queue_t *q, mblk_t *mp); |
| static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, |
| boolean_t random); |
| static void udp_wput_other(queue_t *q, mblk_t *mp); |
| static void udp_wput_iocdata(queue_t *q, mblk_t *mp); |
| static int udp_wput_fallback(queue_t *q, mblk_t *mp); |
| static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); |
| |
| static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); |
| static void udp_stack_fini(netstackid_t stackid, void *arg); |
| |
| /* Common routines for TPI and socket module */ |
| static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); |
| |
| /* Common routine for TPI and socket module */ |
| static conn_t *udp_do_open(cred_t *, boolean_t, int, int *); |
| static void udp_do_close(conn_t *); |
| static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, |
| boolean_t); |
| static int udp_do_unbind(conn_t *); |
| |
| int udp_getsockname(sock_lower_handle_t, |
| struct sockaddr *, socklen_t *, cred_t *); |
| int udp_getpeername(sock_lower_handle_t, |
| struct sockaddr *, socklen_t *, cred_t *); |
| static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, |
| cred_t *, pid_t); |
| |
| #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst) |
| |
| /* |
| * Checks if the given destination addr/port is allowed out. |
| * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. |
| * Called for each connect() and for sendto()/sendmsg() to a different |
| * destination. |
| * For connect(), called in udp_connect(). |
| * For sendto()/sendmsg(), called in udp_output_newdst(). |
| * |
| * This macro assumes that the cl_inet_connect2 hook is not NULL; |
| * the caller must verify that before invoking the macro. |
| * |
| * void |
| * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing, |
| * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); |
| */ |
| #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ |
| (err) = 0; \ |
| /* \ |
| * Running in cluster mode - check and register active \ |
| * "connection" information \ |
| */ \ |
| if ((cp)->conn_ipversion == IPV4_VERSION) \ |
| (err) = (*cl_inet_connect2)( \ |
| (cp)->conn_netstack->netstack_stackid, \ |
| IPPROTO_UDP, is_outgoing, AF_INET, \ |
| (uint8_t *)&((cp)->conn_laddr_v4), \ |
| (cp)->conn_lport, \ |
| (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ |
| (in_port_t)(fport), NULL); \ |
| else \ |
| (err) = (*cl_inet_connect2)( \ |
| (cp)->conn_netstack->netstack_stackid, \ |
| IPPROTO_UDP, is_outgoing, AF_INET6, \ |
| (uint8_t *)&((cp)->conn_laddr_v6), \ |
| (cp)->conn_lport, \ |
| (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ |
| } |
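| |
| /* |
| * Illustrative usage sketch only ("v6dst" and "dstport" are placeholder |
| * names, not locals defined in this file): |
| * |
| * if (cl_inet_connect2 != NULL) { |
| * CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); |
| * if (error != 0) |
| * return (error); |
| * } |
| */ |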
| |
| static struct module_info udp_mod_info = { |
| UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER |
| }; |
| |
| /* |
| * Entry points for UDP as a device. |
| * We have separate open functions for the /dev/udp and /dev/udp6 devices. |
| */ |
| static struct qinit udp_rinitv4 = { |
| NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL |
| }; |
| |
| static struct qinit udp_rinitv6 = { |
| NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL |
| }; |
| |
| static struct qinit udp_winit = { |
| udp_wput, ip_wsrv, NULL, NULL, NULL, &udp_mod_info |
| }; |
| |
| /* UDP entry point during fallback */ |
| struct qinit udp_fallback_sock_winit = { |
| udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info |
| }; |
| |
| /* |
| * UDP needs to handle I_LINK and I_PLINK since ifconfig |
| * likes to use it as a place to hang the various streams. |
| */ |
| static struct qinit udp_lrinit = { |
| udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info |
| }; |
| |
| static struct qinit udp_lwinit = { |
| udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info |
| }; |
| |
| /* For AF_INET aka /dev/udp */ |
| struct streamtab udpinfov4 = { |
| &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit |
| }; |
| |
| /* For AF_INET6 aka /dev/udp6 */ |
| struct streamtab udpinfov6 = { |
| &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit |
| }; |
| |
| #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) |
| |
| /* Default structure copied into T_INFO_ACK messages */ |
| static struct T_info_ack udp_g_t_info_ack_ipv4 = { |
| T_INFO_ACK, |
| UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ |
| T_INVALID, /* ETSU_size. udp does not support expedited data. */ |
| T_INVALID, /* CDATA_size. udp does not support connect data. */ |
| T_INVALID, /* DDATA_size. udp does not support disconnect data. */ |
| sizeof (sin_t), /* ADDR_size. */ |
| 0, /* OPT_size - not initialized here */ |
| UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ |
| T_CLTS, /* SERV_type. udp supports connection-less. */ |
| TS_UNBND, /* CURRENT_state. This is set from udp_state. */ |
| (XPG4_1|SENDZERO) /* PROVIDER_flag */ |
| }; |
| |
| #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) |
| |
| static struct T_info_ack udp_g_t_info_ack_ipv6 = { |
| T_INFO_ACK, |
| UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ |
| T_INVALID, /* ETSU_size. udp does not support expedited data. */ |
| T_INVALID, /* CDATA_size. udp does not support connect data. */ |
| T_INVALID, /* DDATA_size. udp does not support disconnect data. */ |
| sizeof (sin6_t), /* ADDR_size. */ |
| 0, /* OPT_size - not initialized here */ |
| UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ |
| T_CLTS, /* SERV_type. udp supports connection-less. */ |
| TS_UNBND, /* CURRENT_state. This is set from udp_state. */ |
| (XPG4_1|SENDZERO) /* PROVIDER_flag */ |
| }; |
| |
| /* |
| * UDP tunables related declarations. Definitions are in udp_tunables.c |
| */ |
| extern mod_prop_info_t udp_propinfo_tbl[]; |
| extern int udp_propinfo_count; |
| |
| /* Settable in /etc/system */ |
| /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ |
| uint32_t udp_random_anon_port = 1; |
| |
| /* |
| * Hook functions to enable cluster networking. |
| * On non-clustered systems these vectors must always be NULL |
| */ |
| |
| void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, |
| sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, |
| void *args) = NULL; |
| void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, |
| sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, |
| void *args) = NULL; |
| |
| typedef union T_primitives *t_primp_t; |
| |
| /* |
| * Return the next anonymous port in the privileged port range for |
| * bind checking. |
| * |
| * Trusted Extensions (TX) notes: TX allows the administrator to mark or |
| * reserve ports as Multilevel Ports (MLPs). MLPs have a special function |
| * on TX systems. Once a port is made an MLP, it is not available as an |
| * ordinary port. This creates "holes" in the port namespace. It may be |
| * necessary to skip over these "holes" to find a suitable anonymous port. |
| */ |
| static in_port_t |
| udp_get_next_priv_port(udp_t *udp) |
| { |
| static in_port_t next_priv_port = IPPORT_RESERVED - 1; |
| in_port_t nextport; |
| boolean_t restart = B_FALSE; |
| udp_stack_t *us = udp->udp_us; |
| |
| retry: |
| if (next_priv_port < us->us_min_anonpriv_port || |
| next_priv_port >= IPPORT_RESERVED) { |
| next_priv_port = IPPORT_RESERVED - 1; |
| if (restart) |
| return (0); |
| restart = B_TRUE; |
| } |
| |
| if (is_system_labeled() && |
| (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), |
| next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { |
| next_priv_port = nextport; |
| goto retry; |
| } |
| |
| return (next_priv_port--); |
| } |
| |
| /* |
| * Hash list removal routine for udp_t structures. |
| */ |
| static void |
| udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) |
| { |
| udp_t *udpnext; |
| kmutex_t *lockp; |
| udp_stack_t *us = udp->udp_us; |
| conn_t *connp = udp->udp_connp; |
| |
| if (udp->udp_ptpbhn == NULL) |
| return; |
| |
| /* |
| * Extract the lock pointer in case there are concurrent |
| * hash_remove's for this instance. |
| */ |
| ASSERT(connp->conn_lport != 0); |
| if (!caller_holds_lock) { |
| lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, |
| us->us_bind_fanout_size)].uf_lock; |
| ASSERT(lockp != NULL); |
| mutex_enter(lockp); |
| } |
| if (udp->udp_ptpbhn != NULL) { |
| udpnext = udp->udp_bind_hash; |
| if (udpnext != NULL) { |
| udpnext->udp_ptpbhn = udp->udp_ptpbhn; |
| udp->udp_bind_hash = NULL; |
| } |
| *udp->udp_ptpbhn = udpnext; |
| udp->udp_ptpbhn = NULL; |
| } |
| if (!caller_holds_lock) { |
| mutex_exit(lockp); |
| } |
| } |
| |
| static void |
| udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) |
| { |
| conn_t *connp = udp->udp_connp; |
| udp_t **udpp; |
| udp_t *udpnext; |
| conn_t *connext; |
| |
| ASSERT(MUTEX_HELD(&uf->uf_lock)); |
| ASSERT(udp->udp_ptpbhn == NULL); |
| udpp = &uf->uf_udp; |
| udpnext = udpp[0]; |
| if (udpnext != NULL) { |
| /* |
| * If the new udp bound to the INADDR_ANY address |
| * and the first one in the list is not bound to |
| * INADDR_ANY we skip all entries until we find the |
| * first one bound to INADDR_ANY. |
| * This makes sure that applications binding to a |
| * specific address get preference over those binding to |
| * INADDR_ANY. |
| */ |
| connext = udpnext->udp_connp; |
| if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && |
| !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { |
| while ((udpnext = udpp[0]) != NULL && |
| (connext = udpnext->udp_connp, |
| !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6))) { |
| udpp = &(udpnext->udp_bind_hash); |
| } |
| if (udpnext != NULL) |
| udpnext->udp_ptpbhn = &udp->udp_bind_hash; |
| } else { |
| udpnext->udp_ptpbhn = &udp->udp_bind_hash; |
| } |
| } |
| udp->udp_bind_hash = udpnext; |
| udp->udp_ptpbhn = udpp; |
| udpp[0] = udp; |
| } |
| |
| /* |
| * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message |
| * passed to udp_wput. |
| * It associates a port number and local address with the stream. |
| * It calls IP to verify the local IP address, and calls IP to insert |
| * the conn_t in the fanout table. |
| * If everything is ok it then sends the T_BIND_ACK back up. |
| * |
| * Note that UDP over IPv4 and IPv6 sockets can use the same port number |
| * without setting SO_REUSEADDR. This is needed so that they |
| * can be viewed as two independent transport protocols. |
| * However, anonymous ports are allocated from the same range to avoid |
| * duplicating us->us_next_port_to_try. |
| */ |
| static void |
| udp_tpi_bind(queue_t *q, mblk_t *mp) |
| { |
| sin_t *sin; |
| sin6_t *sin6; |
| mblk_t *mp1; |
| struct T_bind_req *tbr; |
| conn_t *connp; |
| udp_t *udp; |
| int error; |
| struct sockaddr *sa; |
| cred_t *cr; |
| |
| /* |
| * All Solaris components should pass a db_credp |
| * for this TPI message, hence we ASSERT. |
| * But in case there is some other M_PROTO that looks |
| * like a TPI message sent by some other kernel |
| * component, we check and return an error. |
| */ |
| cr = msg_getcred(mp, NULL); |
| ASSERT(cr != NULL); |
| if (cr == NULL) { |
| udp_err_ack(q, mp, TSYSERR, EINVAL); |
| return; |
| } |
| |
| connp = Q_TO_CONN(q); |
| udp = connp->conn_udp; |
| if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { |
| (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, |
| "udp_bind: bad req, len %u", |
| (uint_t)(mp->b_wptr - mp->b_rptr)); |
| udp_err_ack(q, mp, TPROTO, 0); |
| return; |
| } |
| if (udp->udp_state != TS_UNBND) { |
| (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, |
| "udp_bind: bad state, %u", udp->udp_state); |
| udp_err_ack(q, mp, TOUTSTATE, 0); |
| return; |
| } |
| /* |
| * Reallocate the message to make sure we have enough room for an |
| * address. |
| */ |
| mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); |
| if (mp1 == NULL) { |
| udp_err_ack(q, mp, TSYSERR, ENOMEM); |
| return; |
| } |
| |
| mp = mp1; |
| |
| /* Reset the message type in preparation for shipping it back. */ |
| DB_TYPE(mp) = M_PCPROTO; |
| |
| tbr = (struct T_bind_req *)mp->b_rptr; |
| switch (tbr->ADDR_length) { |
| case 0: /* Request for a generic port */ |
| tbr->ADDR_offset = sizeof (struct T_bind_req); |
| if (connp->conn_family == AF_INET) { |
| tbr->ADDR_length = sizeof (sin_t); |
| sin = (sin_t *)&tbr[1]; |
| *sin = sin_null; |
| sin->sin_family = AF_INET; |
| mp->b_wptr = (uchar_t *)&sin[1]; |
| sa = (struct sockaddr *)sin; |
| } else { |
| ASSERT(connp->conn_family == AF_INET6); |
| tbr->ADDR_length = sizeof (sin6_t); |
| sin6 = (sin6_t *)&tbr[1]; |
| *sin6 = sin6_null; |
| sin6->sin6_family = AF_INET6; |
| mp->b_wptr = (uchar_t *)&sin6[1]; |
| sa = (struct sockaddr *)sin6; |
| } |
| break; |
| |
| case sizeof (sin_t): /* Complete IPv4 address */ |
| sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, |
| sizeof (sin_t)); |
| if (sa == NULL || !OK_32PTR((char *)sa)) { |
| udp_err_ack(q, mp, TSYSERR, EINVAL); |
| return; |
| } |
| if (connp->conn_family != AF_INET || |
| sa->sa_family != AF_INET) { |
| udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); |
| return; |
| } |
| break; |
| |
| case sizeof (sin6_t): /* complete IPv6 address */ |
| sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, |
| sizeof (sin6_t)); |
| if (sa == NULL || !OK_32PTR((char *)sa)) { |
| udp_err_ack(q, mp, TSYSERR, EINVAL); |
| return; |
| } |
| if (connp->conn_family != AF_INET6 || |
| sa->sa_family != AF_INET6) { |
| udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); |
| return; |
| } |
| break; |
| |
| default: /* Invalid request */ |
| (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, |
| "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); |
| udp_err_ack(q, mp, TBADADDR, 0); |
| return; |
| } |
| |
| error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, |
| tbr->PRIM_type != O_T_BIND_REQ); |
| |
| if (error != 0) { |
| if (error > 0) { |
| udp_err_ack(q, mp, TSYSERR, error); |
| } else { |
| udp_err_ack(q, mp, -error, 0); |
| } |
| } else { |
| tbr->PRIM_type = T_BIND_ACK; |
| qreply(q, mp); |
| } |
| } |
| |
| /* |
| * This routine handles each T_CONN_REQ message passed to udp. It |
| * associates a default destination address with the stream. |
| * |
| * After various error checks are completed, udp_connect() lays |
| * the target address and port into the composite header template. |
| * Then we ask IP for information, including a source address if we didn't |
| * already have one. Finally we send up the T_OK_ACK reply message. |
| */ |
| static void |
| udp_tpi_connect(queue_t *q, mblk_t *mp) |
| { |
| conn_t *connp = Q_TO_CONN(q); |
| int error; |
| socklen_t len; |
| struct sockaddr *sa; |
| struct T_conn_req *tcr; |
| cred_t *cr; |
| pid_t pid; |
| /* |
| * All Solaris components should pass a db_credp |
| * for this TPI message, hence we ASSERT. |
| * But in case there is some other M_PROTO that looks |
| * like a TPI message sent by some other kernel |
| * component, we check and return an error. |
| */ |
| cr = msg_getcred(mp, &pid); |
| ASSERT(cr != NULL); |
| if (cr == NULL) { |
| udp_err_ack(q, mp, TSYSERR, EINVAL); |
| return; |
| } |
| |
| tcr = (struct T_conn_req *)mp->b_rptr; |
| |
| /* A bit of sanity checking */ |
| if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { |
| udp_err_ack(q, mp, TPROTO, 0); |
| return; |
| } |
| |
| if (tcr->OPT_length != 0) { |
| udp_err_ack(q, mp, TBADOPT, 0); |
| return; |
| } |
| |
| /* |
| * Determine the packet type based on the type of address passed in; |
| * the request should contain an IPv4 or IPv6 address. |
| * Make sure that the socket's address family matches the family of |
| * the address passed down. |
| */ |
| len = tcr->DEST_length; |
| switch (tcr->DEST_length) { |
| default: |
| udp_err_ack(q, mp, TBADADDR, 0); |
| return; |
| |
| case sizeof (sin_t): |
| sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, |
| sizeof (sin_t)); |
| break; |
| |
| case sizeof (sin6_t): |
| sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, |
| sizeof (sin6_t)); |
| break; |
| } |
| |
| error = proto_verify_ip_addr(connp->conn_family, sa, len); |
| if (error != 0) { |
| udp_err_ack(q, mp, TSYSERR, error); |
| return; |
| } |
| |
| error = udp_do_connect(connp, sa, len, cr, pid); |
| if (error != 0) { |
| if (error < 0) |
| udp_err_ack(q, mp, -error, 0); |
| else |
| udp_err_ack(q, mp, TSYSERR, error); |
| } else { |
| mblk_t *mp1; |
| /* |
| * We have to send a connection confirmation to |
| * keep TLI happy. |
| */ |
| if (connp->conn_family == AF_INET) { |
| mp1 = mi_tpi_conn_con(NULL, (char *)sa, |
| sizeof (sin_t), NULL, 0); |
| } else { |
| mp1 = mi_tpi_conn_con(NULL, (char *)sa, |
| sizeof (sin6_t), NULL, 0); |
| } |
| if (mp1 == NULL) { |
| udp_err_ack(q, mp, TSYSERR, ENOMEM); |
| return; |
| } |
| |
| /* |
| * Send ok_ack for T_CONN_REQ |
| */ |
| mp = mi_tpi_ok_ack_alloc(mp); |
| if (mp == NULL) { |
| /* Unable to reuse the T_CONN_REQ for the ack. */ |
| udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); |
| return; |
| } |
| |
| putnext(connp->conn_rq, mp); |
| putnext(connp->conn_rq, mp1); |
| } |
| } |
| |
| /* ARGSUSED */ |
| static int |
| udp_tpi_close(queue_t *q, int flags, cred_t *credp __unused) |
| { |
| conn_t *connp; |
| |
| if (flags & SO_FALLBACK) { |
| /* |
| * stream is being closed while in fallback |
| * simply free the resources that were allocated |
| */ |
| inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); |
| qprocsoff(q); |
| goto done; |
| } |
| |
| connp = Q_TO_CONN(q); |
| udp_do_close(connp); |
| done: |
| q->q_ptr = WR(q)->q_ptr = NULL; |
| return (0); |
| } |
| |
| static void |
| udp_close_free(conn_t *connp) |
| { |
| udp_t *udp = connp->conn_udp; |
| |
| /* If there are any options associated with the stream, free them. */ |
| if (udp->udp_recv_ipp.ipp_fields != 0) |
| ip_pkt_free(&udp->udp_recv_ipp); |
| |
| /* |
| * Clear any fields which the kmem_cache constructor clears. |
| * Only udp_connp needs to be preserved. |
| * TBD: We should make this more efficient to avoid clearing |
| * everything. |
| */ |
| ASSERT(udp->udp_connp == connp); |
| bzero(udp, sizeof (udp_t)); |
| udp->udp_connp = connp; |
| } |
| |
| static int |
| udp_do_disconnect(conn_t *connp) |
| { |
| udp_t *udp; |
| udp_fanout_t *udpf; |
| udp_stack_t *us; |
| int error; |
| |
| udp = connp->conn_udp; |
| us = udp->udp_us; |
| mutex_enter(&connp->conn_lock); |
| if (udp->udp_state != TS_DATA_XFER) { |
| mutex_exit(&connp->conn_lock); |
| return (-TOUTSTATE); |
| } |
| udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, |
| us->us_bind_fanout_size)]; |
| mutex_enter(&udpf->uf_lock); |
| if (connp->conn_mcbc_bind) |
| connp->conn_saddr_v6 = ipv6_all_zeros; |
| else |
| connp->conn_saddr_v6 = connp->conn_bound_addr_v6; |
| connp->conn_laddr_v6 = connp->conn_bound_addr_v6; |
| connp->conn_faddr_v6 = ipv6_all_zeros; |
| connp->conn_fport = 0; |
| udp->udp_state = TS_IDLE; |
| mutex_exit(&udpf->uf_lock); |
| |
| /* Remove any remnants of mapped address binding */ |
| if (connp->conn_family == AF_INET6) |
| connp->conn_ipversion = IPV6_VERSION; |
| |
| connp->conn_v6lastdst = ipv6_all_zeros; |
| error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, |
| &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); |
| mutex_exit(&connp->conn_lock); |
| if (error != 0) |
| return (error); |
| |
| /* |
| * Tell IP to remove the full binding and revert |
| * to the local address binding. |
| */ |
| return (ip_laddr_fanout_insert(connp)); |
| } |
| |
| static void |
| udp_tpi_disconnect(queue_t *q, mblk_t *mp) |
| { |
| conn_t *connp = Q_TO_CONN(q); |
| int error; |
| |
| /* |
| * Allocate the largest primitive we need to send back; |
| * T_error_ack is larger than T_ok_ack. |
| */ |
| mp = reallocb(mp, sizeof (struct T_error_ack), 1); |
| if (mp == NULL) { |
| /* Unable to reuse the T_DISCON_REQ for the ack. */ |
| udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); |
| return; |
| } |
| |
| error = udp_do_disconnect(connp); |
| |
| if (error != 0) { |
| if (error < 0) { |
| udp_err_ack(q, mp, -error, 0); |
| } else { |
| udp_err_ack(q, mp, TSYSERR, error); |
| } |
| } else { |
| mp = mi_tpi_ok_ack_alloc(mp); |
| ASSERT(mp != NULL); |
| qreply(q, mp); |
| } |
| } |
| |
| int |
| udp_disconnect(conn_t *connp) |
| { |
| int error; |
| |
| connp->conn_dgram_errind = B_FALSE; |
| error = udp_do_disconnect(connp); |
| if (error < 0) |
| error = proto_tlitosyserr(-error); |
| |
| return (error); |
| } |
| |
| /* This routine creates a T_ERROR_ACK message and passes it upstream. */ |
| static void |
| udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) |
| { |
| if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) |
| qreply(q, mp); |
| } |
| |
| /* Shorthand to generate and send TPI error acks to our client */ |
| static void |
| udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, |
| t_scalar_t t_error, int sys_error) |
| { |
| struct T_error_ack *teackp; |
| |
| if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), |
| M_PCPROTO, T_ERROR_ACK)) != NULL) { |
| teackp = (struct T_error_ack *)mp->b_rptr; |
| teackp->ERROR_prim = primitive; |
| teackp->TLI_error = t_error; |
| teackp->UNIX_error = sys_error; |
| qreply(q, mp); |
| } |
| } |
| |
| /* At minimum we need 4 bytes of UDP header */ |
| #define ICMP_MIN_UDP_HDR 4 |
| |
| /* |
| * udp_icmp_input is called as conn_recvicmp to process ICMP messages. |
| * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. |
| * Assumes that IP has pulled up everything up to and including the ICMP header. |
| */ |
| /* ARGSUSED2 */ |
| static void |
| udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) |
| { |
| conn_t *connp = (conn_t *)arg1; |
| icmph_t *icmph; |
| ipha_t *ipha; |
| int iph_hdr_length; |
| udpha_t *udpha; |
| sin_t sin; |
| sin6_t sin6; |
| mblk_t *mp1; |
| int error = 0; |
| udp_t *udp = connp->conn_udp; |
| |
| ipha = (ipha_t *)mp->b_rptr; |
| |
| ASSERT(OK_32PTR(mp->b_rptr)); |
| |
| if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { |
| ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); |
| udp_icmp_error_ipv6(connp, mp, ira); |
| return; |
| } |
| ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); |
| |
| /* Skip past the outer IP and ICMP headers */ |
| ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); |
| iph_hdr_length = ira->ira_ip_hdr_length; |
| icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; |
| ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ |
| |
| /* Skip past the inner IP and find the ULP header */ |
| iph_hdr_length = IPH_HDR_LENGTH(ipha); |
| udpha = (udpha_t *)((char *)ipha + iph_hdr_length); |
| |
| switch (icmph->icmph_type) { |
| case ICMP_DEST_UNREACHABLE: |
| switch (icmph->icmph_code) { |
| case ICMP_FRAGMENTATION_NEEDED: { |
| ipha_t *ipha; |
| ip_xmit_attr_t *ixa; |
| /* |
| * IP has already adjusted the path MTU. |
| * But we need to adjust DF for IPv4. |
| */ |
| if (connp->conn_ipversion != IPV4_VERSION) |
| break; |
| |
| ixa = conn_get_ixa(connp, B_FALSE); |
| if (ixa == NULL || ixa->ixa_ire == NULL) { |
| /* |
| * Some other thread holds conn_ixa. We will |
| * redo this on the next ICMP too big. |
| */ |
| if (ixa != NULL) |
| ixa_refrele(ixa); |
| break; |
| } |
| (void) ip_get_pmtu(ixa); |
| |
| mutex_enter(&connp->conn_lock); |
| ipha = (ipha_t *)connp->conn_ht_iphc; |
| if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { |
| ipha->ipha_fragment_offset_and_flags |= |
| IPH_DF_HTONS; |
| } else { |
| ipha->ipha_fragment_offset_and_flags &= |
| ~IPH_DF_HTONS; |
| } |
| mutex_exit(&connp->conn_lock); |
| ixa_refrele(ixa); |
| break; |
| } |
| case ICMP_PORT_UNREACHABLE: |
| case ICMP_PROTOCOL_UNREACHABLE: |
| error = ECONNREFUSED; |
| break; |
| default: |
| /* Transient errors */ |
| break; |
| } |
| break; |
| default: |
| /* Transient errors */ |
| break; |
| } |
| if (error == 0) { |
| freemsg(mp); |
| return; |
| } |
| |
| /* |
| * Deliver T_UDERROR_IND when the application has asked for it. |
| * The socket layer enables this automatically when connected. |
| */ |
| if (!connp->conn_dgram_errind) { |
| freemsg(mp); |
| return; |
| } |
| |
| switch (connp->conn_family) { |
| case AF_INET: |
| sin = sin_null; |
| sin.sin_family = AF_INET; |
| sin.sin_addr.s_addr = ipha->ipha_dst; |
| sin.sin_port = udpha->uha_dst_port; |
| if (IPCL_IS_NONSTR(connp)) { |
| mutex_enter(&connp->conn_lock); |
| if (udp->udp_state == TS_DATA_XFER) { |
| if (sin.sin_port == connp->conn_fport && |
| sin.sin_addr.s_addr == |
| connp->conn_faddr_v4) { |
| mutex_exit(&connp->conn_lock); |
| (*connp->conn_upcalls->su_set_error) |
| (connp->conn_upper_handle, error); |
| goto done; |
| } |
| } else { |
| udp->udp_delayed_error = error; |
| *((sin_t *)&udp->udp_delayed_addr) = sin; |
| } |
| mutex_exit(&connp->conn_lock); |
| } else { |
| mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), |
| NULL, 0, error); |
| if (mp1 != NULL) |
| putnext(connp->conn_rq, mp1); |
| } |
| break; |
| case AF_INET6: |
| sin6 = sin6_null; |
| sin6.sin6_family = AF_INET6; |
| IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); |
| sin6.sin6_port = udpha->uha_dst_port; |
| if (IPCL_IS_NONSTR(connp)) { |
| mutex_enter(&connp->conn_lock); |
| if (udp->udp_state == TS_DATA_XFER) { |
| if (sin6.sin6_port == connp->conn_fport && |
| IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, |
| &connp->conn_faddr_v6)) { |
| mutex_exit(&connp->conn_lock); |
| (*connp->conn_upcalls->su_set_error) |
| (connp->conn_upper_handle, error); |
| goto done; |
| } |
| } else { |
| udp->udp_delayed_error = error; |
| *((sin6_t *)&udp->udp_delayed_addr) = sin6; |
| } |
| mutex_exit(&connp->conn_lock); |
| } else { |
| mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), |
| NULL, 0, error); |
| if (mp1 != NULL) |
| putnext(connp->conn_rq, mp1); |
| } |
| break; |
| } |
| done: |
| freemsg(mp); |
| } |
| |
| /* |
| * udp_icmp_error_ipv6 is called by udp_icmp_input to process ICMP for IPv6. |
| * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. |
| * Assumes that IP has pulled up all the extension headers as well as the |
| * ICMPv6 header. |
| */ |
| static void |
| udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) |
| { |
| icmp6_t *icmp6; |
| ip6_t *ip6h, *outer_ip6h; |
| uint16_t iph_hdr_length; |
| uint8_t *nexthdrp; |
| udpha_t *udpha; |
| sin6_t sin6; |
| mblk_t *mp1; |
| int error = 0; |
| udp_t *udp = connp->conn_udp; |
| udp_stack_t *us = udp->udp_us; |
| |
| outer_ip6h = (ip6_t *)mp->b_rptr; |
| #ifdef DEBUG |
| if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) |
| iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); |
| else |
| iph_hdr_length = IPV6_HDR_LEN; |
| ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); |
| #endif |
| /* Skip past the outer IP and ICMP headers */ |
| iph_hdr_length = ira->ira_ip_hdr_length; |
| icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; |
| |
| /* Skip past the inner IP and find the ULP header */ |
| ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ |
| if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { |
| freemsg(mp); |
| return; |
| } |
| udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); |
| |
| switch (icmp6->icmp6_type) { |
| case ICMP6_DST_UNREACH: |
| switch (icmp6->icmp6_code) { |
| case ICMP6_DST_UNREACH_NOPORT: |
| error = ECONNREFUSED; |
| break; |
| case ICMP6_DST_UNREACH_ADMIN: |
| case ICMP6_DST_UNREACH_NOROUTE: |
| case ICMP6_DST_UNREACH_BEYONDSCOPE: |
| case ICMP6_DST_UNREACH_ADDR: |
| /* Transient errors */ |
| break; |
| default: |
| break; |
| } |
| break; |
| case ICMP6_PACKET_TOO_BIG: { |
| struct T_unitdata_ind *tudi; |
| struct T_opthdr *toh; |
| size_t udi_size; |
| mblk_t *newmp; |
| t_scalar_t opt_length = sizeof (struct T_opthdr) + |
| sizeof (struct ip6_mtuinfo); |
| sin6_t *sin6; |
| struct ip6_mtuinfo *mtuinfo; |
| |
| /* |
| * If the application has requested to receive path mtu |
| * information, send up an empty message containing an |
| * IPV6_PATHMTU ancillary data item. |
| */ |
| if (!connp->conn_ipv6_recvpathmtu) |
| break; |
| |
| udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + |
| opt_length; |
| if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { |
| UDPS_BUMP_MIB(us, udpInErrors); |
| break; |
| } |
| |
| /* |
| * newmp->b_cont is left NULL on purpose. This is an |
| * empty message containing only ancillary data. |
| */ |
| newmp->b_datap->db_type = M_PROTO; |
| tudi = (struct T_unitdata_ind *)newmp->b_rptr; |
| newmp->b_wptr = (uchar_t *)tudi + udi_size; |
| tudi->PRIM_type = T_UNITDATA_IND; |
| tudi->SRC_length = sizeof (sin6_t); |
| tudi->SRC_offset = sizeof (struct T_unitdata_ind); |
| tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); |
| tudi->OPT_length = opt_length; |
| |
| sin6 = (sin6_t *)&tudi[1]; |
| bzero(sin6, sizeof (sin6_t)); |
| sin6->sin6_family = AF_INET6; |
| sin6->sin6_addr = connp->conn_faddr_v6; |
| |
| toh = (struct T_opthdr *)&sin6[1]; |
| toh->level = IPPROTO_IPV6; |
| toh->name = IPV6_PATHMTU; |
| toh->len = opt_length; |
| toh->status = 0; |
| |
| mtuinfo = (struct ip6_mtuinfo *)&toh[1]; |
| bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); |
| mtuinfo->ip6m_addr.sin6_family = AF_INET6; |
| mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; |
| mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; |
| /* |
| * We've consumed everything we need from the original |
| * message. Free it, then send our empty message. |
| */ |
| freemsg(mp); |
| udp_ulp_recv(connp, newmp, msgdsize(newmp), ira); |
| return; |
| } |
| case ICMP6_TIME_EXCEEDED: |
| /* Transient errors */ |
| break; |
| case ICMP6_PARAM_PROB: |
| /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ |
| if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && |
| (uchar_t *)ip6h + icmp6->icmp6_pptr == |
| (uchar_t *)nexthdrp) { |
| error = ECONNREFUSED; |
| break; |
| } |
| break; |
| } |
| if (error == 0) { |
| freemsg(mp); |
| return; |
| } |
| |
| /* |
| * Deliver T_UDERROR_IND when the application has asked for it. |
| * The socket layer enables this automatically when connected. |
| */ |
| if (!connp->conn_dgram_errind) { |
| freemsg(mp); |
| return; |
| } |
| |
| sin6 = sin6_null; |
| sin6.sin6_family = AF_INET6; |
| sin6.sin6_addr = ip6h->ip6_dst; |
| sin6.sin6_port = udpha->uha_dst_port; |
| sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; |
| |
| if (IPCL_IS_NONSTR(connp)) { |
| mutex_enter(&connp->conn_lock); |
| if (udp->udp_state == TS_DATA_XFER) { |
| if (sin6.sin6_port == connp->conn_fport && |
| IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, |
| &connp->conn_faddr_v6)) { |
| mutex_exit(&connp->conn_lock); |
| (*connp->conn_upcalls->su_set_error) |
| (connp->conn_upper_handle, error); |
| goto done; |
| } |
| } else { |
| udp->udp_delayed_error = error; |
| *((sin6_t *)&udp->udp_delayed_addr) = sin6; |
| } |
| mutex_exit(&connp->conn_lock); |
| } else { |
| mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), |
| NULL, 0, error); |
| if (mp1 != NULL) |
| putnext(connp->conn_rq, mp1); |
| } |
| done: |
| freemsg(mp); |
| } |
| |
| /* |
| * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. |
| * The local address is filled in if the endpoint is bound. The remote |
| * address is filled in if the remote address has been specified |
| * ("connected endpoint") (the concept of connected CLTS sockets is alien |
| * to published TPI, but we support it anyway). |
| */ |
| static void |
| udp_addr_req(queue_t *q, mblk_t *mp) |
| { |
| struct sockaddr *sa; |
| mblk_t *ackmp; |
| struct T_addr_ack *taa; |
| udp_t *udp = Q_TO_UDP(q); |
| conn_t *connp = udp->udp_connp; |
| uint_t addrlen; |
| |
| /* Make it large enough for worst case */ |
| ackmp = reallocb(mp, sizeof (struct T_addr_ack) + |
| 2 * sizeof (sin6_t), 1); |
| if (ackmp == NULL) { |
| udp_err_ack(q, mp, TSYSERR, ENOMEM); |
| return; |
| } |
| taa = (struct T_addr_ack *)ackmp->b_rptr; |
| |
| bzero(taa, sizeof (struct T_addr_ack)); |
| ackmp->b_wptr = (uchar_t *)&taa[1]; |
| |
| taa->PRIM_type = T_ADDR_ACK; |
| ackmp->b_datap->db_type = M_PCPROTO; |
| |
| if (connp->conn_family == AF_INET) |
| addrlen = sizeof (sin_t); |
| else |
| addrlen = sizeof (sin6_t); |
| |
| mutex_enter(&connp->conn_lock); |
| /* |
| * Note: The following code assumes 32-bit alignment of basic |
| * data structures like sin_t and struct T_addr_ack. |
| */ |
| if (udp->udp_state != TS_UNBND) { |
| /* |
| * Fill in local address first |
| */ |
| taa->LOCADDR_offset = sizeof (*taa); |
| taa->LOCADDR_length = addrlen; |
| sa = (struct sockaddr *)&taa[1]; |
| (void) conn_getsockname(connp, sa, &addrlen); |
| ackmp->b_wptr += addrlen; |
| } |
| if (udp->udp_state == TS_DATA_XFER) { |
| /* |
| * connected, fill remote address too |
| */ |
| taa->REMADDR_length = addrlen; |
| /* assumed 32-bit alignment */ |
| taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; |
| sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); |
| (void) conn_getpeername(connp, sa, &addrlen); |
| ackmp->b_wptr += addrlen; |
| } |
| mutex_exit(&connp->conn_lock); |
| ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); |
| qreply(q, ackmp); |
| } |
| |
| static void |
| udp_copy_info(struct T_info_ack *tap, udp_t *udp) |
| { |
| conn_t *connp = udp->udp_connp; |
| |
| if (connp->conn_family == AF_INET) { |
| *tap = udp_g_t_info_ack_ipv4; |
| } else { |
| *tap = udp_g_t_info_ack_ipv6; |
| } |
| tap->CURRENT_state = udp->udp_state; |
| tap->OPT_size = udp_max_optsize; |
| } |
| |
| static void |
| udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, |
| t_uscalar_t cap_bits1) |
| { |
| tcap->CAP_bits1 = 0; |
| |
| if (cap_bits1 & TC1_INFO) { |
| udp_copy_info(&tcap->INFO_ack, udp); |
| tcap->CAP_bits1 |= TC1_INFO; |
| } |
| } |
| |
| /* |
| * This routine responds to T_CAPABILITY_REQ messages. It is called by |
| * udp_wput. Much of the T_CAPABILITY_ACK information is copied from |
| * udp_g_t_info_ack. The current state of the stream is copied from |
| * udp_state. |
| */ |
| static void |
| udp_capability_req(queue_t *q, mblk_t *mp) |
| { |
| t_uscalar_t cap_bits1; |
| struct T_capability_ack *tcap; |
| udp_t *udp = Q_TO_UDP(q); |
| |
| cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; |
| |
| mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), |
| mp->b_datap->db_type, T_CAPABILITY_ACK); |
| if (!mp) |
| return; |
| |
| tcap = (struct T_capability_ack *)mp->b_rptr; |
| udp_do_capability_ack(udp, tcap, cap_bits1); |
| |
| qreply(q, mp); |
| } |
| |
| /* |
| * This routine responds to T_INFO_REQ messages. It is called by udp_wput. |
| * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. |
| * The current state of the stream is copied from udp_state. |
| */ |
| static void |
| udp_info_req(queue_t *q, mblk_t *mp) |
| { |
| udp_t *udp = Q_TO_UDP(q); |
| |
| /* Create a T_INFO_ACK message. */ |
| mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, |
| T_INFO_ACK); |
| if (!mp) |
| return; |
| udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); |
| qreply(q, mp); |
| } |
| |
| /* For /dev/udp aka AF_INET open */ |
| static int |
| udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) |
| { |
| return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); |
| } |
| |
| /* For /dev/udp6 aka AF_INET6 open */ |
| static int |
| udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) |
| { |
| return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); |
| } |
| |
| /* |
| * This is the open routine for udp. It allocates a udp_t structure for |
| * the stream and, on the first open of the module, creates an ND table. |
| */ |
| static int |
| udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, |
| boolean_t isv6) |
| { |
| udp_t *udp; |
| conn_t *connp; |
| dev_t conn_dev; |
| vmem_t *minor_arena; |
| int err; |
| |
| /* If the stream is already open, return immediately. */ |
| if (q->q_ptr != NULL) |
| return (0); |
| |
| if (sflag == MODOPEN) |
| return (EINVAL); |
| |
| if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && |
| ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { |
| minor_arena = ip_minor_arena_la; |
| } else { |
| /* |
| * Either minor numbers in the large arena were exhausted |
| * or a non-socket application is doing the open. |
| * Try to allocate from the small arena. |
| */ |
| if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) |
| return (EBUSY); |
| |
| minor_arena = ip_minor_arena_sa; |
| } |
| |
| if (flag & SO_FALLBACK) { |
| /* |
| * A non-STREAMS socket needs a stream to fall back to |
| */ |
| RD(q)->q_ptr = (void *)conn_dev; |
| WR(q)->q_qinfo = &udp_fallback_sock_winit; |
| WR(q)->q_ptr = (void *)minor_arena; |
| qprocson(q); |
| return (0); |
| } |
| |
| connp = udp_do_open(credp, isv6, KM_SLEEP, &err); |
| if (connp == NULL) { |
| inet_minor_free(minor_arena, conn_dev); |
| return (err); |
| } |
| udp = connp->conn_udp; |
| |
| *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); |
| connp->conn_dev = conn_dev; |
| connp->conn_minor_arena = minor_arena; |
| |
| /* |
| * Initialize the udp_t structure for this stream. |
| */ |
| q->q_ptr = connp; |
| WR(q)->q_ptr = connp; |
| connp->conn_rq = q; |
| connp->conn_wq = WR(q); |
| |
| /* |
| * Since this conn_t/udp_t is not yet visible to anybody else we don't |
| * need to lock anything. |
| */ |
| ASSERT(connp->conn_proto == IPPROTO_UDP); |
| ASSERT(connp->conn_udp == udp); |
| ASSERT(udp->udp_connp == connp); |
| |
| if (flag & SO_SOCKSTR) { |
| udp->udp_issocket = B_TRUE; |
| } |
| |
| WR(q)->q_hiwat = connp->conn_sndbuf; |
| WR(q)->q_lowat = connp->conn_sndlowat; |
| |
| qprocson(q); |
| |
| /* Set the Stream head write offset and high watermark. */ |
| (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); |
| (void) proto_set_rx_hiwat(q, connp, |
| udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); |
| |
| mutex_enter(&connp->conn_lock); |
| connp->conn_state_flags &= ~CONN_INCIPIENT; |
| mutex_exit(&connp->conn_lock); |
| return (0); |
| } |
| |
| /* |
| * Which UDP options are OK to set through T_UNITDATA_REQ... |
| */ |
| /* ARGSUSED */ |
| static boolean_t |
| udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) |
| { |
| return (B_TRUE); |
| } |
| |
| /* |
| * This routine gets the default values of certain options whose default |
| * values are maintained by protocol-specific code. |
| */ |
| int |
| udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) |
| { |
| udp_t *udp = Q_TO_UDP(q); |
| udp_stack_t *us = udp->udp_us; |
| int *i1 = (int *)ptr; |
| |
| switch (level) { |
| case IPPROTO_IP: |
| switch (name) { |
| case IP_MULTICAST_TTL: |
| *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; |
| return (sizeof (uchar_t)); |
| case IP_MULTICAST_LOOP: |
| *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; |
| return (sizeof (uchar_t)); |
| } |
| break; |
| case IPPROTO_IPV6: |
| switch (name) { |
| case IPV6_MULTICAST_HOPS: |
| *i1 = IP_DEFAULT_MULTICAST_TTL; |
| return (sizeof (int)); |
| case IPV6_MULTICAST_LOOP: |
| *i1 = IP_DEFAULT_MULTICAST_LOOP; |
| return (sizeof (int)); |
| case IPV6_UNICAST_HOPS: |
| *i1 = us->us_ipv6_hoplimit; |
| return (sizeof (int)); |
| } |
| break; |
| } |
| return (-1); |
| } |
| |
| /* |
| * This routine retrieves the current status of socket options. |
| * It returns the size of the option retrieved, or -1. |
| */ |
| int |
| udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, |
| uchar_t *ptr) |
| { |
| int *i1 = (int *)ptr; |
| udp_t *udp = connp->conn_udp; |
| int len; |
| conn_opt_arg_t coas; |
| int retval; |
| |
| coas.coa_connp = connp; |
| coas.coa_ixa = connp->conn_ixa; |
| coas.coa_ipp = &connp->conn_xmit_ipp; |
| coas.coa_ancillary = B_FALSE; |
| coas.coa_changed = 0; |
| |
| /* |
| * We assume that the optcom framework has checked for the set |
| * of levels and names that are supported, hence we don't worry |
| * about rejecting based on that. |
| * First check for UDP specific handling, then pass to common routine. |
| */ |
| switch (level) { |
| case IPPROTO_IP: |
| /* |
| * Only allow IPv4 option processing on IPv4 sockets. |
| */ |
| if (connp->conn_family != AF_INET) |
| return (-1); |
| |
| switch (name) { |
| case IP_OPTIONS: |
| case T_IP_OPTIONS: |
| mutex_enter(&connp->conn_lock); |
| if (!(udp->udp_recv_ipp.ipp_fields & |
| IPPF_IPV4_OPTIONS)) { |
| mutex_exit(&connp->conn_lock); |
| return (0); |
| } |
| |
| len = udp->udp_recv_ipp.ipp_ipv4_options_len; |
| ASSERT(len != 0); |
| bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); |
| mutex_exit(&connp->conn_lock); |
| return (len); |
| } |
| break; |
| case IPPROTO_UDP: |
| switch (name) { |
| case UDP_NAT_T_ENDPOINT: |
| mutex_enter(&connp->conn_lock); |
| *i1 = udp->udp_nat_t_endpoint; |
| mutex_exit(&connp->conn_lock); |
| return (sizeof (int)); |
| case UDP_RCVHDR: |
| mutex_enter(&connp->conn_lock); |
| *i1 = udp->udp_rcvhdr ? 1 : 0; |
| mutex_exit(&connp->conn_lock); |
| return (sizeof (int)); |
| } |
| } |
| mutex_enter(&connp->conn_lock); |
| retval = conn_opt_get(&coas, level, name, ptr); |
| mutex_exit(&connp->conn_lock); |
| return (retval); |
| } |
| |
| /* |
| * This routine retrieves the current status of socket options. |
| * It returns the size of the option retrieved, or -1. |
| */ |
| int |
| udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) |
| { |
| conn_t *connp = Q_TO_CONN(q); |
| int err; |
| |
| err = udp_opt_get(connp, level, name, ptr); |
| return (err); |
| } |
| |
| /* |
| * This routine sets socket options. |
| */ |
| int |
| udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, |
| uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) |
| { |
| conn_t *connp = coa->coa_connp; |
| ip_xmit_attr_t *ixa = coa->coa_ixa; |
| udp_t *udp = connp->conn_udp; |
| udp_stack_t *us = udp->udp_us; |
| int *i1 = (int *)invalp; |
| boolean_t onoff = (*i1 == 0) ? 0 : 1; |
| int error; |
| |
| ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); |
| /* |
| * First do UDP specific sanity checks and handle UDP specific |
| * options. Note that some IPPROTO_UDP options are handled |
| * by conn_opt_set. |
| */ |
| switch (level) { |
| case SOL_SOCKET: |
| switch (name) { |
| case SO_SNDBUF: |
| if (*i1 > us->us_max_buf) { |
| return (ENOBUFS); |
| } |
| break; |
| case SO_RCVBUF: |
| if (*i1 > us->us_max_buf) { |
| return (ENOBUFS); |
| } |
| break; |
| |
| case SCM_UCRED: { |
| struct ucred_s *ucr; |
| cred_t *newcr; |
| ts_label_t *tsl; |
| |
| /* |
| * Only sockets that have proper privileges and are |
| * bound to MLPs will have any other value here, so |
| * this implicitly tests for privilege to set label. |
| */ |
| if (connp->conn_mlp_type == mlptSingle) |
| break; |
| |
| ucr = (struct ucred_s *)invalp; |
| if (inlen < sizeof (*ucr) + sizeof (bslabel_t) || |
| ucr->uc_labeloff < sizeof (*ucr) || |
| ucr->uc_labeloff + sizeof (bslabel_t) > inlen) |
| return (EINVAL); |
| if (!checkonly) { |
| /* |
| * Set ixa_tsl to the new label. |
| * We assume that crgetzoneid doesn't change |
| * as part of the SCM_UCRED. |
| */ |
| ASSERT(cr != NULL); |
| if ((tsl = crgetlabel(cr)) == NULL) |
| return (EINVAL); |
| newcr = copycred_from_bslabel(cr, UCLABEL(ucr), |
| tsl->tsl_doi, KM_NOSLEEP); |
| if (newcr == NULL) |
| return (ENOSR); |
| ASSERT(newcr->cr_label != NULL); |
| /* |
| * Move the hold on the cr_label to ixa_tsl by |
| * setting cr_label to NULL. Then release newcr. |
| */ |
| ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); |
| ixa->ixa_flags |= IXAF_UCRED_TSL; |
| newcr->cr_label = NULL; |
| crfree(newcr); |
| coa->coa_changed |= COA_HEADER_CHANGED; |
| coa->coa_changed |= COA_WROFF_CHANGED; |
| } |
| /* Fully handled this option. */ |
| return (0); |
| } |
| } |
| break; |
| case IPPROTO_UDP: |
| switch (name) { |
| case UDP_NAT_T_ENDPOINT: |
| if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { |
| return (error); |
| } |
| |
| /* |
| * Use conn_family instead so we can avoid ambiguities |
| * with AF_INET6 sockets that may switch from IPv4 |
| * to IPv6. |
| */ |
| if (connp->conn_family != AF_INET) { |
| return (EAFNOSUPPORT); |
| } |
| |
| if (!checkonly) { |
| mutex_enter(&connp->conn_lock); |
| udp->udp_nat_t_endpoint = onoff; |
| mutex_exit(&connp->conn_lock); |
| coa->coa_changed |= COA_HEADER_CHANGED; |
| coa->coa_changed |= COA_WROFF_CHANGED; |
| } |
| /* Fully handled this option. */ |
| return (0); |
| case UDP_RCVHDR: |
| mutex_enter(&connp->conn_lock); |
| udp->udp_rcvhdr = onoff; |
| mutex_exit(&connp->conn_lock); |
| return (0); |
| } |
| break; |
| } |
| error = conn_opt_set(coa, level, name, inlen, invalp, |
| checkonly, cr); |
| return (error); |
| } |
| |
| /* |
| * This routine sets socket options. |
| */ |
| int |
| udp_opt_set(conn_t *connp, uint_t optset_context, int level, |
| int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, |
| uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) |
| { |
| udp_t *udp = connp->conn_udp; |
| int err; |
| conn_opt_arg_t coas, *coa; |
| boolean_t checkonly; |
| udp_stack_t *us = udp->udp_us; |
| |
| switch (optset_context) { |
| case SETFN_OPTCOM_CHECKONLY: |
| checkonly = B_TRUE; |
| /* |
| * Note: This implies T_CHECK semantics for T_OPTCOM_REQ. |
| * inlen != 0 implies a value was supplied and we have to |
| * "pretend" to set it. |
| * inlen == 0 implies that there is no value part in the |
| * T_CHECK request, and the validation done elsewhere should |
| * be enough, so we just return here. |
| */ |
| if (inlen == 0) { |
| *outlenp = 0; |
| return (0); |
| } |
| break; |
| case SETFN_OPTCOM_NEGOTIATE: |
| checkonly = B_FALSE; |
| break; |
| case SETFN_UD_NEGOTIATE: |
| case SETFN_CONN_NEGOTIATE: |
| checkonly = B_FALSE; |
| /* |
| * Negotiating local and "association-related" options |
| * through T_UNITDATA_REQ. |
| * |
| * The following routine can filter out the ones we do not |
| * want to be "set" this way. |
| */ |
| if (!udp_opt_allow_udr_set(level, name)) { |
| *outlenp = 0; |
| return (EINVAL); |
| } |
| break; |
| default: |
| /* |
| * We should never get here |
| */ |
| *outlenp = 0; |
| return (EINVAL); |
| } |
| |
| ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || |
| (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); |
| |
| if (thisdg_attrs != NULL) { |
| /* Options from T_UNITDATA_REQ */ |
| coa = (conn_opt_arg_t *)thisdg_attrs; |
| ASSERT(coa->coa_connp == connp); |
| ASSERT(coa->coa_ixa != NULL); |
| ASSERT(coa->coa_ipp != NULL); |
| ASSERT(coa->coa_ancillary); |
| } else { |
| coa = &coas; |
| coas.coa_connp = connp; |
| /* Get a reference on conn_ixa to prevent concurrent mods */ |
| coas.coa_ixa = conn_get_ixa(connp, B_TRUE); |
| if (coas.coa_ixa == NULL) { |
| *outlenp = 0; |
| return (ENOMEM); |
| } |
| coas.coa_ipp = &connp->conn_xmit_ipp; |
| coas.coa_ancillary = B_FALSE; |
| coas.coa_changed = 0; |
| } |
| |
| err = udp_do_opt_set(coa, level, name, inlen, invalp, |
| cr, checkonly); |
| if (err != 0) { |
| errout: |
| if (!coa->coa_ancillary) |
| ixa_refrele(coa->coa_ixa); |
| *outlenp = 0; |
| return (err); |
| } |
| /* Handle DHCPINIT here outside of lock */ |
| if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) { |
| uint_t ifindex; |
| ill_t *ill; |
| |
| ifindex = *(uint_t *)invalp; |
| if (ifindex == 0) { |
| ill = NULL; |
| } else { |
| ill = ill_lookup_on_ifindex(ifindex, B_FALSE, |
| coa->coa_ixa->ixa_ipst); |
| if (ill == NULL) { |
| err = ENXIO; |
| goto errout; |
| } |
| |
| mutex_enter(&ill->ill_lock); |
| if (ill->ill_state_flags & ILL_CONDEMNED) { |
| mutex_exit(&ill->ill_lock); |
| ill_refrele(ill); |
| err = ENXIO; |
| goto errout; |
| } |
| if (IS_VNI(ill)) { |
| mutex_exit(&ill->ill_lock); |
| ill_refrele(ill); |
| err = EINVAL; |
| goto errout; |
| } |
| } |
| mutex_enter(&connp->conn_lock); |
| |
| if (connp->conn_dhcpinit_ill != NULL) { |
| /* |
| * We've locked the conn so conn_cleanup_ill() |
| * cannot clear conn_dhcpinit_ill -- so it's |
| * safe to access the ill. |
| */ |
| ill_t *oill = connp->conn_dhcpinit_ill; |
| |
| ASSERT(oill->ill_dhcpinit != 0); |
| atomic_dec_32(&oill->ill_dhcpinit); |
| ill_set_inputfn(connp->conn_dhcpinit_ill); |
| connp->conn_dhcpinit_ill = NULL; |
| } |
| |
| if (ill != NULL) { |
| connp->conn_dhcpinit_ill = ill; |
| atomic_inc_32(&ill->ill_dhcpinit); |
| ill_set_inputfn(ill); |
| mutex_exit(&connp->conn_lock); |
| mutex_exit(&ill->ill_lock); |
| ill_refrele(ill); |
| } else { |
| mutex_exit(&connp->conn_lock); |
| } |
| } |
| |
| /* |
| * Common case of OK return with outval same as inval. |
| */ |
| if (invalp != outvalp) { |
| /* don't trust bcopy for identical src/dst */ |
| (void) bcopy(invalp, outvalp, inlen); |
| } |
| *outlenp = inlen; |
| |
| /* |
| * If this was not ancillary data, then we rebuild the headers, |
| * update the IRE/NCE, and IPsec as needed. |
| * Since the label depends on the destination we go through |
| * ip_set_destination first. |
| */ |
| if (coa->coa_ancillary) { |
| return (0); |
| } |
| |
| if (coa->coa_changed & COA_ROUTE_CHANGED) { |
| in6_addr_t saddr, faddr, nexthop; |
| in_port_t fport; |
| |
| /* |
| * We clear lastdst to make sure we pick up the change |
| * next time sending. |
| * If we are connected we re-cache the information. |
| * We ignore errors to preserve BSD behavior. |
| * Note that we don't redo IPsec policy lookup here |
| * since the final destination (or source) didn't change. |
| */ |
| mutex_enter(&connp->conn_lock); |
| connp->conn_v6lastdst = ipv6_all_zeros; |
| |
| ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa, |
| &connp->conn_faddr_v6, &nexthop); |
| saddr = connp->conn_saddr_v6; |
| faddr = connp->conn_faddr_v6; |
| fport = connp->conn_fport; |
| mutex_exit(&connp->conn_lock); |
| |
| if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) && |
| !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) { |
| (void) ip_attr_connect(connp, coa->coa_ixa, |
| &saddr, &faddr, &nexthop, fport, NULL, NULL, |
| IPDF_ALLOW_MCBC | IPDF_VERIFY_DST); |
| } |
| } |
| |
| ixa_refrele(coa->coa_ixa); |
| |
| if (coa->coa_changed & COA_HEADER_CHANGED) { |
| /* |
| * Rebuild the header template if we are connected. |
| * Otherwise clear conn_v6lastdst so we rebuild the header |
| * in the data path. |
| */ |
| mutex_enter(&connp->conn_lock); |
| if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && |
| !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { |
| err = udp_build_hdr_template(connp, |
| &connp->conn_saddr_v6, &connp->conn_faddr_v6, |
| connp->conn_fport, connp->conn_flowinfo); |
| if (err != 0) { |
| mutex_exit(&connp->conn_lock); |
| return (err); |
| } |
| } else { |
| connp->conn_v6lastdst = ipv6_all_zeros; |
| } |
| mutex_exit(&connp->conn_lock); |
| } |
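| /*
| * The remaining coa_changed flags push the new receive/send buffer
| * sizes and the transmit write offset to the stream head or sockfs.
| */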
| if (coa->coa_changed & COA_RCVBUF_CHANGED) { |
| (void) proto_set_rx_hiwat(connp->conn_rq, connp, |
| connp->conn_rcvbuf); |
| } |
| if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { |
| connp->conn_wq->q_hiwat = connp->conn_sndbuf; |
| } |
| if (coa->coa_changed & COA_WROFF_CHANGED) { |
| /* Increase wroff if needed */ |
| uint_t wroff; |
| |
| mutex_enter(&connp->conn_lock); |
| wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra; |
| if (udp->udp_nat_t_endpoint) |
| wroff += sizeof (uint32_t); |
| if (wroff > connp->conn_wroff) { |
| connp->conn_wroff = wroff; |
| mutex_exit(&connp->conn_lock); |
| (void) proto_set_tx_wroff(connp->conn_rq, connp, wroff); |
| } else { |
| mutex_exit(&connp->conn_lock); |
| } |
| } |
| return (err); |
| } |
| |
| /* This routine sets socket options. */ |
| int |
| udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, |
| uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, |
| void *thisdg_attrs, cred_t *cr) |
| { |
| conn_t *connp = Q_TO_CONN(q); |
| int error; |
| |
| error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, |
| outlenp, outvalp, thisdg_attrs, cr); |
| return (error); |
| } |
| |
| /* |
| * Setup IP and UDP headers. |
| * Returns NULL on allocation failure, in which case data_mp is freed. |
| */ |
| mblk_t * |
| udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, |
| const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport, |
| uint32_t flowinfo, mblk_t *data_mp, int *errorp) |
| { |
| mblk_t *mp; |
| udpha_t *udpha; |
| udp_stack_t *us = connp->conn_netstack->netstack_udp; |
| uint_t data_len; |
| uint32_t cksum; |
| udp_t *udp = connp->conn_udp; |
| boolean_t insert_spi = udp->udp_nat_t_endpoint; |
| uint_t ulp_hdr_len; |
| |
| data_len = msgdsize(data_mp); |
| ulp_hdr_len = UDPH_SIZE; |
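| /*
| * A NAT-T endpoint reserves room after the UDP header for the
| * four-byte all-zero SPI marker that distinguishes IKE traffic from
| * UDP-encapsulated ESP; it is filled in below.
| */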
| if (insert_spi) |
| ulp_hdr_len += sizeof (uint32_t); |
| |
| mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo, |
| ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp); |
| if (mp == NULL) { |
| ASSERT(*errorp != 0); |
| return (NULL); |
| } |
| |
| data_len += ulp_hdr_len; |
| ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; |
| |
| udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length); |
| udpha->uha_src_port = connp->conn_lport; |
| udpha->uha_dst_port = dstport; |
| udpha->uha_checksum = 0; |
| udpha->uha_length = htons(data_len); |
| |
| /* |
| * If there was a routing option/header then conn_prepend_hdr |
| * has massaged it and placed the pseudo-header checksum difference |
| * in the cksum argument. |
| * |
| * Setup header length and prepare for ULP checksum done in IP. |
| * |
| * We make it easy for IP to include our pseudo header |
| * by putting our length in uha_checksum. |
| * The IP source, destination, and length have already been set by |
| * conn_prepend_hdr. |
| */ |
| cksum += data_len; |
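| /* Fold the carry back into the low 16 bits (ones'-complement sum). */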
| cksum = (cksum >> 16) + (cksum & 0xFFFF); |
| ASSERT(cksum < 0x10000); |
| |
| if (ixa->ixa_flags & IXAF_IS_IPV4) { |
| ipha_t *ipha = (ipha_t *)mp->b_rptr; |
| |
| ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen); |
| |
| /* IP does the checksum if uha_checksum is non-zero */ |
| if (us->us_do_checksum) { |
| if (cksum == 0) |
| udpha->uha_checksum = 0xffff; |
| else |
| udpha->uha_checksum = htons(cksum); |
| } else { |
| udpha->uha_checksum = 0; |
| } |
| } else { |
| ip6_t *ip6h = (ip6_t *)mp->b_rptr; |
| |
| ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen); |
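| /* The UDP checksum is mandatory for IPv6; a computed 0 is sent as 0xffff. */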
| if (cksum == 0) |
| udpha->uha_checksum = 0xffff; |
| else |
| udpha->uha_checksum = htons(cksum); |
| } |
| |
| /* Insert all-0s SPI now. */ |
| if (insert_spi) |
| *((uint32_t *)(udpha + 1)) = 0; |
| |
| return (mp); |
| } |
| |
| static int |
| udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src, |
| const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo) |
| { |
| udpha_t *udpha; |
| int error; |
| |
| ASSERT(MUTEX_HELD(&connp->conn_lock)); |
| /* |
| * We clear lastdst to make sure we don't use the lastdst path
| * the next time we send, since we might not have set v6dst yet.
| */ |
| connp->conn_v6lastdst = ipv6_all_zeros; |
| |
| error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst, |
| flowinfo); |
| if (error != 0) |
| return (error); |
| |
| /* |
| * Any routing header/option has been massaged. The checksum difference |
| * is stored in conn_sum. |
| */ |
| udpha = (udpha_t *)connp->conn_ht_ulp; |
| udpha->uha_src_port = connp->conn_lport; |
| udpha->uha_dst_port = dstport; |
| udpha->uha_checksum = 0; |
| udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */ |
| return (0); |
| } |
| |
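| /*
| * Called with udp_recv_lock held while a socket endpoint is falling
| * back to TPI. If the fallback has started but messages have not yet
| * been moved, queue the mblk on the fallback queue and return NULL;
| * once the fallback has completed, return the mblk so the caller can
| * putnext() it.
| */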
| static mblk_t * |
| udp_queue_fallback(udp_t *udp, mblk_t *mp) |
| { |
| ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); |
| if (IPCL_IS_NONSTR(udp->udp_connp)) { |
| /* |
| * Fallback has started but messages have not been moved yet.
| */ |
| if (udp->udp_fallback_queue_head == NULL) { |
| ASSERT(udp->udp_fallback_queue_tail == NULL); |
| udp->udp_fallback_queue_head = mp; |
| udp->udp_fallback_queue_tail = mp; |
| } else { |
| ASSERT(udp->udp_fallback_queue_tail != NULL); |
| udp->udp_fallback_queue_tail->b_next = mp; |
| udp->udp_fallback_queue_tail = mp; |
| } |
| return (NULL); |
| } else { |
| /* |
| * Fallback completed, let the caller putnext() the mblk. |
| */ |
| return (mp); |
| } |
| } |
| |
| /* |
| * Deliver data to the ULP. If we have a non-STREAMS socket that is
| * falling back to TPI, queue the mp for later processing.
| */ |
| static void |
| udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) |
| { |
| if (IPCL_IS_NONSTR(connp)) { |
| udp_t *udp = connp->conn_udp; |
| int error; |
| |
| ASSERT(len == msgdsize(mp)); |
| if ((*connp->conn_upcalls->su_recv) |
| (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { |
| mutex_enter(&udp->udp_recv_lock); |
| if (error == ENOSPC) { |
| /* |
| * Confirm that the socket is still flow controlled while
| * holding the lock before setting conn_flow_cntrld.
| */ |
| if ((*connp->conn_upcalls->su_recv) |
| (connp->conn_upper_handle, NULL, 0, 0, |
| &error, NULL) < 0) { |
| ASSERT(error == ENOSPC); |
| if (error == ENOSPC) { |
| connp->conn_flow_cntrld = |
| B_TRUE; |
| } |
| } |
| mutex_exit(&udp->udp_recv_lock); |
| } else { |
| ASSERT(error == EOPNOTSUPP); |
| mp = udp_queue_fallback(udp, mp); |
| mutex_exit(&udp->udp_recv_lock); |
| if (mp != NULL) |
| putnext(connp->conn_rq, mp); |
| } |
| } |
| ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); |
| } else { |
| if (is_system_labeled()) { |
| ASSERT(ira->ira_cred != NULL); |
| /* |
| * Provide for protocols above UDP such as RPC.
| * NOPID leaves db_cpid unchanged. |
| */ |
| mblk_setcred(mp, ira->ira_cred, NOPID); |
| } |
| |
| putnext(connp->conn_rq, mp); |
| } |
| } |
| |
| /* |
| * This is the inbound data path. |
| * IP has already pulled up the IP plus UDP headers and verified alignment |
| * etc. |
| */ |
| /* ARGSUSED2 */ |
| static void |
| udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) |
| { |
| conn_t *connp = (conn_t *)arg1; |
| struct T_unitdata_ind *tudi; |
| uchar_t *rptr; /* Pointer to IP header */ |
| int hdr_length; /* Length of IP+UDP headers */ |
| int udi_size; /* Size of T_unitdata_ind */ |
| int pkt_len; |
| udp_t *udp; |
| udpha_t *udpha; |
| ip_pkt_t ipps; |
| ip6_t *ip6h; |
| mblk_t *mp1; |
| uint32_t udp_ipv4_options_len; |
| crb_t recv_ancillary; |
| udp_stack_t *us; |
| |
| ASSERT(connp->conn_flags & IPCL_UDPCONN); |
| |
| udp = connp->conn_udp; |
| us = udp->udp_us; |
| rptr = mp->b_rptr; |
| |
| ASSERT(DB_TYPE(mp) == M_DATA); |
| ASSERT(OK_32PTR(rptr)); |
| ASSERT(ira->ira_pktlen == msgdsize(mp)); |
| pkt_len = ira->ira_pktlen; |
| |
| /* |
| * Get a snapshot of these and allow other threads to change |
| * them after that. We need the same recv_ancillary when determining |
| * the size as when adding the ancillary data items. |
| */ |
| mutex_enter(&connp->conn_lock); |
| udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len; |
| recv_ancillary = connp->conn_recv_ancillary; |
| mutex_exit(&connp->conn_lock); |
| |
| hdr_length = ira->ira_ip_hdr_length; |
| |
| /* |
| * IP inspected the UDP header, thus all of it must be in the mblk.
| * For both IPv4 and IPv6 packets, check that the packet length
| * specified by the UDP header matches the length derived from the
| * IP header.
| */ |
| udpha = (udpha_t *)(rptr + hdr_length); |
| if (pkt_len != ntohs(udpha->uha_length) + hdr_length) |
| goto tossit; |
| |
| hdr_length += UDPH_SIZE; |
| ASSERT(MBLKL(mp) >= hdr_length); /* IP did a pullup */ |
| |
| /* Initialize regardless of IP version */ |
| ipps.ipp_fields = 0; |
| |
| if (((ira->ira_flags & IRAF_IPV4_OPTIONS) || |
| udp_ipv4_options_len > 0) && |
| connp->conn_family == AF_INET) { |
| int err; |
| |
| /* |
| * Record/update udp_recv_ipp with the lock |
| * held. Not needed for AF_INET6 sockets |
| * since they don't support a getsockopt of IP_OPTIONS. |
| */ |
| mutex_enter(&connp->conn_lock); |
| err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp, |
| B_TRUE); |
| if (err != 0) { |
| /* Allocation failed. Drop packet */ |
| mutex_exit(&connp->conn_lock); |
| freemsg(mp); |
| UDPS_BUMP_MIB(us, udpInErrors); |
| return; |
| } |
| mutex_exit(&connp->conn_lock); |
| } |
| |
| if (recv_ancillary.crb_all != 0) { |
| /* |
| * Record packet information in the ip_pkt_t |
| */ |
| if (ira->ira_flags & IRAF_IS_IPV4) { |
| ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION); |
| ASSERT(MBLKL(mp) >= sizeof (ipha_t)); |
| ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); |
| ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr)); |
| |
| (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE); |
| } else { |
| uint8_t nexthdrp; |
| |
| ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION); |
| /* |
| * IPv6 packets can only be received by applications |
| * that are prepared to receive IPv6 addresses. |
| * The IP fanout must ensure this. |
| */ |
| ASSERT(connp->conn_family == AF_INET6); |
| |
| ip6h = (ip6_t *)rptr; |
| |
| /* We don't care about the length, but need the ipp */ |
| hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps, |
| &nexthdrp); |
| ASSERT(hdr_length == ira->ira_ip_hdr_length); |
| /* Restore hdr_length to also include the UDP header */
| hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE; |
| ASSERT(nexthdrp == IPPROTO_UDP); |
| } |
| } |
| |
| /* |
| * This is the inbound data path. Packets are passed upstream as |
| * T_UNITDATA_IND messages. |
| */ |
| if (connp->conn_family == AF_INET) { |
| sin_t *sin; |
| |
| ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); |
| |
| /* |
| * Normally only send up the source address. |
| * If any ancillary data items are wanted we add those. |
| */ |
| udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); |
| if (recv_ancillary.crb_all != 0) { |
| udi_size += conn_recvancillary_size(connp, |
| recv_ancillary, ira, mp, &ipps); |
| } |
| |
| /* Allocate a message block for the T_UNITDATA_IND structure. */ |
| mp1 = allocb(udi_size, BPRI_MED); |
| if (mp1 == NULL) { |
| freemsg(mp); |
| UDPS_BUMP_MIB(us, udpInErrors); |
| return; |
| } |
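| /*
| * The M_PROTO mblk holds the T_unitdata_ind, followed by the source
| * sockaddr and any ancillary options; the datagram itself is linked
| * in via b_cont.
| */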
| mp1->b_cont = mp; |
| mp1->b_datap->db_type = M_PROTO; |
| tudi = (struct T_unitdata_ind *)mp1->b_rptr; |
| mp1->b_wptr = (uchar_t *)tudi + udi_size; |
| tudi->PRIM_type = T_UNITDATA_IND; |
| tudi->SRC_length = sizeof (sin_t); |
| tudi->SRC_offset = sizeof (struct T_unitdata_ind); |
| tudi->OPT_offset = sizeof (struct T_unitdata_ind) + |
| sizeof (sin_t); |
| udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); |
| tudi->OPT_length = udi_size; |
| sin = (sin_t *)&tudi[1]; |
| sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; |
| sin->sin_port = udpha->uha_src_port; |
| sin->sin_family = connp->conn_family; |
| *(uint32_t *)&sin->sin_zero[0] = 0; |
| *(uint32_t *)&sin->sin_zero[4] = 0; |
| |
| /* |
| * Add ancillary data items such as IP_RECVDSTADDR, IP_RECVIF,
| * IP_RECVSLLA or IP_RECVTTL if any have been requested.
| */ |
| if (udi_size != 0) { |
| conn_recvancillary_add(connp, recv_ancillary, ira, |
| &ipps, (uchar_t *)&sin[1], udi_size); |
| } |
| } else { |
| sin6_t *sin6; |
| |
| /* |
| * Handle both IPv4 and IPv6 packets for IPv6 sockets. |
| * |
| * Normally we only send up the source address. If the application
| * has enabled any optional receive-side information, we also send
| * that up as options.
| */ |
| udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); |
| |
| if (recv_ancillary.crb_all != 0) { |
| udi_size += conn_recvancillary_size(connp, |
| recv_ancillary, ira, mp, &ipps); |
| } |
| |
| mp1 = allocb(udi_size, BPRI_MED); |
| if (mp1 == NULL) { |
| freemsg(mp); |
| UDPS_BUMP_MIB(us, udpInErrors); |
| return; |
| } |
| mp1->b_cont = mp; |
| mp1->b_datap->db_type = M_PROTO; |
| tudi = (struct T_unitdata_ind *)mp1->b_rptr; |
| mp1->b_wptr = (uchar_t *)tudi + udi_size; |
| tudi->PRIM_type = T_UNITDATA_IND; |
| tudi->SRC_length = sizeof (sin6_t); |
| tudi->SRC_offset = sizeof (struct T_unitdata_ind); |
| tudi->OPT_offset = sizeof (struct T_unitdata_ind) + |
| sizeof (sin6_t); |
| udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); |
| tudi->OPT_length = udi_size; |
| sin6 = (sin6_t *)&tudi[1]; |
| if (ira->ira_flags & IRAF_IS_IPV4) { |
| in6_addr_t v6dst; |
| |
| IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, |
| &sin6->sin6_addr); |
| IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, |
| &v6dst); |
| sin6->sin6_flowinfo = 0; |
| sin6->sin6_scope_id = 0; |
| sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, |
| IPCL_ZONEID(connp), us->us_netstack); |
| } else { |
| ip6h = (ip6_t *)rptr; |
| |
| sin6->sin6_addr = ip6h->ip6_src; |
| /* No sin6_flowinfo per API */ |
| sin6->sin6_flowinfo = 0; |
| /* For link-scope pass up scope id */ |
| if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) |
| sin6->sin6_scope_id = ira->ira_ruifindex; |
| else |
| sin6->sin6_scope_id = 0; |
| sin6->__sin6_src_id = ip_srcid_find_addr( |
| &ip6h->ip6_dst, IPCL_ZONEID(connp), |
| us->us_netstack); |
| } |
| sin6->sin6_port = udpha->uha_src_port; |
| sin6->sin6_family = connp->conn_family; |
| |
| if (udi_size != 0) { |
| conn_recvancillary_add(connp, recv_ancillary, ira, |
| &ipps, (uchar_t *)&sin6[1], udi_size); |
| } |
| } |
| |
| /* |
| * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and |
| * loopback traffic). |
| */ |
| DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa, |
| void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha); |
| |
| /* Walk past the headers unless UDP_RCVHDR was set. */
| if (!udp->udp_rcvhdr) { |
| mp->b_rptr = rptr + hdr_length; |
| pkt_len -= hdr_length; |
| } |
| |
| UDPS_BUMP_MIB(us, udpHCInDatagrams); |
| udp_ulp_recv(connp, mp1, pkt_len, ira); |
| return; |
| |
| tossit: |
| freemsg(mp); |
| UDPS_BUMP_MIB(us, udpInErrors); |
| } |
| |
| /* |
| * This routine creates a T_UDERROR_IND message and passes it upstream. |
| * The address and options are copied from the T_UNITDATA_REQ message |
| * passed in mp. This message is freed. |
| */ |
| static void |
| udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) |
| { |
| struct T_unitdata_req *tudr; |
| mblk_t *mp1; |
| uchar_t *destaddr; |
| t_scalar_t destlen; |
| uchar_t *optaddr; |
| t_scalar_t optlen; |
| |
| if ((mp->b_wptr < mp->b_rptr) || |
| (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { |
| goto done; |
| } |
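| /*
| * Validate the DEST and OPT offsets/lengths against the message
| * boundaries before echoing them back in the T_UDERROR_IND.
| */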
| tudr = (struct T_unitdata_req *)mp->b_rptr; |
| destaddr = mp->b_rptr + tudr->DEST_offset; |
| if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || |
| destaddr + tudr->DEST_length < mp->b_rptr || |
| destaddr + tudr->DEST_length > mp->b_wptr) { |
| goto done; |
| } |
| optaddr = mp->b_rptr + tudr->OPT_offset; |
| if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || |
| optaddr + tudr->OPT_length < mp->b_rptr || |
| optaddr + tudr->OPT_length > mp->b_wptr) { |
| goto done; |
| } |
| destlen = tudr->DEST_length; |
| optlen = tudr->OPT_length; |
| |
| mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, |
| (char *)optaddr, optlen, err); |
| if (mp1 != NULL) |
| qreply(q, mp1); |
| |
| done: |
| freemsg(mp); |
| } |
| |
| /* |
| * This routine removes a port number association from a stream. It |
| * is called by udp_wput to handle T_UNBIND_REQ messages. |
| */ |
| static void |
| udp_tpi_unbind(queue_t *q, mblk_t *mp) |
| { |
| conn_t *connp = Q_TO_CONN(q); |
| int error; |
| |
| error = udp_do_unbind(connp); |
| if (error) { |
| if (error < 0) |
| udp_err_ack(q, mp, -error, 0); |
| else |
| udp_err_ack(q, mp, TSYSERR, error); |
| return; |
| } |
| |
| mp = mi_tpi_ok_ack_alloc(mp); |
| ASSERT(mp != NULL); |
| ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); |
| qreply(q, mp); |
| } |
| |
| /* |
| * Don't let the port fall into the privileged range.
| * Since the extra privileged ports can be arbitrary we also
| * exclude those from consideration.
| * us->us_epriv_ports is not sorted, so we loop over it until
| * there are no changes.
| */ |
| static in_port_t |
| udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) |
| { |
| int i, bump; |
| in_port_t nextport; |
| boolean_t restart = B_FALSE; |
| udp_stack_t *us = udp->udp_us; |
| |
| if (random && udp_random_anon_port != 0) { |
| (void) random_get_pseudo_bytes((uint8_t *)&port, |
| sizeof (in_port_t)); |
| /* |
| * Unless changed by a sys admin, the smallest anon port |
| * is 32768 and the largest anon port is 65535. There is a
| * 50% chance that the random port will be smaller than the
| * smallest anon port. When that happens,
| * add port % (anon port range) to the smallest anon |
| * port to get the random port. It should fall into the |
| * valid anon port range. |
| */ |
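| /* E.g. with the defaults, port 1000 maps to 32768 + (1000 % 32767) = 33768. */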
| if ((port < us->us_smallest_anon_port) || |
| (port > us->us_largest_anon_port)) { |
| if (us->us_smallest_anon_port == |
| us->us_largest_anon_port) { |
| bump = 0; |
| } else { |
| bump = port % (us->us_largest_anon_port - |
| us->us_smallest_anon_port); |
| } |
| |
| port = us->us_smallest_anon_port + bump; |
| } |
| } |
| |
| retry: |
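| /*
| * Re-apply the range checks after every adjustment: clamping into the
| * anon range, or bumping past an extra-privileged port, may move the
| * port outside one of the other limits. The restart flag bounds us to
| * a single wrap of the anon range.
| */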
| if (port < us->us_smallest_anon_port) |
| port = us->us_smallest_anon_port; |
| |
| if (port > us->us_largest_anon_port) { |
| port = us->us_smallest_anon_port; |
| if (restart) |
| return (0); |
| restart = B_TRUE; |
| } |
| |
| if (port < us->us_smallest_nonpriv_port) |
| port = us->us_smallest_nonpriv_port; |
| |
| for (i = 0; i < us->us_num_epriv_ports; i++) { |
| if (port == us->us_epriv_ports[i]) { |
| port++; |
| /* |
| * Make sure that the port is in the |
| * valid range. |
| */ |
| goto retry; |
| } |
| } |
| |
| if (is_system_labeled() && |
| (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), |
| port, IPPROTO_UDP, B_TRUE)) != 0) { |
| port = nextport; |
| goto retry; |
| } |
| |
| return (port); |
| } |
| |
| /* |
| * Handle a T_UNITDATA_REQ with options, for both IPv4 and IPv6.
| * Either tudr_mp or msg is set. If tudr_mp is set we take the ancillary
| * data from the TPI options, otherwise we take it from msg_control.
| * If both sin and sin6 are NULL the socket is connected and we use conn_faddr.
| * Always consumes mp; never consumes tudr_mp. |
| */ |
| static int |
| udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, |
| mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid) |
| { |
| udp_t *udp = connp->conn_udp; |
| udp_stack_t *us = udp->udp_us; |
| int error; |
| ip_xmit_attr_t *ixa; |
| ip_pkt_t *ipp; |
| in6_addr_t v6src; |
| in6_addr_t v6dst; |
| in6_addr_t v6nexthop; |
| in_port_t dstport; |
| uint32_t flowinfo; |
| uint_t srcid; |
| int is_absreq_failure = 0; |
| conn_opt_arg_t coas, *coa; |
| |
| ASSERT(tudr_mp != NULL || msg != NULL); |
| |
| /* |
| * Get ixa before checking state to handle a disconnect race. |
| * |
| * We need an exclusive copy of conn_ixa since the ancillary data |
| * options might modify it. That copy carries no cached pointers, so we
| * need to set them up once we've parsed the ancillary data. |
| */ |
| ixa = conn_get_ixa_exclusive(connp); |
| if (ixa == NULL) { |
| UDPS_BUMP_MIB(us, udpOutErrors); |
| freemsg(mp); |
| return (ENOMEM); |
| } |
| ASSERT(cr != NULL); |
| ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); |
| ixa->ixa_cred = cr; |
| ixa->ixa_cpid = pid; |
| if (is_system_labeled()) { |
| /* We need to restart with a label based on the cred */ |
| ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); |
| } |
| |
| /* In case previous destination was multicast or multirt */ |
| ip_attr_newdst(ixa); |
| |
| /* Get a copy of conn_xmit_ipp since the options might change it */ |
| ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP); |
| if (ipp == NULL) { |
| ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); |
| ixa->ixa_cred = connp->conn_cred; /* Restore */ |
| ixa->ixa_cpid = connp->conn_cpid; |
| ixa_refrele(ixa); |
| UDPS_BUMP_MIB(us, udpOutErrors); |
| freemsg(mp); |
| return (ENOMEM); |
| } |
| mutex_enter(&connp->conn_lock); |
| error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP); |
| mutex_exit(&connp->conn_lock); |
| if (error != 0) { |
| UDPS_BUMP_MIB(us, udpOutErrors); |
| freemsg(mp); |
| goto done; |
| } |
| |
| /* |
| * Parse the options and update ixa and ipp as a result. |
| * Note that ixa_tsl can be updated if SCM_UCRED. |
| * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl. |
| */ |
| |
| coa = &coas; |
| coa->coa_connp = connp; |
| coa->coa_ixa = ixa; |
| coa->coa_ipp = ipp; |
| coa->coa_ancillary = B_TRUE; |
| coa->coa_changed = 0; |
| |
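| /*
| * The ancillary data arrives either in msg_control (socket path) or
| * as TPI options in the T_UNITDATA_REQ (STREAMS path); both paths
| * update coa_ixa and coa_ipp in place.
| */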
| if (msg != NULL) { |
| error = process_auxiliary_options(connp, msg->msg_control, |
| msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr); |
| } else { |
| struct T_unitdata_req *tudr; |
| |
| tudr = (struct T_unitdata_req *)tudr_mp->b_rptr; |
| ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); |
| error = tpi_optcom_buf(connp->conn_wq, tudr_mp, |
| &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj, |
| coa, &is_absreq_failure); |
| } |
| if (error != 0) { |
| /* |
| * Note: No special action needed in this |
| * module for "is_absreq_failure" |
| */ |
| freemsg(mp); |
| UDPS_BUMP_MIB(us, udpOutErrors); |
| goto done; |
| } |
| ASSERT(is_absreq_failure == 0); |
| |
| mutex_enter(&connp->conn_lock); |
| /* |
| * If laddr is unspecified then we look at sin6_src_id. |
| * We will give precedence to a source address set with IPV6_PKTINFO |
| * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't |
| * want ip_attr_connect to select a source (since it can fail) when |
| * IPV6_PKTINFO is specified. |
| * If this doesn't result in a source address then we get a source |
| * from ip_attr_connect() below. |
| */ |
| v6src = connp->conn_saddr_v6; |
| |