blob: 57ee0c55859410c9f6f9e7c803ff170b524cca8e [file] [log] [blame]
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013 by Delphix. All rights reserved.
 * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
 * Copyright (c) 2018, Joyent, Inc.
 */
/* Copyright (c) 1990 Mentat Inc. */
28
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070029#include <sys/types.h>
30#include <sys/stream.h>
31#include <sys/stropts.h>
32#include <sys/strlog.h>
33#include <sys/strsun.h>
34#define _SUN_TPI_VERSION 2
35#include <sys/tihdr.h>
36#include <sys/timod.h>
37#include <sys/ddi.h>
38#include <sys/sunddi.h>
jpk45916cd2006-03-24 12:29:20 -080039#include <sys/strsubr.h>
Erik Nordmarkbd670b32009-11-11 11:49:49 -080040#include <sys/suntpi.h>
41#include <sys/xti_inet.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070042#include <sys/cmn_err.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070043#include <sys/kmem.h>
Casper H.S. Dik134a1f42010-04-28 10:01:37 +020044#include <sys/cred.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070045#include <sys/policy.h>
jpk45916cd2006-03-24 12:29:20 -080046#include <sys/priv.h>
Erik Nordmarkbd670b32009-11-11 11:49:49 -080047#include <sys/ucred.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070048#include <sys/zone.h>
49
Yu Xiangning0f1702c2008-12-11 20:04:13 -080050#include <sys/sockio.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070051#include <sys/socket.h>
Yu Xiangning0f1702c2008-12-11 20:04:13 -080052#include <sys/socketvar.h>
Erik Nordmarkbd670b32009-11-11 11:49:49 -080053#include <sys/vtrace.h>
54#include <sys/sdt.h>
55#include <sys/debug.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070056#include <sys/isa_defs.h>
Erik Nordmarkbd670b32009-11-11 11:49:49 -080057#include <sys/random.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070058#include <netinet/in.h>
59#include <netinet/ip6.h>
60#include <netinet/icmp6.h>
Erik Nordmarkbd670b32009-11-11 11:49:49 -080061#include <netinet/udp.h>
62
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070063#include <inet/common.h>
64#include <inet/ip.h>
Erik Nordmarkbd670b32009-11-11 11:49:49 -080065#include <inet/ip_impl.h>
66#include <inet/ipsec_impl.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070067#include <inet/ip6.h>
Erik Nordmarkbd670b32009-11-11 11:49:49 -080068#include <inet/ip_ire.h>
69#include <inet/ip_if.h>
70#include <inet/ip_multi.h>
71#include <inet/ip_ndp.h>
Yu Xiangning0f1702c2008-12-11 20:04:13 -080072#include <inet/proto_set.h>
Erik Nordmarkbd670b32009-11-11 11:49:49 -080073#include <inet/mib2.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070074#include <inet/nd.h>
75#include <inet/optcom.h>
76#include <inet/snmpcom.h>
77#include <inet/kstatcom.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070078#include <inet/ipclassifier.h>
79
jpk45916cd2006-03-24 12:29:20 -080080#include <sys/tsol/label.h>
81#include <sys/tsol/tnet.h>
82
Erik Nordmarkbd670b32009-11-11 11:49:49 -080083#include <inet/rawip_impl.h>
rshoaib19a30e12006-12-22 01:03:02 -080084
Yu Xiangning0f1702c2008-12-11 20:04:13 -080085#include <sys/disp.h>
rshoaib19a30e12006-12-22 01:03:02 -080086
/*
 * Synchronization notes:
 *
 * RAWIP is MT and uses the usual kernel synchronization primitives. We use
 * conn_lock to protect the icmp_t.
 *
 * Plumbing notes:
 * ICMP is always a device driver. For compatibility with mibopen() code
 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
 * dummy module.
 */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070098static void icmp_addr_req(queue_t *q, mblk_t *mp);
Yu Xiangning0f1702c2008-12-11 20:04:13 -080099static void icmp_tpi_bind(queue_t *q, mblk_t *mp);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800100static void icmp_bind_proto(icmp_t *icmp);
101static int icmp_build_hdr_template(conn_t *, const in6_addr_t *,
102 const in6_addr_t *, uint32_t);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700103static void icmp_capability_req(queue_t *q, mblk_t *mp);
Toomas Soome5e1743f2018-10-15 22:13:49 +0300104static int icmp_close(queue_t *q, int flags, cred_t *);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800105static void icmp_close_free(conn_t *);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800106static void icmp_tpi_connect(queue_t *q, mblk_t *mp);
107static void icmp_tpi_disconnect(queue_t *q, mblk_t *mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700108static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800109 int sys_error);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700110static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800111 t_scalar_t tlierr, int sys_error);
112static void icmp_icmp_input(void *arg1, mblk_t *mp, void *arg2,
113 ip_recv_attr_t *);
114static void icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
115 ip_recv_attr_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700116static void icmp_info_req(queue_t *q, mblk_t *mp);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800117static void icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300118static conn_t *icmp_open(int family, cred_t *credp, int *err, int flags);
nordmarkfc80c0d2007-10-11 22:57:36 -0700119static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
120 cred_t *credp);
121static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
122 cred_t *credp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700123static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800124int icmp_opt_set(conn_t *connp, uint_t optset_context,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700125 int level, int name, uint_t inlen,
126 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800127 void *thisdg_attrs, cred_t *cr);
128int icmp_opt_get(conn_t *connp, int level, int name,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700129 uchar_t *ptr);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800130static int icmp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
131 sin6_t *sin6, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800132static mblk_t *icmp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
133 const in6_addr_t *, const in6_addr_t *, uint32_t, mblk_t *, int *);
134static mblk_t *icmp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
135 mblk_t *, const in6_addr_t *, uint32_t, int *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700136static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
137 uchar_t *ptr, int len);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700138static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800139static void icmp_tpi_unbind(queue_t *q, mblk_t *mp);
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300140static int icmp_wput(queue_t *q, mblk_t *mp);
141static int icmp_wput_fallback(queue_t *q, mblk_t *mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700142static void icmp_wput_other(queue_t *q, mblk_t *mp);
143static void icmp_wput_iocdata(queue_t *q, mblk_t *mp);
144static void icmp_wput_restricted(queue_t *q, mblk_t *mp);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800145static void icmp_ulp_recv(conn_t *, mblk_t *, uint_t);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700146
dh155122f4b3ec62007-01-19 16:59:38 -0800147static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns);
148static void rawip_stack_fini(netstackid_t stackid, void *arg);
149
150static void *rawip_kstat_init(netstackid_t stackid);
151static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700152static int rawip_kstat_update(kstat_t *kp, int rw);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800153static void rawip_stack_shutdown(netstackid_t stackid, void *arg);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800154
155/* Common routines for TPI and socket module */
156static conn_t *rawip_do_open(int, cred_t *, int *, int);
157static void rawip_do_close(conn_t *);
158static int rawip_do_bind(conn_t *, struct sockaddr *, socklen_t);
159static int rawip_do_unbind(conn_t *);
160static int rawip_do_connect(conn_t *, const struct sockaddr *, socklen_t,
161 cred_t *, pid_t);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700162
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800163int rawip_getsockname(sock_lower_handle_t, struct sockaddr *,
164 socklen_t *, cred_t *);
165int rawip_getpeername(sock_lower_handle_t, struct sockaddr *,
166 socklen_t *, cred_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700167
nordmarkfc80c0d2007-10-11 22:57:36 -0700168static struct module_info icmp_mod_info = {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700169 5707, "icmp", 1, INFPSZ, 512, 128
170};
171
nordmarkfc80c0d2007-10-11 22:57:36 -0700172/*
173 * Entry points for ICMP as a device.
174 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
175 */
176static struct qinit icmprinitv4 = {
177 NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700178};
179
nordmarkfc80c0d2007-10-11 22:57:36 -0700180static struct qinit icmprinitv6 = {
181 NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700182};
183
nordmarkfc80c0d2007-10-11 22:57:36 -0700184static struct qinit icmpwinit = {
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300185 icmp_wput, ip_wsrv, NULL, NULL, NULL, &icmp_mod_info
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800186};
187
188/* ICMP entry point during fallback */
189static struct qinit icmp_fallback_sock_winit = {
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300190 icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info
nordmarkfc80c0d2007-10-11 22:57:36 -0700191};
192
193/* For AF_INET aka /dev/icmp */
194struct streamtab icmpinfov4 = {
195 &icmprinitv4, &icmpwinit
196};
197
198/* For AF_INET6 aka /dev/icmp6 */
199struct streamtab icmpinfov6 = {
200 &icmprinitv6, &icmpwinit
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700201};
202
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700203/* Default structure copied into T_INFO_ACK messages */
204static struct T_info_ack icmp_g_t_info_ack = {
205 T_INFO_ACK,
206 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */
207 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */
208 T_INVALID, /* CDATA_size. icmp does not support connect data. */
209 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */
210 0, /* ADDR_size - filled in later. */
211 0, /* OPT_size - not initialized here */
212 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */
213 T_CLTS, /* SERV_type. icmp supports connection-less. */
214 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */
215 (XPG4_1|SENDZERO) /* PROVIDER_flag */
216};
217
Sebastien Roy299625c2013-08-01 17:47:00 -0800218static int
219icmp_set_buf_prop(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
220 const char *ifname, const void *pval, uint_t flags)
221{
222 return (mod_set_buf_prop(stack->netstack_icmp->is_propinfo_tbl,
223 stack, cr, pinfo, ifname, pval, flags));
224}
225
226static int
227icmp_get_buf_prop(netstack_t *stack, mod_prop_info_t *pinfo, const char *ifname,
228 void *val, uint_t psize, uint_t flags)
229{
230 return (mod_get_buf_prop(stack->netstack_icmp->is_propinfo_tbl, stack,
231 pinfo, ifname, val, psize, flags));
232}
233
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700234/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700235 * All of these are alterable, within the min/max values given, at run time.
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400236 *
237 * Note: All those tunables which do not start with "icmp_" are Committed and
Girish Moodalbail8887b572010-08-16 19:16:00 -0400238 * therefore are public. See PSARC 2010/080.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700239 */
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400240static mod_prop_info_t icmp_propinfo_tbl[] = {
241 /* tunable - 0 */
Girish Moodalbail8887b572010-08-16 19:16:00 -0400242 { "_wroff_extra", MOD_PROTO_RAWIP,
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400243 mod_set_uint32, mod_get_uint32,
244 {0, 128, 32}, {32} },
245
Girish Moodalbail8887b572010-08-16 19:16:00 -0400246 { "_ipv4_ttl", MOD_PROTO_RAWIP,
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400247 mod_set_uint32, mod_get_uint32,
248 {1, 255, 255}, {255} },
249
Girish Moodalbail8887b572010-08-16 19:16:00 -0400250 { "_ipv6_hoplimit", MOD_PROTO_RAWIP,
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400251 mod_set_uint32, mod_get_uint32,
252 {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
253 {IPV6_DEFAULT_HOPS} },
254
Girish Moodalbail8887b572010-08-16 19:16:00 -0400255 { "_bsd_compat", MOD_PROTO_RAWIP,
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400256 mod_set_boolean, mod_get_boolean,
257 {B_TRUE}, {B_TRUE} },
258
Sebastien Roy299625c2013-08-01 17:47:00 -0800259 { "send_buf", MOD_PROTO_RAWIP,
260 icmp_set_buf_prop, icmp_get_buf_prop,
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400261 {4096, 65536, 8192}, {8192} },
262
Girish Moodalbail8887b572010-08-16 19:16:00 -0400263 { "_xmit_lowat", MOD_PROTO_RAWIP,
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400264 mod_set_uint32, mod_get_uint32,
265 {0, 65536, 1024}, {1024} },
266
Sebastien Roy299625c2013-08-01 17:47:00 -0800267 { "recv_buf", MOD_PROTO_RAWIP,
268 icmp_set_buf_prop, icmp_get_buf_prop,
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400269 {4096, 65536, 8192}, {8192} },
270
Sebastien Roy299625c2013-08-01 17:47:00 -0800271 { "max_buf", MOD_PROTO_RAWIP,
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400272 mod_set_uint32, mod_get_uint32,
Sebastien Roy299625c2013-08-01 17:47:00 -0800273 {65536, ULP_MAX_BUF, 256*1024}, {256*1024} },
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400274
Girish Moodalbail8887b572010-08-16 19:16:00 -0400275 { "_pmtu_discovery", MOD_PROTO_RAWIP,
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400276 mod_set_boolean, mod_get_boolean,
277 {B_FALSE}, {B_FALSE} },
278
Girish Moodalbail8887b572010-08-16 19:16:00 -0400279 { "_sendto_ignerr", MOD_PROTO_RAWIP,
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400280 mod_set_boolean, mod_get_boolean,
281 {B_FALSE}, {B_FALSE} },
282
283 { "?", MOD_PROTO_RAWIP, NULL, mod_get_allprop, {0}, {0} },
284
285 { NULL, 0, NULL, NULL, {0}, {0} }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700286};
Girish Moodalbail6e91bba2010-03-26 17:53:11 -0400287
/*
 * Shorthand for the current value of individual tunables, meant to be used
 * as a member expression on an object that has an is_propinfo_tbl member
 * (e.g. is->is_ipv4_ttl).
 * NOTE(review): is_propinfo_tbl is presumably a per-stack copy of
 * icmp_propinfo_tbl[], in which case these indices must match that table's
 * entry order -- confirm where the per-stack table is set up.
 */
#define	is_wroff_extra		is_propinfo_tbl[0].prop_cur_uval
#define	is_ipv4_ttl		is_propinfo_tbl[1].prop_cur_uval
#define	is_ipv6_hoplimit	is_propinfo_tbl[2].prop_cur_uval
#define	is_bsd_compat		is_propinfo_tbl[3].prop_cur_bval
#define	is_xmit_hiwat		is_propinfo_tbl[4].prop_cur_uval
#define	is_xmit_lowat		is_propinfo_tbl[5].prop_cur_uval
#define	is_recv_hiwat		is_propinfo_tbl[6].prop_cur_uval
#define	is_max_buf		is_propinfo_tbl[7].prop_cur_uval
#define	is_pmtu_discovery	is_propinfo_tbl[8].prop_cur_bval
#define	is_sendto_ignerr	is_propinfo_tbl[9].prop_cur_bval
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700298
/* Pointer to the union of TPI primitive structures. */
typedef union T_primitives	*t_primp_t;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800300
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700301/*
302 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
303 * passed to icmp_wput.
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800304 * It calls IP to verify the local IP address, and calls IP to insert
305 * the conn_t in the fanout table.
306 * If everything is ok it then sends the T_BIND_ACK back up.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700307 */
308static void
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800309icmp_tpi_bind(queue_t *q, mblk_t *mp)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700310{
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800311 int error;
312 struct sockaddr *sa;
313 struct T_bind_req *tbr;
314 socklen_t len;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700315 sin_t *sin;
316 sin6_t *sin6;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800317 icmp_t *icmp;
nordmarkfc80c0d2007-10-11 22:57:36 -0700318 conn_t *connp = Q_TO_CONN(q);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800319 mblk_t *mp1;
Erik Nordmarkde8c4a12009-02-12 08:42:06 -0800320 cred_t *cr;
321
322 /*
323 * All Solaris components should pass a db_credp
324 * for this TPI message, hence we ASSERT.
325 * But in case there is some other M_PROTO that looks
326 * like a TPI message sent by some other kernel
327 * component, we check and return an error.
328 */
329 cr = msg_getcred(mp, NULL);
330 ASSERT(cr != NULL);
331 if (cr == NULL) {
332 icmp_err_ack(q, mp, TSYSERR, EINVAL);
333 return;
334 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700335
nordmarkfc80c0d2007-10-11 22:57:36 -0700336 icmp = connp->conn_icmp;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700337 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
338 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
339 "icmp_bind: bad req, len %u",
340 (uint_t)(mp->b_wptr - mp->b_rptr));
341 icmp_err_ack(q, mp, TPROTO, 0);
342 return;
343 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800344
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700345 if (icmp->icmp_state != TS_UNBND) {
346 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800347 "icmp_bind: bad state, %u", icmp->icmp_state);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700348 icmp_err_ack(q, mp, TOUTSTATE, 0);
349 return;
350 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800351
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700352 /*
353 * Reallocate the message to make sure we have enough room for an
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800354 * address.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700355 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800356 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
357 if (mp1 == NULL) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700358 icmp_err_ack(q, mp, TSYSERR, ENOMEM);
359 return;
360 }
361 mp = mp1;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800362
363 /* Reset the message type in preparation for shipping it back. */
364 DB_TYPE(mp) = M_PCPROTO;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700365 tbr = (struct T_bind_req *)mp->b_rptr;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800366 len = tbr->ADDR_length;
367 switch (len) {
368 case 0: /* request for a generic port */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700369 tbr->ADDR_offset = sizeof (struct T_bind_req);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800370 if (connp->conn_family == AF_INET) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700371 tbr->ADDR_length = sizeof (sin_t);
372 sin = (sin_t *)&tbr[1];
373 *sin = sin_null;
374 sin->sin_family = AF_INET;
375 mp->b_wptr = (uchar_t *)&sin[1];
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800376 sa = (struct sockaddr *)sin;
377 len = sizeof (sin_t);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700378 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800379 ASSERT(connp->conn_family == AF_INET6);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700380 tbr->ADDR_length = sizeof (sin6_t);
381 sin6 = (sin6_t *)&tbr[1];
382 *sin6 = sin6_null;
383 sin6->sin6_family = AF_INET6;
384 mp->b_wptr = (uchar_t *)&sin6[1];
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800385 sa = (struct sockaddr *)sin6;
386 len = sizeof (sin6_t);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700387 }
388 break;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800389
390 case sizeof (sin_t): /* Complete IPv4 address */
391 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700392 sizeof (sin_t));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700393 break;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800394
395 case sizeof (sin6_t): /* Complete IPv6 address */
396 sa = (struct sockaddr *)mi_offset_param(mp,
397 tbr->ADDR_offset, sizeof (sin6_t));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700398 break;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800399
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700400 default:
401 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800402 "icmp_bind: bad ADDR_length %u", tbr->ADDR_length);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700403 icmp_err_ack(q, mp, TBADADDR, 0);
404 return;
405 }
nordmarkfc80c0d2007-10-11 22:57:36 -0700406
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800407 error = rawip_do_bind(connp, sa, len);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800408 if (error != 0) {
409 if (error > 0) {
410 icmp_err_ack(q, mp, TSYSERR, error);
411 } else {
412 icmp_err_ack(q, mp, -error, 0);
413 }
414 } else {
415 tbr->PRIM_type = T_BIND_ACK;
416 qreply(q, mp);
417 }
418}
419
420static int
421rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len)
422{
423 sin_t *sin;
424 sin6_t *sin6;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800425 icmp_t *icmp = connp->conn_icmp;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800426 int error = 0;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800427 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */
428 in_port_t lport; /* Network byte order */
429 ipaddr_t v4src; /* Set if AF_INET */
430 in6_addr_t v6src;
431 uint_t scopeid = 0;
432 zoneid_t zoneid = IPCL_ZONEID(connp);
433 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800434
435 if (sa == NULL || !OK_32PTR((char *)sa)) {
436 return (EINVAL);
437 }
438
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800439 switch (len) {
440 case sizeof (sin_t): /* Complete IPv4 address */
441 sin = (sin_t *)sa;
442 if (sin->sin_family != AF_INET ||
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800443 connp->conn_family != AF_INET) {
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800444 /* TSYSERR, EAFNOSUPPORT */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800445 return (EAFNOSUPPORT);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800446 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800447 v4src = sin->sin_addr.s_addr;
448 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
449 if (v4src != INADDR_ANY) {
450 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
451 B_TRUE);
452 }
453 lport = sin->sin_port;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800454 break;
455 case sizeof (sin6_t): /* Complete IPv6 address */
456 sin6 = (sin6_t *)sa;
457 if (sin6->sin6_family != AF_INET6 ||
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800458 connp->conn_family != AF_INET6) {
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800459 /* TSYSERR, EAFNOSUPPORT */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800460 return (EAFNOSUPPORT);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800461 }
462 /* No support for mapped addresses on raw sockets */
463 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
464 /* TSYSERR, EADDRNOTAVAIL */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800465 return (EADDRNOTAVAIL);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800466 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800467 v6src = sin6->sin6_addr;
468 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
469 if (IN6_IS_ADDR_LINKSCOPE(&v6src))
470 scopeid = sin6->sin6_scope_id;
471 laddr_type = ip_laddr_verify_v6(&v6src, zoneid, ipst,
472 B_TRUE, scopeid);
473 }
474 lport = sin6->sin6_port;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800475 break;
476
477 default:
478 /* TBADADDR */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800479 return (EADDRNOTAVAIL);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800480 }
481
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800482 /* Is the local address a valid unicast, multicast, or broadcast? */
483 if (laddr_type == IPVL_BAD)
484 return (EADDRNOTAVAIL);
485
486 /*
487 * The state must be TS_UNBND.
488 */
489 mutex_enter(&connp->conn_lock);
490 if (icmp->icmp_state != TS_UNBND) {
491 mutex_exit(&connp->conn_lock);
492 return (-TOUTSTATE);
493 }
nordmarkfc80c0d2007-10-11 22:57:36 -0700494
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700495 /*
496 * Copy the source address into our icmp structure. This address
497 * may still be zero; if so, ip will fill in the correct address
498 * each time an outbound packet is passed to it.
nordmarkfc80c0d2007-10-11 22:57:36 -0700499 * If we are binding to a broadcast or multicast address then
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800500 * we just set the conn_bound_addr since we don't want to use
501 * that as the source address when sending.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700502 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800503 connp->conn_bound_addr_v6 = v6src;
504 connp->conn_laddr_v6 = v6src;
505 if (scopeid != 0) {
506 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
507 connp->conn_ixa->ixa_scopeid = scopeid;
508 connp->conn_incoming_ifindex = scopeid;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700509 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800510 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
511 connp->conn_incoming_ifindex = connp->conn_bound_if;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700512 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800513
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800514 switch (laddr_type) {
515 case IPVL_UNICAST_UP:
516 case IPVL_UNICAST_DOWN:
517 connp->conn_saddr_v6 = v6src;
518 connp->conn_mcbc_bind = B_FALSE;
519 break;
520 case IPVL_MCAST:
521 case IPVL_BCAST:
522 /* ip_set_destination will pick a source address later */
523 connp->conn_saddr_v6 = ipv6_all_zeros;
524 connp->conn_mcbc_bind = B_TRUE;
525 break;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800526 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800527
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800528 /* Any errors after this point should use late_error */
529
530 /*
531 * Use sin_port/sin6_port since applications like psh use SOCK_RAW
532 * with IPPROTO_TCP.
533 */
534 connp->conn_lport = lport;
535 connp->conn_fport = 0;
536
537 if (connp->conn_family == AF_INET) {
538 ASSERT(connp->conn_ipversion == IPV4_VERSION);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800539 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800540 ASSERT(connp->conn_ipversion == IPV6_VERSION);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800541 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800542
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800543 icmp->icmp_state = TS_IDLE;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800544
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800545 /*
546 * We create an initial header template here to make a subsequent
547 * sendto have a starting point. Since conn_last_dst is zero the
548 * first sendto will always follow the 'dst changed' code path.
549 * Note that we defer massaging options and the related checksum
550 * adjustment until we have a destination address.
551 */
552 error = icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
553 &connp->conn_faddr_v6, connp->conn_flowinfo);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800554 if (error != 0) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800555 mutex_exit(&connp->conn_lock);
556 goto late_error;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700557 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800558 /* Just in case */
559 connp->conn_faddr_v6 = ipv6_all_zeros;
560 connp->conn_v6lastdst = ipv6_all_zeros;
561 mutex_exit(&connp->conn_lock);
562
563 error = ip_laddr_fanout_insert(connp);
564 if (error != 0)
565 goto late_error;
566
567 /* Bind succeeded */
568 return (0);
569
570late_error:
571 mutex_enter(&connp->conn_lock);
572 connp->conn_saddr_v6 = ipv6_all_zeros;
573 connp->conn_bound_addr_v6 = ipv6_all_zeros;
574 connp->conn_laddr_v6 = ipv6_all_zeros;
575 if (scopeid != 0) {
576 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
577 connp->conn_incoming_ifindex = connp->conn_bound_if;
578 }
579 icmp->icmp_state = TS_UNBND;
580 connp->conn_v6lastdst = ipv6_all_zeros;
581 connp->conn_lport = 0;
582
583 /* Restore the header that was built above - different source address */
584 (void) icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
585 &connp->conn_faddr_v6, connp->conn_flowinfo);
586 mutex_exit(&connp->conn_lock);
587 return (error);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700588}
589
590/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800591 * Tell IP to just bind to the protocol.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700592 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800593static void
594icmp_bind_proto(icmp_t *icmp)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700595{
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800596 conn_t *connp = icmp->icmp_connp;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700597
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800598 mutex_enter(&connp->conn_lock);
599 connp->conn_saddr_v6 = ipv6_all_zeros;
600 connp->conn_laddr_v6 = ipv6_all_zeros;
601 connp->conn_faddr_v6 = ipv6_all_zeros;
602 connp->conn_v6lastdst = ipv6_all_zeros;
603 mutex_exit(&connp->conn_lock);
nordmarkfc80c0d2007-10-11 22:57:36 -0700604
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800605 (void) ip_laddr_fanout_insert(connp);
nordmarkfc80c0d2007-10-11 22:57:36 -0700606}
607
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800608/*
609 * This routine handles each T_CONN_REQ message passed to icmp. It
610 * associates a default destination address with the stream.
611 *
612 * After various error checks are completed, icmp_connect() lays
613 * the target address and port into the composite header template.
614 * Then we ask IP for information, including a source address if we didn't
615 * already have one. Finally we send up the T_OK_ACK reply message.
616 */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700617static void
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800618icmp_tpi_connect(queue_t *q, mblk_t *mp)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700619{
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800620 conn_t *connp = Q_TO_CONN(q);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700621 struct T_conn_req *tcr;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800622 struct sockaddr *sa;
623 socklen_t len;
624 int error;
Erik Nordmarkde8c4a12009-02-12 08:42:06 -0800625 cred_t *cr;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800626 pid_t pid;
Erik Nordmarkde8c4a12009-02-12 08:42:06 -0800627 /*
628 * All Solaris components should pass a db_credp
629 * for this TPI message, hence we ASSERT.
630 * But in case there is some other M_PROTO that looks
631 * like a TPI message sent by some other kernel
632 * component, we check and return an error.
633 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800634 cr = msg_getcred(mp, &pid);
Erik Nordmarkde8c4a12009-02-12 08:42:06 -0800635 ASSERT(cr != NULL);
636 if (cr == NULL) {
637 icmp_err_ack(q, mp, TSYSERR, EINVAL);
638 return;
639 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700640
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700641 tcr = (struct T_conn_req *)mp->b_rptr;
642 /* Sanity checks */
nordmarkfc80c0d2007-10-11 22:57:36 -0700643 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700644 icmp_err_ack(q, mp, TPROTO, 0);
645 return;
646 }
647
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700648 if (tcr->OPT_length != 0) {
649 icmp_err_ack(q, mp, TBADOPT, 0);
650 return;
651 }
nordmarkfc80c0d2007-10-11 22:57:36 -0700652
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800653 len = tcr->DEST_length;
654
655 switch (len) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700656 default:
657 icmp_err_ack(q, mp, TBADADDR, 0);
658 return;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700659 case sizeof (sin_t):
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800660 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700661 sizeof (sin_t));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700662 break;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700663 case sizeof (sin6_t):
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800664 sa = (struct sockaddr *)mi_offset_param(mp,
665 tcr->DEST_offset, sizeof (sin6_t));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700666 break;
667 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800668
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800669 error = proto_verify_ip_addr(connp->conn_family, sa, len);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800670 if (error != 0) {
671 icmp_err_ack(q, mp, TSYSERR, error);
672 return;
673 }
674
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800675 error = rawip_do_connect(connp, sa, len, cr, pid);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800676 if (error != 0) {
677 if (error < 0) {
678 icmp_err_ack(q, mp, -error, 0);
679 } else {
680 icmp_err_ack(q, mp, 0, error);
681 }
682 } else {
683 mblk_t *mp1;
684
685 /*
686 * We have to send a connection confirmation to
687 * keep TLI happy.
688 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800689 if (connp->conn_family == AF_INET) {
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800690 mp1 = mi_tpi_conn_con(NULL, (char *)sa,
691 sizeof (sin_t), NULL, 0);
692 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800693 ASSERT(connp->conn_family == AF_INET6);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800694 mp1 = mi_tpi_conn_con(NULL, (char *)sa,
695 sizeof (sin6_t), NULL, 0);
696 }
697 if (mp1 == NULL) {
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800698 icmp_err_ack(q, mp, TSYSERR, ENOMEM);
699 return;
700 }
701
702 /*
703 * Send ok_ack for T_CONN_REQ
704 */
705 mp = mi_tpi_ok_ack_alloc(mp);
706 if (mp == NULL) {
707 /* Unable to reuse the T_CONN_REQ for the ack. */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800708 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
709 return;
710 }
711 putnext(connp->conn_rq, mp);
712 putnext(connp->conn_rq, mp1);
713 }
714}
715
716static int
Erik Nordmarkde8c4a12009-02-12 08:42:06 -0800717rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800718 cred_t *cr, pid_t pid)
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800719{
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800720 icmp_t *icmp;
721 sin_t *sin;
722 sin6_t *sin6;
723 int error;
Toomas Soome8a06b3d2018-10-15 22:13:16 +0300724 uint16_t dstport;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800725 ipaddr_t v4dst;
726 in6_addr_t v6dst;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800727 uint32_t flowinfo;
728 ip_xmit_attr_t *ixa;
Erik Nordmarke5e79712010-04-27 02:41:28 -0700729 ip_xmit_attr_t *oldixa;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800730 uint_t scopeid = 0;
731 uint_t srcid = 0;
732 in6_addr_t v6src = connp->conn_saddr_v6;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800733
734 icmp = connp->conn_icmp;
735
736 if (sa == NULL || !OK_32PTR((char *)sa)) {
737 return (EINVAL);
738 }
739
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800740 ASSERT(sa != NULL && len != 0);
Toomas Soomeab82c292019-12-28 14:24:51 +0200741 sin = NULL;
742 sin6 = NULL;
743 dstport = 0;
744 flowinfo = 0;
745 v4dst = INADDR_ANY;
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800746
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800747 /*
748 * Determine packet type based on type of address passed in
749 * the request should contain an IPv4 or IPv6 address.
750 * Make sure that address family matches the type of
751 * family of the address passed down.
752 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800753 switch (len) {
754 case sizeof (sin_t):
755 sin = (sin_t *)sa;
756
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800757 v4dst = sin->sin_addr.s_addr;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800758 dstport = sin->sin_port;
759 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
760 ASSERT(connp->conn_ipversion == IPV4_VERSION);
761 break;
762
763 case sizeof (sin6_t):
764 sin6 = (sin6_t *)sa;
765
766 /* No support for mapped addresses on raw sockets */
767 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
768 return (EADDRNOTAVAIL);
769 }
770 v6dst = sin6->sin6_addr;
771 dstport = sin6->sin6_port;
772 ASSERT(connp->conn_ipversion == IPV6_VERSION);
773 flowinfo = sin6->sin6_flowinfo;
774 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
775 scopeid = sin6->sin6_scope_id;
776 srcid = sin6->__sin6_src_id;
777 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
Dan McDonalda1ca8b42014-02-14 11:27:16 -0500778 /* Due to check above, we know sin6_addr is v6-only. */
779 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
780 B_FALSE, connp->conn_netstack)) {
781 /* Mismatch - v6src would be v4mapped. */
782 return (EADDRNOTAVAIL);
783 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800784 }
785 break;
786 }
787
788 /*
789 * If there is a different thread using conn_ixa then we get a new
790 * copy and cut the old one loose from conn_ixa. Otherwise we use
791 * conn_ixa and prevent any other thread from using/changing it.
792 * Once connect() is done other threads can use conn_ixa since the
793 * refcnt will be back at one.
Erik Nordmarke5e79712010-04-27 02:41:28 -0700794 * We defer updating conn_ixa until later to handle any concurrent
795 * conn_ixa_cleanup thread.
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800796 */
Erik Nordmarke5e79712010-04-27 02:41:28 -0700797 ixa = conn_get_ixa(connp, B_FALSE);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800798 if (ixa == NULL)
799 return (ENOMEM);
800
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800801 mutex_enter(&connp->conn_lock);
802 /*
803 * This icmp_t must have bound already before doing a connect.
804 * Reject if a connect is in progress (we drop conn_lock during
805 * rawip_do_connect).
806 */
807 if (icmp->icmp_state == TS_UNBND || icmp->icmp_state == TS_WCON_CREQ) {
808 mutex_exit(&connp->conn_lock);
809 ixa_refrele(ixa);
810 return (-TOUTSTATE);
811 }
812
813 if (icmp->icmp_state == TS_DATA_XFER) {
814 /* Already connected - clear out state */
815 if (connp->conn_mcbc_bind)
816 connp->conn_saddr_v6 = ipv6_all_zeros;
817 else
818 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
819 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
820 connp->conn_faddr_v6 = ipv6_all_zeros;
821 icmp->icmp_state = TS_IDLE;
822 }
823
824 /*
825 * Use sin_port/sin6_port since applications like psh use SOCK_RAW
826 * with IPPROTO_TCP.
827 */
828 connp->conn_fport = dstport;
829 if (connp->conn_ipversion == IPV4_VERSION) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700830 /*
831 * Interpret a zero destination to mean loopback.
832 * Update the T_CONN_REQ (sin/sin6) since it is used to
833 * generate the T_CONN_CON.
834 */
835 if (v4dst == INADDR_ANY) {
836 v4dst = htonl(INADDR_LOOPBACK);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800837 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
838 ASSERT(connp->conn_family == AF_INET);
839 sin->sin_addr.s_addr = v4dst;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700840 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800841 connp->conn_faddr_v6 = v6dst;
842 connp->conn_flowinfo = 0;
843 } else {
844 ASSERT(connp->conn_ipversion == IPV6_VERSION);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700845 /*
846 * Interpret a zero destination to mean loopback.
847 * Update the T_CONN_REQ (sin/sin6) since it is used to
848 * generate the T_CONN_CON.
849 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800850 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
851 v6dst = ipv6_loopback;
852 sin6->sin6_addr = v6dst;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700853 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800854 connp->conn_faddr_v6 = v6dst;
855 connp->conn_flowinfo = flowinfo;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700856 }
857
Erik Nordmark76a10332010-02-17 19:52:19 -0800858 /*
859 * We update our cred/cpid based on the caller of connect
860 */
861 if (connp->conn_cred != cr) {
862 crhold(cr);
863 crfree(connp->conn_cred);
864 connp->conn_cred = cr;
865 }
866 connp->conn_cpid = pid;
Erik Nordmarkbe4c8f72010-03-03 23:02:28 -0800867 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800868 ixa->ixa_cred = cr;
869 ixa->ixa_cpid = pid;
870 if (is_system_labeled()) {
871 /* We need to restart with a label based on the cred */
872 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
873 }
nordmarkfc80c0d2007-10-11 22:57:36 -0700874
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800875 if (scopeid != 0) {
876 ixa->ixa_flags |= IXAF_SCOPEID_SET;
877 ixa->ixa_scopeid = scopeid;
878 connp->conn_incoming_ifindex = scopeid;
879 } else {
880 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
881 connp->conn_incoming_ifindex = connp->conn_bound_if;
882 }
883
884 /*
885 * conn_connect will drop conn_lock and reacquire it.
886 * To prevent a send* from messing with this icmp_t while the lock
887 * is dropped we set icmp_state and clear conn_v6lastdst.
888 * That will make all send* fail with EISCONN.
889 */
890 connp->conn_v6lastdst = ipv6_all_zeros;
891 icmp->icmp_state = TS_WCON_CREQ;
892
893 error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
894 mutex_exit(&connp->conn_lock);
895 if (error != 0)
896 goto connect_failed;
897
898 /*
899 * The addresses have been verified. Time to insert in
900 * the correct fanout list.
901 */
902 error = ipcl_conn_insert(connp);
903 if (error != 0)
904 goto connect_failed;
905
906 mutex_enter(&connp->conn_lock);
907 error = icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
908 &connp->conn_faddr_v6, connp->conn_flowinfo);
909 if (error != 0) {
910 mutex_exit(&connp->conn_lock);
911 goto connect_failed;
nordmarkfc80c0d2007-10-11 22:57:36 -0700912 }
913
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700914 icmp->icmp_state = TS_DATA_XFER;
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800915 /* Record this as the "last" send even though we haven't sent any */
916 connp->conn_v6lastdst = connp->conn_faddr_v6;
917 connp->conn_lastipversion = connp->conn_ipversion;
918 connp->conn_lastdstport = connp->conn_fport;
919 connp->conn_lastflowinfo = connp->conn_flowinfo;
920 connp->conn_lastscopeid = scopeid;
921 connp->conn_lastsrcid = srcid;
922 /* Also remember a source to use together with lastdst */
923 connp->conn_v6lastsrc = v6src;
Erik Nordmarke5e79712010-04-27 02:41:28 -0700924
925 oldixa = conn_replace_ixa(connp, ixa);
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800926 mutex_exit(&connp->conn_lock);
Erik Nordmarke5e79712010-04-27 02:41:28 -0700927 ixa_refrele(oldixa);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700928
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800929 ixa_refrele(ixa);
930 return (0);
931
932connect_failed:
933 if (ixa != NULL)
934 ixa_refrele(ixa);
935 mutex_enter(&connp->conn_lock);
936 icmp->icmp_state = TS_IDLE;
937 /* In case the source address was set above */
938 if (connp->conn_mcbc_bind)
939 connp->conn_saddr_v6 = ipv6_all_zeros;
940 else
941 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
942 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
943 connp->conn_faddr_v6 = ipv6_all_zeros;
944 connp->conn_v6lastdst = ipv6_all_zeros;
945 connp->conn_flowinfo = 0;
946
947 (void) icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
948 &connp->conn_faddr_v6, connp->conn_flowinfo);
949 mutex_exit(&connp->conn_lock);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800950 return (error);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700951}
952
nordmarkfc80c0d2007-10-11 22:57:36 -0700953static void
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800954rawip_do_close(conn_t *connp)
nordmarkfc80c0d2007-10-11 22:57:36 -0700955{
nordmarkfc80c0d2007-10-11 22:57:36 -0700956 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700957
nordmarkfc80c0d2007-10-11 22:57:36 -0700958 ip_quiesce_conn(connp);
959
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800960 if (!IPCL_IS_NONSTR(connp)) {
961 qprocsoff(connp->conn_rq);
962 }
nordmarkfc80c0d2007-10-11 22:57:36 -0700963
964 icmp_close_free(connp);
965
966 /*
967 * Now we are truly single threaded on this stream, and can
968 * delete the things hanging off the connp, and finally the connp.
969 * We removed this connp from the fanout list, it cannot be
970 * accessed thru the fanouts, and we already waited for the
971 * conn_ref to drop to 0. We are already in close, so
972 * there cannot be any other thread from the top. qprocsoff
973 * has completed, and service has completed or won't run in
974 * future.
975 */
976 ASSERT(connp->conn_ref == 1);
977
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800978 if (!IPCL_IS_NONSTR(connp)) {
979 inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
980 } else {
Rao Shoaibbfcb55b2009-01-05 10:51:43 -0800981 ip_free_helper_stream(connp);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800982 }
nordmarkfc80c0d2007-10-11 22:57:36 -0700983
984 connp->conn_ref--;
985 ipcl_conn_destroy(connp);
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800986}
987
Toomas Soome5e1743f2018-10-15 22:13:49 +0300988/* ARGSUSED */
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800989static int
Toomas Soome5e1743f2018-10-15 22:13:49 +0300990icmp_close(queue_t *q, int flags, cred_t *credp __unused)
Yu Xiangning0f1702c2008-12-11 20:04:13 -0800991{
992 conn_t *connp;
993
994 if (flags & SO_FALLBACK) {
995 /*
996 * stream is being closed while in fallback
997 * simply free the resources that were allocated
998 */
999 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
1000 qprocsoff(q);
1001 goto done;
1002 }
1003
1004 connp = Q_TO_CONN(q);
1005 (void) rawip_do_close(connp);
1006done:
nordmarkfc80c0d2007-10-11 22:57:36 -07001007 q->q_ptr = WR(q)->q_ptr = NULL;
1008 return (0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001009}
1010
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001011static void
1012icmp_close_free(conn_t *connp)
1013{
1014 icmp_t *icmp = connp->conn_icmp;
1015
1016 if (icmp->icmp_filter != NULL) {
1017 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t));
1018 icmp->icmp_filter = NULL;
1019 }
1020
1021 /*
1022 * Clear any fields which the kmem_cache constructor clears.
1023 * Only icmp_connp needs to be preserved.
1024 * TBD: We should make this more efficient to avoid clearing
1025 * everything.
1026 */
1027 ASSERT(icmp->icmp_connp == connp);
1028 bzero(icmp, sizeof (icmp_t));
1029 icmp->icmp_connp = connp;
1030}
1031
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001032/*
1033 * This routine handles each T_DISCON_REQ message passed to icmp
1034 * as an indicating that ICMP is no longer connected. This results
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001035 * in telling IP to restore the binding to just the local address.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001036 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001037static int
1038icmp_do_disconnect(conn_t *connp)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001039{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001040 icmp_t *icmp = connp->conn_icmp;
1041 int error;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001042
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001043 mutex_enter(&connp->conn_lock);
1044 if (icmp->icmp_state != TS_DATA_XFER) {
1045 mutex_exit(&connp->conn_lock);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001046 return (-TOUTSTATE);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001047 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001048 if (connp->conn_mcbc_bind)
1049 connp->conn_saddr_v6 = ipv6_all_zeros;
1050 else
1051 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
1052 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
1053 connp->conn_faddr_v6 = ipv6_all_zeros;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001054 icmp->icmp_state = TS_IDLE;
1055
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001056 connp->conn_v6lastdst = ipv6_all_zeros;
1057 error = icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
1058 &connp->conn_faddr_v6, connp->conn_flowinfo);
1059 mutex_exit(&connp->conn_lock);
1060 if (error != 0)
1061 return (error);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001062
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001063 /*
1064 * Tell IP to remove the full binding and revert
1065 * to the local address binding.
1066 */
1067 return (ip_laddr_fanout_insert(connp));
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001068}
1069
1070static void
1071icmp_tpi_disconnect(queue_t *q, mblk_t *mp)
1072{
1073 conn_t *connp = Q_TO_CONN(q);
1074 int error;
1075
1076 /*
1077 * Allocate the largest primitive we need to send back
1078 * T_error_ack is > than T_ok_ack
1079 */
1080 mp = reallocb(mp, sizeof (struct T_error_ack), 1);
1081 if (mp == NULL) {
1082 /* Unable to reuse the T_DISCON_REQ for the ack. */
1083 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
1084 return;
1085 }
1086
1087 error = icmp_do_disconnect(connp);
1088
1089 if (error != 0) {
1090 if (error > 0) {
1091 icmp_err_ack(q, mp, 0, error);
1092 } else {
1093 icmp_err_ack(q, mp, -error, 0);
1094 }
1095 } else {
1096 mp = mi_tpi_ok_ack_alloc(mp);
1097 ASSERT(mp != NULL);
1098 qreply(q, mp);
1099 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001100}
1101
1102static int
1103icmp_disconnect(conn_t *connp)
1104{
1105 int error;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001106
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001107 connp->conn_dgram_errind = B_FALSE;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001108
1109 error = icmp_do_disconnect(connp);
1110
1111 if (error < 0)
1112 error = proto_tlitosyserr(-error);
1113 return (error);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001114}
1115
1116/* This routine creates a T_ERROR_ACK message and passes it upstream. */
1117static void
1118icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
1119{
1120 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
1121 qreply(q, mp);
1122}
1123
1124/* Shorthand to generate and send TPI error acks to our client */
1125static void
1126icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
1127 t_scalar_t t_error, int sys_error)
1128{
1129 struct T_error_ack *teackp;
1130
1131 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
1132 M_PCPROTO, T_ERROR_ACK)) != NULL) {
1133 teackp = (struct T_error_ack *)mp->b_rptr;
1134 teackp->ERROR_prim = primitive;
1135 teackp->TLI_error = t_error;
1136 teackp->UNIX_error = sys_error;
1137 qreply(q, mp);
1138 }
1139}
1140
1141/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001142 * icmp_icmp_input is called as conn_recvicmp to process ICMP messages.
1143 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1144 * Assumes that IP has pulled up everything up to and including the ICMP header.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001145 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001146/* ARGSUSED2 */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001147static void
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001148icmp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001149{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001150 conn_t *connp = (conn_t *)arg1;
1151 icmp_t *icmp = connp->conn_icmp;
1152 icmph_t *icmph;
1153 ipha_t *ipha;
1154 int iph_hdr_length;
1155 sin_t sin;
1156 mblk_t *mp1;
1157 int error = 0;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001158
1159 ipha = (ipha_t *)mp->b_rptr;
1160
nordmarkfc80c0d2007-10-11 22:57:36 -07001161 ASSERT(OK_32PTR(mp->b_rptr));
1162
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001163 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
1164 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001165 icmp_icmp_error_ipv6(connp, mp, ira);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001166 return;
1167 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001168 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001169
nordmarkfc80c0d2007-10-11 22:57:36 -07001170 /* Skip past the outer IP and ICMP headers */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001171 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
1172 iph_hdr_length = ira->ira_ip_hdr_length;
1173 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
1174 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */
1175
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001176 iph_hdr_length = IPH_HDR_LENGTH(ipha);
1177
1178 switch (icmph->icmph_type) {
1179 case ICMP_DEST_UNREACHABLE:
1180 switch (icmph->icmph_code) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001181 case ICMP_FRAGMENTATION_NEEDED: {
1182 ipha_t *ipha;
1183 ip_xmit_attr_t *ixa;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001184 /*
1185 * IP has already adjusted the path MTU.
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001186 * But we need to adjust DF for IPv4.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001187 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001188 if (connp->conn_ipversion != IPV4_VERSION)
1189 break;
1190
1191 ixa = conn_get_ixa(connp, B_FALSE);
1192 if (ixa == NULL || ixa->ixa_ire == NULL) {
1193 /*
1194 * Some other thread holds conn_ixa. We will
1195 * redo this on the next ICMP too big.
1196 */
1197 if (ixa != NULL)
1198 ixa_refrele(ixa);
1199 break;
1200 }
1201 (void) ip_get_pmtu(ixa);
1202
1203 mutex_enter(&connp->conn_lock);
1204 ipha = (ipha_t *)connp->conn_ht_iphc;
1205 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
1206 ipha->ipha_fragment_offset_and_flags |=
1207 IPH_DF_HTONS;
1208 } else {
1209 ipha->ipha_fragment_offset_and_flags &=
1210 ~IPH_DF_HTONS;
1211 }
1212 mutex_exit(&connp->conn_lock);
1213 ixa_refrele(ixa);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001214 break;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001215 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001216 case ICMP_PORT_UNREACHABLE:
1217 case ICMP_PROTOCOL_UNREACHABLE:
1218 error = ECONNREFUSED;
1219 break;
1220 default:
1221 /* Transient errors */
1222 break;
1223 }
1224 break;
1225 default:
1226 /* Transient errors */
1227 break;
1228 }
1229 if (error == 0) {
1230 freemsg(mp);
1231 return;
1232 }
1233
nordmarkfc80c0d2007-10-11 22:57:36 -07001234 /*
1235 * Deliver T_UDERROR_IND when the application has asked for it.
1236 * The socket layer enables this automatically when connected.
1237 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001238 if (!connp->conn_dgram_errind) {
nordmarkfc80c0d2007-10-11 22:57:36 -07001239 freemsg(mp);
1240 return;
1241 }
1242
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001243 sin = sin_null;
1244 sin.sin_family = AF_INET;
1245 sin.sin_addr.s_addr = ipha->ipha_dst;
Anders Persson41174432009-02-12 17:35:05 -08001246
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001247 if (IPCL_IS_NONSTR(connp)) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001248 mutex_enter(&connp->conn_lock);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001249 if (icmp->icmp_state == TS_DATA_XFER) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001250 if (sin.sin_addr.s_addr == connp->conn_faddr_v4) {
1251 mutex_exit(&connp->conn_lock);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001252 (*connp->conn_upcalls->su_set_error)
1253 (connp->conn_upper_handle, error);
1254 goto done;
1255 }
1256 } else {
1257 icmp->icmp_delayed_error = error;
1258 *((sin_t *)&icmp->icmp_delayed_addr) = sin;
1259 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001260 mutex_exit(&connp->conn_lock);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001261 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001262 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
1263 error);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001264 if (mp1 != NULL)
1265 putnext(connp->conn_rq, mp1);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001266 }
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001267done:
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001268 freemsg(mp);
1269}
1270
1271/*
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001272 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMP for IPv6.
1273 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1274 * Assumes that IP has pulled up all the extension headers as well as the
1275 * ICMPv6 header.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001276 */
1277static void
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001278icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001279{
1280 icmp6_t *icmp6;
1281 ip6_t *ip6h, *outer_ip6h;
1282 uint16_t iph_hdr_length;
1283 uint8_t *nexthdrp;
1284 sin6_t sin6;
1285 mblk_t *mp1;
1286 int error = 0;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001287 icmp_t *icmp = connp->conn_icmp;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001288
1289 outer_ip6h = (ip6_t *)mp->b_rptr;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001290#ifdef DEBUG
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001291 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1292 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1293 else
1294 iph_hdr_length = IPV6_HDR_LEN;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001295 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
1296#endif
1297 /* Skip past the outer IP and ICMP headers */
1298 iph_hdr_length = ira->ira_ip_hdr_length;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001299 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001300
1301 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001302 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1303 freemsg(mp);
1304 return;
1305 }
nordmarkfc80c0d2007-10-11 22:57:36 -07001306
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001307 switch (icmp6->icmp6_type) {
1308 case ICMP6_DST_UNREACH:
1309 switch (icmp6->icmp6_code) {
1310 case ICMP6_DST_UNREACH_NOPORT:
1311 error = ECONNREFUSED;
1312 break;
1313 case ICMP6_DST_UNREACH_ADMIN:
1314 case ICMP6_DST_UNREACH_NOROUTE:
1315 case ICMP6_DST_UNREACH_BEYONDSCOPE:
1316 case ICMP6_DST_UNREACH_ADDR:
1317 /* Transient errors */
1318 break;
1319 default:
1320 break;
1321 }
1322 break;
1323 case ICMP6_PACKET_TOO_BIG: {
1324 struct T_unitdata_ind *tudi;
1325 struct T_opthdr *toh;
1326 size_t udi_size;
1327 mblk_t *newmp;
1328 t_scalar_t opt_length = sizeof (struct T_opthdr) +
1329 sizeof (struct ip6_mtuinfo);
1330 sin6_t *sin6;
1331 struct ip6_mtuinfo *mtuinfo;
1332
1333 /*
1334 * If the application has requested to receive path mtu
1335 * information, send up an empty message containing an
1336 * IPV6_PATHMTU ancillary data item.
1337 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001338 if (!connp->conn_ipv6_recvpathmtu)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001339 break;
1340
1341 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1342 opt_length;
1343 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
nordmarkfc80c0d2007-10-11 22:57:36 -07001344 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001345 break;
1346 }
1347
1348 /*
1349 * newmp->b_cont is left to NULL on purpose. This is an
1350 * empty message containing only ancillary data.
1351 */
1352 newmp->b_datap->db_type = M_PROTO;
1353 tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1354 newmp->b_wptr = (uchar_t *)tudi + udi_size;
1355 tudi->PRIM_type = T_UNITDATA_IND;
1356 tudi->SRC_length = sizeof (sin6_t);
1357 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1358 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1359 tudi->OPT_length = opt_length;
1360
1361 sin6 = (sin6_t *)&tudi[1];
1362 bzero(sin6, sizeof (sin6_t));
1363 sin6->sin6_family = AF_INET6;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001364 sin6->sin6_addr = connp->conn_faddr_v6;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001365
1366 toh = (struct T_opthdr *)&sin6[1];
1367 toh->level = IPPROTO_IPV6;
1368 toh->name = IPV6_PATHMTU;
1369 toh->len = opt_length;
1370 toh->status = 0;
1371
1372 mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1373 bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1374 mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1375 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1376 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1377 /*
1378 * We've consumed everything we need from the original
1379 * message. Free it, then send our empty message.
1380 */
1381 freemsg(mp);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001382 icmp_ulp_recv(connp, newmp, msgdsize(newmp));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001383 return;
1384 }
1385 case ICMP6_TIME_EXCEEDED:
1386 /* Transient errors */
1387 break;
1388 case ICMP6_PARAM_PROB:
1389 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1390 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1391 (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1392 (uchar_t *)nexthdrp) {
1393 error = ECONNREFUSED;
1394 break;
1395 }
1396 break;
1397 }
1398 if (error == 0) {
1399 freemsg(mp);
1400 return;
1401 }
1402
nordmarkfc80c0d2007-10-11 22:57:36 -07001403 /*
1404 * Deliver T_UDERROR_IND when the application has asked for it.
1405 * The socket layer enables this automatically when connected.
1406 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001407 if (!connp->conn_dgram_errind) {
nordmarkfc80c0d2007-10-11 22:57:36 -07001408 freemsg(mp);
1409 return;
1410 }
1411
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001412 sin6 = sin6_null;
1413 sin6.sin6_family = AF_INET6;
1414 sin6.sin6_addr = ip6h->ip6_dst;
1415 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001416 if (IPCL_IS_NONSTR(connp)) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001417 mutex_enter(&connp->conn_lock);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001418 if (icmp->icmp_state == TS_DATA_XFER) {
1419 if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001420 &connp->conn_faddr_v6)) {
1421 mutex_exit(&connp->conn_lock);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001422 (*connp->conn_upcalls->su_set_error)
1423 (connp->conn_upper_handle, error);
1424 goto done;
1425 }
1426 } else {
1427 icmp->icmp_delayed_error = error;
1428 *((sin6_t *)&icmp->icmp_delayed_addr) = sin6;
1429 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001430 mutex_exit(&connp->conn_lock);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001431 } else {
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001432 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1433 NULL, 0, error);
1434 if (mp1 != NULL)
1435 putnext(connp->conn_rq, mp1);
1436 }
1437done:
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001438 freemsg(mp);
1439}
1440
1441/*
1442 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput.
1443 * The local address is filled in if endpoint is bound. The remote address
1444 * is filled in if remote address has been specified ("connected endpoint")
1445 * (The concept of connected CLTS sockets is alien to published TPI
1446 * but we support it anyway).
1447 */
1448static void
1449icmp_addr_req(queue_t *q, mblk_t *mp)
1450{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001451 struct sockaddr *sa;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001452 mblk_t *ackmp;
1453 struct T_addr_ack *taa;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001454 icmp_t *icmp = Q_TO_ICMP(q);
1455 conn_t *connp = icmp->icmp_connp;
1456 uint_t addrlen;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001457
1458 /* Make it large enough for worst case */
1459 ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1460 2 * sizeof (sin6_t), 1);
1461 if (ackmp == NULL) {
1462 icmp_err_ack(q, mp, TSYSERR, ENOMEM);
1463 return;
1464 }
1465 taa = (struct T_addr_ack *)ackmp->b_rptr;
1466
1467 bzero(taa, sizeof (struct T_addr_ack));
1468 ackmp->b_wptr = (uchar_t *)&taa[1];
1469
1470 taa->PRIM_type = T_ADDR_ACK;
1471 ackmp->b_datap->db_type = M_PCPROTO;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001472
1473 if (connp->conn_family == AF_INET)
1474 addrlen = sizeof (sin_t);
1475 else
1476 addrlen = sizeof (sin6_t);
1477
1478 mutex_enter(&connp->conn_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001479 /*
1480 * Note: Following code assumes 32 bit alignment of basic
1481 * data structures like sin_t and struct T_addr_ack.
1482 */
1483 if (icmp->icmp_state != TS_UNBND) {
1484 /*
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001485 * Fill in local address first
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001486 */
1487 taa->LOCADDR_offset = sizeof (*taa);
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001488 taa->LOCADDR_length = addrlen;
1489 sa = (struct sockaddr *)&taa[1];
1490 (void) conn_getsockname(connp, sa, &addrlen);
1491 ackmp->b_wptr += addrlen;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001492 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001493 if (icmp->icmp_state == TS_DATA_XFER) {
1494 /*
1495 * connected, fill remote address too
1496 */
1497 taa->REMADDR_length = addrlen;
1498 /* assumed 32-bit alignment */
1499 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1500 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1501 (void) conn_getpeername(connp, sa, &addrlen);
1502 ackmp->b_wptr += addrlen;
1503 }
1504 mutex_exit(&connp->conn_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001505 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1506 qreply(q, ackmp);
1507}
1508
1509static void
1510icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp)
1511{
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001512 conn_t *connp = icmp->icmp_connp;
1513
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001514 *tap = icmp_g_t_info_ack;
1515
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001516 if (connp->conn_family == AF_INET6)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001517 tap->ADDR_size = sizeof (sin6_t);
1518 else
1519 tap->ADDR_size = sizeof (sin_t);
1520 tap->CURRENT_state = icmp->icmp_state;
1521 tap->OPT_size = icmp_max_optsize;
1522}
1523
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001524static void
1525icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap,
1526 t_uscalar_t cap_bits1)
1527{
1528 tcap->CAP_bits1 = 0;
1529
1530 if (cap_bits1 & TC1_INFO) {
1531 icmp_copy_info(&tcap->INFO_ack, icmp);
1532 tcap->CAP_bits1 |= TC1_INFO;
1533 }
1534}
1535
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001536/*
1537 * This routine responds to T_CAPABILITY_REQ messages. It is called by
1538 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from
1539 * icmp_g_t_info_ack. The current state of the stream is copied from
1540 * icmp_state.
1541 */
1542static void
1543icmp_capability_req(queue_t *q, mblk_t *mp)
1544{
nordmarkfc80c0d2007-10-11 22:57:36 -07001545 icmp_t *icmp = Q_TO_ICMP(q);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001546 t_uscalar_t cap_bits1;
1547 struct T_capability_ack *tcap;
1548
1549 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1550
1551 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
nordmarkfc80c0d2007-10-11 22:57:36 -07001552 mp->b_datap->db_type, T_CAPABILITY_ACK);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001553 if (!mp)
1554 return;
1555
1556 tcap = (struct T_capability_ack *)mp->b_rptr;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001557
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001558 icmp_do_capability_ack(icmp, tcap, cap_bits1);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001559
1560 qreply(q, mp);
1561}
1562
1563/*
1564 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput.
1565 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack.
1566 * The current state of the stream is copied from icmp_state.
1567 */
1568static void
1569icmp_info_req(queue_t *q, mblk_t *mp)
1570{
nordmarkfc80c0d2007-10-11 22:57:36 -07001571 icmp_t *icmp = Q_TO_ICMP(q);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001572
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001573 /* Create a T_INFO_ACK message. */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001574 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1575 T_INFO_ACK);
1576 if (!mp)
1577 return;
1578 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp);
1579 qreply(q, mp);
1580}
1581
nordmarkfc80c0d2007-10-11 22:57:36 -07001582static int
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001583icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1584 int family)
jpk45916cd2006-03-24 12:29:20 -08001585{
nordmarkfc80c0d2007-10-11 22:57:36 -07001586 conn_t *connp;
1587 dev_t conn_dev;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001588 int error;
1589
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001590 /* If the stream is already open, return immediately. */
1591 if (q->q_ptr != NULL)
1592 return (0);
1593
nordmarkfc80c0d2007-10-11 22:57:36 -07001594 if (sflag == MODOPEN)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001595 return (EINVAL);
1596
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001597 /*
1598 * Since ICMP is not used so heavily, allocating from the small
1599 * arena should be sufficient.
1600 */
1601 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
1602 return (EBUSY);
1603 }
1604
1605 if (flag & SO_FALLBACK) {
1606 /*
1607 * Non streams socket needs a stream to fallback to
1608 */
1609 RD(q)->q_ptr = (void *)conn_dev;
1610 WR(q)->q_qinfo = &icmp_fallback_sock_winit;
1611 WR(q)->q_ptr = (void *)ip_minor_arena_sa;
1612 qprocson(q);
1613 return (0);
1614 }
1615
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001616 connp = rawip_do_open(family, credp, &error, KM_SLEEP);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001617 if (connp == NULL) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001618 ASSERT(error != 0);
Erik Nordmark7eb216a2010-06-21 13:09:13 -07001619 inet_minor_free(ip_minor_arena_sa, conn_dev);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001620 return (error);
1621 }
1622
1623 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1624 connp->conn_dev = conn_dev;
1625 connp->conn_minor_arena = ip_minor_arena_sa;
1626
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001627 /*
1628 * Initialize the icmp_t structure for this stream.
1629 */
1630 q->q_ptr = connp;
1631 WR(q)->q_ptr = connp;
1632 connp->conn_rq = q;
1633 connp->conn_wq = WR(q);
1634
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001635 WR(q)->q_hiwat = connp->conn_sndbuf;
1636 WR(q)->q_lowat = connp->conn_sndlowat;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001637
1638 qprocson(q);
1639
1640 /* Set the Stream head write offset. */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001641 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1642 (void) proto_set_rx_hiwat(connp->conn_rq, connp, connp->conn_rcvbuf);
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001643
1644 mutex_enter(&connp->conn_lock);
1645 connp->conn_state_flags &= ~CONN_INCIPIENT;
1646 mutex_exit(&connp->conn_lock);
1647
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001648 icmp_bind_proto(connp->conn_icmp);
1649
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001650 return (0);
1651}
1652
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001653/* For /dev/icmp aka AF_INET open */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001654static int
1655icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1656{
1657 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET));
1658}
1659
1660/* For /dev/icmp6 aka AF_INET6 open */
1661static int
1662icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1663{
1664 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6));
1665}
1666
1667/*
1668 * This is the open routine for icmp. It allocates a icmp_t structure for
1669 * the stream and, on the first open of the module, creates an ND table.
1670 */
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001671static conn_t *
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001672rawip_do_open(int family, cred_t *credp, int *err, int flags)
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001673{
1674 icmp_t *icmp;
1675 conn_t *connp;
1676 zoneid_t zoneid;
1677 netstack_t *ns;
1678 icmp_stack_t *is;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001679 int len;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001680 boolean_t isv6 = B_FALSE;
1681
1682 *err = secpolicy_net_icmpaccess(credp);
1683 if (*err != 0)
1684 return (NULL);
1685
1686 if (family == AF_INET6)
1687 isv6 = B_TRUE;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001688
dh155122f4b3ec62007-01-19 16:59:38 -08001689 ns = netstack_find_by_cred(credp);
1690 ASSERT(ns != NULL);
1691 is = ns->netstack_icmp;
1692 ASSERT(is != NULL);
1693
1694 /*
1695 * For exclusive stacks we set the zoneid to zero
1696 * to make ICMP operate as if in the global zone.
1697 */
nordmarkfc80c0d2007-10-11 22:57:36 -07001698 if (ns->netstack_stackid != GLOBAL_NETSTACKID)
dh155122f4b3ec62007-01-19 16:59:38 -08001699 zoneid = GLOBAL_ZONEID;
1700 else
1701 zoneid = crgetzoneid(credp);
1702
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001703 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
nordmarkfc80c0d2007-10-11 22:57:36 -07001704
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001705 connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns);
nordmarkfc80c0d2007-10-11 22:57:36 -07001706 icmp = connp->conn_icmp;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001707
1708 /*
nordmarkfc80c0d2007-10-11 22:57:36 -07001709 * ipcl_conn_create did a netstack_hold. Undo the hold that was
1710 * done by netstack_find_by_cred()
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001711 */
nordmarkfc80c0d2007-10-11 22:57:36 -07001712 netstack_rele(ns);
1713
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001714 /*
1715 * Since this conn_t/icmp_t is not yet visible to anybody else we don't
1716 * need to lock anything.
1717 */
1718 ASSERT(connp->conn_proto == IPPROTO_ICMP);
nordmarkfc80c0d2007-10-11 22:57:36 -07001719 ASSERT(connp->conn_icmp == icmp);
1720 ASSERT(icmp->icmp_connp == connp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001721
1722 /* Set the initial state of the stream and the privilege status. */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001723 icmp->icmp_state = TS_UNBND;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001724 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
nordmarkfc80c0d2007-10-11 22:57:36 -07001725 if (isv6) {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001726 connp->conn_family = AF_INET6;
1727 connp->conn_ipversion = IPV6_VERSION;
1728 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
1729 connp->conn_proto = IPPROTO_ICMPV6;
nordmarkfc80c0d2007-10-11 22:57:36 -07001730 /* May be changed by a SO_PROTOTYPE socket option. */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001731 connp->conn_proto = IPPROTO_ICMPV6;
1732 connp->conn_ixa->ixa_protocol = connp->conn_proto;
1733 connp->conn_ixa->ixa_raw_cksum_offset = 2;
1734 connp->conn_default_ttl = is->is_ipv6_hoplimit;
1735 len = sizeof (ip6_t);
nordmarkfc80c0d2007-10-11 22:57:36 -07001736 } else {
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001737 connp->conn_family = AF_INET;
1738 connp->conn_ipversion = IPV4_VERSION;
1739 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
nordmarkfc80c0d2007-10-11 22:57:36 -07001740 /* May be changed by a SO_PROTOTYPE socket option. */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001741 connp->conn_proto = IPPROTO_ICMP;
1742 connp->conn_ixa->ixa_protocol = connp->conn_proto;
1743 connp->conn_default_ttl = is->is_ipv4_ttl;
1744 len = sizeof (ipha_t);
nordmarkfc80c0d2007-10-11 22:57:36 -07001745 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001746 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
1747
1748 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1749
1750 /*
1751 * For the socket of protocol IPPROTO_RAW or when IP_HDRINCL is set,
1752 * the checksum is provided in the pre-built packet. We clear
1753 * IXAF_SET_ULP_CKSUM to tell IP that the application has sent a
1754 * complete IP header and not to compute the transport checksum.
1755 */
1756 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
1757 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
1758 connp->conn_ixa->ixa_zoneid = zoneid;
1759
nordmarkfc80c0d2007-10-11 22:57:36 -07001760 connp->conn_zoneid = zoneid;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001761
jpk45916cd2006-03-24 12:29:20 -08001762 /*
1763 * If the caller has the process-wide flag set, then default to MAC
1764 * exempt mode. This allows read-down to unlabeled hosts.
1765 */
1766 if (getpflags(NET_MAC_AWARE, credp) != 0)
Bill Sommerfeld5d3b8cb2009-11-02 15:39:20 -08001767 connp->conn_mac_mode = CONN_MAC_AWARE;
jpk45916cd2006-03-24 12:29:20 -08001768
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001769 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
nordmarkfc80c0d2007-10-11 22:57:36 -07001770
dh155122f4b3ec62007-01-19 16:59:38 -08001771 icmp->icmp_is = is;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001772
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001773 connp->conn_rcvbuf = is->is_recv_hiwat;
1774 connp->conn_sndbuf = is->is_xmit_hiwat;
1775 connp->conn_sndlowat = is->is_xmit_lowat;
1776 connp->conn_rcvlowat = icmp_mod_info.mi_lowat;
1777
1778 connp->conn_wroff = len + is->is_wroff_extra;
1779 connp->conn_so_type = SOCK_RAW;
1780
nordmarkfc80c0d2007-10-11 22:57:36 -07001781 connp->conn_recv = icmp_input;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001782 connp->conn_recvicmp = icmp_icmp_input;
nordmarkfc80c0d2007-10-11 22:57:36 -07001783 crhold(credp);
1784 connp->conn_cred = credp;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001785 connp->conn_cpid = curproc->p_pid;
Rafael Vanonid3d50732009-11-13 01:32:32 -08001786 connp->conn_open_time = ddi_get_lbolt64();
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001787 /* Cache things in ixa without an extra refhold */
Erik Nordmarkbe4c8f72010-03-03 23:02:28 -08001788 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001789 connp->conn_ixa->ixa_cred = connp->conn_cred;
1790 connp->conn_ixa->ixa_cpid = connp->conn_cpid;
1791 if (is_system_labeled())
1792 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
nordmarkfc80c0d2007-10-11 22:57:36 -07001793
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001794 connp->conn_flow_cntrld = B_FALSE;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001795
1796 if (is->is_pmtu_discovery)
1797 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1798
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001799 return (connp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001800}
1801
1802/*
1803 * Which ICMP options OK to set through T_UNITDATA_REQ...
1804 */
1805/* ARGSUSED */
1806static boolean_t
1807icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1808{
1809 return (B_TRUE);
1810}
1811
1812/*
1813 * This routine gets default values of certain options whose default
1814 * values are maintained by protcol specific code
1815 */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001816int
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001817icmp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001818{
nordmarkfc80c0d2007-10-11 22:57:36 -07001819 icmp_t *icmp = Q_TO_ICMP(q);
dh155122f4b3ec62007-01-19 16:59:38 -08001820 icmp_stack_t *is = icmp->icmp_is;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001821 int *i1 = (int *)ptr;
1822
1823 switch (level) {
1824 case IPPROTO_IP:
1825 switch (name) {
1826 case IP_MULTICAST_TTL:
1827 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1828 return (sizeof (uchar_t));
1829 case IP_MULTICAST_LOOP:
1830 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1831 return (sizeof (uchar_t));
1832 }
1833 break;
1834 case IPPROTO_IPV6:
1835 switch (name) {
1836 case IPV6_MULTICAST_HOPS:
1837 *i1 = IP_DEFAULT_MULTICAST_TTL;
1838 return (sizeof (int));
1839 case IPV6_MULTICAST_LOOP:
1840 *i1 = IP_DEFAULT_MULTICAST_LOOP;
1841 return (sizeof (int));
1842 case IPV6_UNICAST_HOPS:
dh155122f4b3ec62007-01-19 16:59:38 -08001843 *i1 = is->is_ipv6_hoplimit;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001844 return (sizeof (int));
1845 }
1846 break;
1847 case IPPROTO_ICMPV6:
1848 switch (name) {
1849 case ICMP6_FILTER:
1850 /* Make it look like "pass all" */
1851 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
1852 return (sizeof (icmp6_filter_t));
1853 }
1854 break;
1855 }
1856 return (-1);
1857}
1858
1859/*
1860 * This routine retrieves the current status of socket options.
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001861 * It returns the size of the option retrieved, or -1.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001862 */
1863int
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001864icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001865{
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001866 icmp_t *icmp = connp->conn_icmp;
Yu Xiangning0f1702c2008-12-11 20:04:13 -08001867 int *i1 = (int *)ptr;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001868 conn_opt_arg_t coas;
1869 int retval;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001870
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001871 coas.coa_connp = connp;
1872 coas.coa_ixa = connp->conn_ixa;
1873 coas.coa_ipp = &connp->conn_xmit_ipp;
1874 coas.coa_ancillary = B_FALSE;
1875 coas.coa_changed = 0;
1876
1877 /*
1878 * We assume that the optcom framework has checked for the set
1879 * of levels and names that are supported, hence we don't worry
1880 * about rejecting based on that.
1881 * First check for ICMP specific handling, then pass to common routine.
1882 */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001883 switch (level) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001884 case IPPROTO_IP:
1885 /*
1886 * Only allow IPv4 option processing on IPv4 sockets.
1887 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001888 if (connp->conn_family != AF_INET)
1889 return (-1);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001890
1891 switch (name) {
1892 case IP_OPTIONS:
1893 case T_IP_OPTIONS:
1894 /* Options are passed up with each packet */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001895 return (0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001896 case IP_HDRINCL:
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001897 mutex_enter(&connp->conn_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001898 *i1 = (int)icmp->icmp_hdrincl;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001899 mutex_exit(&connp->conn_lock);
1900 return (sizeof (int));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001901 }
1902 break;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001903
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001904 case IPPROTO_IPV6:
1905 /*
1906 * Only allow IPv6 option processing on native IPv6 sockets.
1907 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001908 if (connp->conn_family != AF_INET6)
1909 return (-1);
1910
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001911 switch (name) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001912 case IPV6_CHECKSUM:
1913 /*
1914 * Return offset or -1 if no checksum offset.
1915 * Does not apply to IPPROTO_ICMPV6
1916 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001917 if (connp->conn_proto == IPPROTO_ICMPV6)
1918 return (-1);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001919
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001920 mutex_enter(&connp->conn_lock);
1921 if (connp->conn_ixa->ixa_flags & IXAF_SET_RAW_CKSUM)
1922 *i1 = connp->conn_ixa->ixa_raw_cksum_offset;
1923 else
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001924 *i1 = -1;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001925 mutex_exit(&connp->conn_lock);
1926 return (sizeof (int));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001927 }
1928 break;
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001929
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001930 case IPPROTO_ICMPV6:
1931 /*
1932 * Only allow IPv6 option processing on native IPv6 sockets.
1933 */
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001934 if (connp->conn_family != AF_INET6)
1935 return (-1);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001936
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001937 if (connp->conn_proto != IPPROTO_ICMPV6)
1938 return (-1);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001939
1940 switch (name) {
1941 case ICMP6_FILTER:
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001942 mutex_enter(&connp->conn_lock);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001943 if (icmp->icmp_filter == NULL) {
1944 /* Make it look like "pass all" */
1945 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
1946 } else {
1947 (void) bcopy(icmp->icmp_filter, ptr,
1948 sizeof (icmp6_filter_t));
1949 }
Erik Nordmarkbd670b32009-11-11 11:49:49 -08001950 mutex_exit(&connp->conn_lock);
1951 return (sizeof (icmp6_filter_t));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700