blob: 559bca6031e46f3b0a5f7935d0b82f4b93304a7a [file] [log] [blame]
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990 Mentat Inc. */
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/policy.h>
#include <sys/priv.h>
#include <sys/zone.h>
#include <sys/time.h>
#include <sys/sockio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/isa_defs.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/netstack.h>
#include <net/route.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/proto_set.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/snmpcom.h>
#include <inet/kstatcom.h>
#include <inet/rawip_impl.h>
#include <netinet/ip_mroute.h>
#include <inet/tcp.h>
#include <net/pfkeyv2.h>
#include <inet/ipsec_info.h>
#include <inet/ipclassifier.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
#include <inet/ip_ire.h>
#include <inet/ip_if.h>
#include <inet/ip_impl.h>
#include <sys/disp.h>
/*
* Synchronization notes:
*
* RAWIP is MT and uses the usual kernel synchronization primitives. There is
* one lock, which is icmp_rwlock. We also use conn_lock when updating things
* which affect the IP classifier lookup.
* The lock order is icmp_rwlock -> conn_lock.
*
* The icmp_rwlock:
* This protects most of the other fields in the icmp_t. The exact list of
* fields which are protected by each of the above locks is documented in
* the icmp_t structure definition.
*
* Plumbing notes:
* ICMP is always a device driver. For compatibility with mibopen() code
* it is possible to I_PUSH "icmp", but that results in pushing a passthrough
* dummy module.
*/
static void icmp_addr_req(queue_t *q, mblk_t *mp);
static void icmp_tpi_bind(queue_t *q, mblk_t *mp);
static int icmp_bind_proto(conn_t *connp);
static int icmp_build_hdrs(icmp_t *icmp);
static void icmp_capability_req(queue_t *q, mblk_t *mp);
static int icmp_close(queue_t *q, int flags);
static void icmp_tpi_connect(queue_t *q, mblk_t *mp);
static void icmp_tpi_disconnect(queue_t *q, mblk_t *mp);
static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
int sys_error);
static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
t_scalar_t t_error, int sys_error);
static void icmp_icmp_error(conn_t *connp, mblk_t *mp);
static void icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp);
static void icmp_info_req(queue_t *q, mblk_t *mp);
static void icmp_input(void *, mblk_t *, void *);
static conn_t *icmp_open(int family, cred_t *credp, int *err, int flags);
static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
cred_t *credp);
static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
cred_t *credp);
static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
int *errorp, void *thisdg_attrs);
static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
int icmp_opt_set(conn_t *connp, uint_t optset_context,
int level, int name, uint_t inlen,
uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
void *thisdg_attrs, cred_t *cr);
int icmp_opt_get(conn_t *connp, int level, int name,
uchar_t *ptr);
static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
static int icmp_param_set(queue_t *q, mblk_t *mp, char *value,
caddr_t cp, cred_t *cr);
static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
uchar_t *ptr, int len);
static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
static void icmp_tpi_unbind(queue_t *q, mblk_t *mp);
static int icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst);
static void icmp_wput(queue_t *q, mblk_t *mp);
static void icmp_wput_fallback(queue_t *q, mblk_t *mp);
static int raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp,
sin6_t *sin6, ip6_pkt_t *ipp);
static int raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp,
ipaddr_t v4dst, ip4_pkt_t *pktinfop);
static void icmp_wput_other(queue_t *q, mblk_t *mp);
static void icmp_wput_iocdata(queue_t *q, mblk_t *mp);
static void icmp_wput_restricted(queue_t *q, mblk_t *mp);
static void icmp_ulp_recv(conn_t *, mblk_t *);
static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns);
static void rawip_stack_fini(netstackid_t stackid, void *arg);
static void *rawip_kstat_init(netstackid_t stackid);
static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static int rawip_kstat_update(kstat_t *kp, int rw);
static void rawip_stack_shutdown(netstackid_t stackid, void *arg);
static int rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa,
uint_t *salenp);
static int rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa,
uint_t *salenp);
int rawip_getsockname(sock_lower_handle_t, struct sockaddr *,
socklen_t *, cred_t *);
int rawip_getpeername(sock_lower_handle_t, struct sockaddr *,
socklen_t *, cred_t *);
/*
* STREAMS module information shared by every icmp qinit table in this file
* (each of them points at this structure).
* NOTE(review): the positional values presumably follow the usual
* struct module_info order (module id, module name, minimum and maximum
* packet size, high and low water marks) -- confirm against <sys/stream.h>.
*/
static struct module_info icmp_mod_info = {
5707, "icmp", 1, INFPSZ, 512, 128
};
/*
* Entry points for ICMP as a device.
* We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
*
* The two icmprinit tables differ only in their open routine (icmp_openv4
* versus icmp_openv6); both name icmp_close and leave every other routine
* slot NULL. All tables point at the shared icmp_mod_info.
*/
static struct qinit icmprinitv4 = {
NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
};
static struct qinit icmprinitv6 = {
NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
};
/*
* Table paired with both icmprinit tables in the streamtabs; the only
* routine it supplies is icmp_wput.
*/
static struct qinit icmpwinit = {
(pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info
};
/*
* ICMP entry point during fallback: same shape as icmpwinit, but with
* icmp_wput_fallback in place of icmp_wput.
*/
static struct qinit icmp_fallback_sock_winit = {
(pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info
};
/*
* For AF_INET aka /dev/icmp: pairs icmprinitv4 (whose open routine is
* icmp_openv4) with the common icmpwinit table.
*/
struct streamtab icmpinfov4 = {
&icmprinitv4, &icmpwinit
};
/*
* For AF_INET6 aka /dev/icmp6: identical to icmpinfov4 except that it uses
* icmprinitv6 (whose open routine is icmp_openv6).
*/
struct streamtab icmpinfov6 = {
&icmprinitv6, &icmpwinit
};
/*
* All-zero socket addresses. Neither has an initializer, so as file-scope
* statics both start out zero-filled. They are assigned wholesale to clear
* a sin_t/sin6_t (see icmp_tpi_bind) and supply the zero local address used
* by icmp_bind_proto.
*/
static sin_t sin_null; /* Zero address for quick clears */
static sin6_t sin6_null; /* Zero address for quick clears */
/*
* Default structure copied into T_INFO_ACK messages.
* The ADDR_size, OPT_size and CURRENT_state entries are only placeholders
* here (see the per-field comments); the code that copies this template is
* expected to overwrite them for the endpoint being described.
*/
static struct T_info_ack icmp_g_t_info_ack = {
T_INFO_ACK,
IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */
T_INVALID, /* ETSDU_size. icmp does not support expedited data. */
T_INVALID, /* CDATA_size. icmp does not support connect data. */
T_INVALID, /* DDATA_size. icmp does not support disconnect data. */
0, /* ADDR_size - filled in later. */
0, /* OPT_size - not initialized here */
IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */
T_CLTS, /* SERV_type. icmp supports connection-less. */
TS_UNBND, /* CURRENT_state. This is set from icmp_state. */
(XPG4_1|SENDZERO) /* PROVIDER_flag */
};
/*
* Table of ND variables supported by icmp. These are loaded into is_nd
* when the stack instance is created.
* All of these are alterable, within the min/max values given, at run time.
*
* The is_* accessor macros that follow select entries by array index, so
* the order of the rows here and the indices used by those macros must be
* kept in step whenever a row is added, removed or moved.
*/
static icmpparam_t icmp_param_arr[] = {
/* min max value name */
{ 0, 128, 32, "icmp_wroff_extra" },
{ 1, 255, 255, "icmp_ipv4_ttl" },
{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"},
{ 0, 1, 1, "icmp_bsd_compat" },
{ 4096, 65536, 8192, "icmp_xmit_hiwat"},
{ 0, 65536, 1024, "icmp_xmit_lowat"},
{ 4096, 65536, 8192, "icmp_recv_hiwat"},
{ 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"},
};
/*
* Shorthand for the current value of each tunable, one macro per row of
* the table above and in the same order.
* NOTE(review): these index is_param_arr, presumably the per-stack copy of
* icmp_param_arr -- confirm where that copy is made.
*/
#define is_wroff_extra is_param_arr[0].icmp_param_value
#define is_ipv4_ttl is_param_arr[1].icmp_param_value
#define is_ipv6_hoplimit is_param_arr[2].icmp_param_value
#define is_bsd_compat is_param_arr[3].icmp_param_value
#define is_xmit_hiwat is_param_arr[4].icmp_param_value
#define is_xmit_lowat is_param_arr[5].icmp_param_value
#define is_recv_hiwat is_param_arr[6].icmp_param_value
#define is_max_buf is_param_arr[7].icmp_param_value
static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len);
static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa,
socklen_t len, cred_t *cr);
static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error);
/*
 * Process an O_T_BIND_REQ/T_BIND_REQ message handed to icmp_wput.
 *
 * Once the request has passed the sanity checks, the message is
 * reallocated so that it is large enough for a T_bind_ack followed by a
 * sin6_t.  The address to bind is then located inside the message; for an
 * ADDR_length of zero an all-zero address of the endpoint's family is
 * written immediately after the T_bind_req instead.  rawip_do_bind() does
 * the actual bind, and the same mblk travels back upstream either as a
 * T_BIND_ACK or, through icmp_err_ack(), as a T_ERROR_ACK.
 */
static void
icmp_tpi_bind(queue_t *q, mblk_t *mp)
{
	conn_t			*connp = Q_TO_CONN(q);
	icmp_t			*icmp;
	struct T_bind_req	*tbr;
	struct sockaddr		*sa;
	socklen_t		len;
	mblk_t			*bigger;
	cred_t			*cr;
	int			error;

	/*
	 * Every Solaris component is expected to attach a db_credp to
	 * this TPI message, which is why a missing credential trips the
	 * ASSERT.  Some other kernel component could still send an M_PROTO
	 * that merely resembles a TPI message, so the check is repeated
	 * at run time and answered with an error.
	 */
	cr = msg_getcred(mp, NULL);
	ASSERT(cr != NULL);
	if (cr == NULL) {
		icmp_err_ack(q, mp, TSYSERR, EINVAL);
		return;
	}

	icmp = connp->conn_icmp;

	/* The message must at least hold a complete T_bind_req. */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "icmp_bind: bad req, len %u",
		    (uint_t)(mp->b_wptr - mp->b_rptr));
		icmp_err_ack(q, mp, TPROTO, 0);
		return;
	}

	/* Only an unbound endpoint may be bound. */
	if (icmp->icmp_state != TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "icmp_bind: bad state, %d", icmp->icmp_state);
		icmp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}

	/*
	 * Grow the message so there is room for an address and the
	 * protocol type.  On failure the original request is still ours
	 * and is turned into the error ack.
	 */
	bigger = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1,
	    1);
	if (bigger == NULL) {
		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}
	mp = bigger;

	/* The request doubles as the reply, so retype it now. */
	DB_TYPE(mp) = M_PCPROTO;

	tbr = (struct T_bind_req *)mp->b_rptr;
	len = tbr->ADDR_length;

	if (len == 0) {
		/* No address supplied: build a zero one for our family. */
		tbr->ADDR_offset = sizeof (struct T_bind_req);
		if (icmp->icmp_family == AF_INET) {
			sin_t	*sin = (sin_t *)&tbr[1];

			tbr->ADDR_length = sizeof (sin_t);
			*sin = sin_null;
			sin->sin_family = AF_INET;
			mp->b_wptr = (uchar_t *)&sin[1];
			sa = (struct sockaddr *)sin;
			len = sizeof (sin_t);
		} else {
			sin6_t	*sin6 = (sin6_t *)&tbr[1];

			ASSERT(icmp->icmp_family == AF_INET6);
			tbr->ADDR_length = sizeof (sin6_t);
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			mp->b_wptr = (uchar_t *)&sin6[1];
			sa = (struct sockaddr *)sin6;
			len = sizeof (sin6_t);
		}
	} else if (len == sizeof (sin_t) || len == sizeof (sin6_t)) {
		/* Complete IPv4 or IPv6 address carried in the request. */
		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
		    len);
	} else {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "icmp_bind: bad ADDR_length %d", tbr->ADDR_length);
		icmp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	error = rawip_do_bind(connp, sa, len);

	ASSERT(mp->b_cont == NULL);
	if (error == 0) {
		tbr->PRIM_type = T_BIND_ACK;
		qreply(q, mp);
	} else if (error > 0) {
		/* Positive values are errnos. */
		icmp_err_ack(q, mp, TSYSERR, error);
	} else {
		/* Negative values are negated TLI errors. */
		icmp_err_ack(q, mp, -error, 0);
	}
}
/*
 * Bind the endpoint to the local address described by `sa'/`len'.
 *
 * The address must be a complete sin_t or sin6_t whose family matches the
 * endpoint's; IPv4-mapped IPv6 addresses are rejected.  The icmp_t is
 * updated under icmp_rwlock, the lock is dropped, and IP is asked to
 * perform the bind; rawip_post_ip_bind_connect() finishes the job.
 *
 * Returns 0 on success, a positive errno (EINVAL, EAFNOSUPPORT,
 * EADDRNOTAVAIL, ENOMEM, or whatever icmp_build_hdrs() or the IP bind
 * routine returned), or -TOUTSTATE when the endpoint is not in TS_UNBND or
 * another operation is still pending.
 *
 * If a failure is detected after the endpoint has tentatively been moved
 * to TS_IDLE but before IP is called, the endpoint is put back into
 * TS_UNBND with a zero source address, mirroring what
 * rawip_post_ip_bind_connect() does when IP itself rejects a bind.
 */
static int
rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len)
{
	sin_t	*sin = NULL;
	sin6_t	*sin6 = NULL;
	icmp_t	*icmp;
	int	error = 0;
	mblk_t	*ire_mp;

	icmp = connp->conn_icmp;

	if (sa == NULL || !OK_32PTR((char *)sa)) {
		return (EINVAL);
	}

	/*
	 * The state must be TS_UNBND. TPI mandates that users must send
	 * TPI primitives only 1 at a time and wait for the response before
	 * sending the next primitive.
	 */
	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
	if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) {
		error = -TOUTSTATE;
		goto done;
	}

	ASSERT(len != 0);
	switch (len) {
	case sizeof (sin_t):	/* Complete IPv4 address */
		sin = (sin_t *)sa;
		if (sin->sin_family != AF_INET ||
		    icmp->icmp_family != AF_INET) {
			/* TSYSERR, EAFNOSUPPORT */
			error = EAFNOSUPPORT;
			goto done;
		}
		break;
	case sizeof (sin6_t):	/* Complete IPv6 address */
		sin6 = (sin6_t *)sa;
		if (sin6->sin6_family != AF_INET6 ||
		    icmp->icmp_family != AF_INET6) {
			/* TSYSERR, EAFNOSUPPORT */
			error = EAFNOSUPPORT;
			goto done;
		}
		/* No support for mapped addresses on raw sockets */
		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			/* TSYSERR, EADDRNOTAVAIL */
			error = EADDRNOTAVAIL;
			goto done;
		}
		break;
	default:
		/* TBADADDR */
		error = EADDRNOTAVAIL;
		goto done;
	}

	icmp->icmp_pending_op = T_BIND_REQ;
	icmp->icmp_state = TS_IDLE;

	/*
	 * Copy the source address into our icmp structure. This address
	 * may still be zero; if so, ip will fill in the correct address
	 * each time an outbound packet is passed to it.
	 * If we are binding to a broadcast or multicast address then
	 * rawip_post_ip_bind_connect will clear the source address.
	 */
	if (icmp->icmp_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr,
		    &icmp->icmp_v6src);
		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
		    icmp->icmp_ip_snd_options_len;
		icmp->icmp_bound_v6src = icmp->icmp_v6src;
	} else {
		ASSERT(sin6 != NULL);
		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
		icmp->icmp_v6src = sin6->sin6_addr;
		icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
		icmp->icmp_bound_v6src = icmp->icmp_v6src;

		/*
		 * Rebuild the header template.  The result is stored in
		 * the function-level `error' so that a failure is actually
		 * returned to the caller (TSYSERR) instead of being
		 * dropped.
		 */
		error = icmp_build_hdrs(icmp);
		if (error != 0)
			goto unwind;
	}

	ire_mp = NULL;
	if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) {
		/*
		 * request an IRE if src not 0 (INADDR_ANY)
		 */
		ire_mp = allocb(sizeof (ire_t), BPRI_HI);
		if (ire_mp == NULL) {
			error = ENOMEM;
			goto unwind;
		}
		DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE;
	}
	rw_exit(&icmp->icmp_rwlock);

	if (icmp->icmp_family == AF_INET6) {
		error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto,
		    &sin6->sin6_addr, sin6->sin6_port, B_TRUE);
	} else {
		error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto,
		    sin->sin_addr.s_addr, sin->sin_port, B_TRUE);
	}
	rawip_post_ip_bind_connect(icmp, ire_mp, error);
	return (error);

unwind:
	/*
	 * The bind was abandoned after the endpoint had been marked bound.
	 * Undo that, still under the lock, so the caller can retry the bind
	 * instead of being refused with TOUTSTATE.
	 */
	icmp->icmp_pending_op = -1;
	icmp->icmp_state = TS_UNBND;
	V6_SET_ZERO(icmp->icmp_v6src);
	V6_SET_ZERO(icmp->icmp_bound_v6src);
	if (icmp->icmp_family == AF_INET6)
		(void) icmp_build_hdrs(icmp);
done:
	rw_exit(&icmp->icmp_rwlock);
	return (error);
}
/*
 * Common completion step run after IP has been asked to bind, connect or
 * (for a disconnect) re-bind the endpoint.
 *
 * icmp   - endpoint the operation was issued for
 * ire_mp - IRE mblk handed to IP, or NULL; freed here unless the endpoint
 *          turns out to be unbound
 * error  - result returned by the IP bind routine
 *
 * Clears the pending operation and, on failure, rolls the endpoint back:
 * a failed connect returns to the bound state and bound source address,
 * any other failure returns to TS_UNBND with zeroed addresses.  On success
 * a plain bind to a broadcast/multicast address has its source address
 * cleared.  Whenever the source address changes on an AF_INET6 endpoint
 * the header template is rebuilt.
 */
static void
rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error)
{
	boolean_t	src_changed = B_FALSE;

	rw_enter(&icmp->icmp_rwlock, RW_WRITER);

	if (icmp->icmp_state == TS_UNBND) {
		/*
		 * not yet bound - bind sent by icmp_bind_proto.
		 */
		rw_exit(&icmp->icmp_rwlock);
		return;
	}

	ASSERT(icmp->icmp_pending_op != -1);
	icmp->icmp_pending_op = -1;

	if (error != 0) {
		if (icmp->icmp_state == TS_DATA_XFER) {
			/*
			 * Connect failed: fall back to the bound source
			 * address and the bound state.
			 */
			icmp->icmp_v6src = icmp->icmp_bound_v6src;
			icmp->icmp_state = TS_IDLE;
		} else {
			/* Bind failed: the endpoint is unbound again. */
			V6_SET_ZERO(icmp->icmp_v6src);
			V6_SET_ZERO(icmp->icmp_bound_v6src);
			icmp->icmp_state = TS_UNBND;
		}
		src_changed = B_TRUE;
	} else if (ire_mp != NULL && DB_TYPE(ire_mp) == IRE_DB_TYPE) {
		ire_t	*ire = (ire_t *)ire_mp->b_rptr;

		/*
		 * If a broadcast/multicast address was bound set
		 * the source address to 0.
		 * This ensures no datagrams with broadcast address
		 * as source address are emitted (which would violate
		 * RFC1122 - Hosts requirements)
		 * Note: we get IRE_BROADCAST for IPv6
		 * to "mark" a multicast local address.
		 */
		if (ire->ire_type == IRE_BROADCAST &&
		    icmp->icmp_state != TS_DATA_XFER) {
			/* This was just a local bind to a MC/broadcast addr */
			V6_SET_ZERO(icmp->icmp_v6src);
			src_changed = B_TRUE;
		}
	}

	/* The IPv6 header template embeds the source address. */
	if (src_changed && icmp->icmp_family == AF_INET6)
		(void) icmp_build_hdrs(icmp);

	rw_exit(&icmp->icmp_rwlock);

	if (ire_mp != NULL)
		freeb(ire_mp);
}
/*
 * Ask IP to bind the endpoint to its protocol only: the local address
 * passed is the all-zero address of the endpoint's family, the port is 0
 * and no IRE mblk is supplied.  The result is run through
 * rawip_post_ip_bind_connect() and then returned to the caller.
 */
static int
icmp_bind_proto(conn_t *connp)
{
	icmp_t	*icmp = connp->conn_icmp;
	int	rc;

	if (icmp->icmp_family != AF_INET6) {
		rc = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto,
		    sin_null.sin_addr.s_addr, 0, B_TRUE);
	} else {
		rc = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto,
		    &sin6_null.sin6_addr, 0, B_TRUE);
	}

	rawip_post_ip_bind_connect(icmp, NULL, rc);
	return (rc);
}
/*
 * Process a T_CONN_REQ received on the write queue.
 *
 * The request must carry no options and a destination that is a complete
 * sin_t or sin6_t acceptable to proto_verify_ip_addr() for the endpoint's
 * family.  rawip_do_connect() performs the connect.  On success a
 * T_OK_ACK followed by a T_CONN_CON is sent up conn_rq; on failure a
 * T_ERROR_ACK is sent.  Negative results from rawip_do_connect() are
 * negated TLI errors; positive results are errnos and are reported as
 * TSYSERR with the errno in UNIX_error, the same convention
 * icmp_tpi_bind() uses.
 */
static void
icmp_tpi_connect(queue_t *q, mblk_t *mp)
{
	conn_t	*connp = Q_TO_CONN(q);
	struct T_conn_req	*tcr;
	icmp_t	*icmp;
	struct sockaddr	*sa;
	socklen_t	len;
	int	error;
	cred_t	*cr;

	/*
	 * All Solaris components should pass a db_credp
	 * for this TPI message, hence we ASSERT.
	 * But in case there is some other M_PROTO that looks
	 * like a TPI message sent by some other kernel
	 * component, we check and return an error.
	 */
	cr = msg_getcred(mp, NULL);
	ASSERT(cr != NULL);
	if (cr == NULL) {
		icmp_err_ack(q, mp, TSYSERR, EINVAL);
		return;
	}

	icmp = connp->conn_icmp;

	/* Sanity checks */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		icmp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	tcr = (struct T_conn_req *)mp->b_rptr;

	if (tcr->OPT_length != 0) {
		icmp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	len = tcr->DEST_length;

	switch (len) {
	default:
		icmp_err_ack(q, mp, TBADADDR, 0);
		return;
	case sizeof (sin_t):
		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		break;
	case sizeof (sin6_t):
		sa = (struct sockaddr *)mi_offset_param(mp,
		    tcr->DEST_offset, sizeof (sin6_t));
		break;
	}

	error = proto_verify_ip_addr(icmp->icmp_family, sa, len);
	if (error != 0) {
		icmp_err_ack(q, mp, TSYSERR, error);
		return;
	}

	error = rawip_do_connect(connp, sa, len, cr);
	if (error != 0) {
		if (error < 0) {
			icmp_err_ack(q, mp, -error, 0);
		} else {
			/*
			 * An errno has to travel as TSYSERR; a TLI_error
			 * of 0 is not a valid error code.
			 */
			icmp_err_ack(q, mp, TSYSERR, error);
		}
	} else {
		mblk_t	*mp1;

		/*
		 * We have to send a connection confirmation to
		 * keep TLI happy.
		 */
		if (icmp->icmp_family == AF_INET) {
			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
			    sizeof (sin_t), NULL, 0);
		} else {
			ASSERT(icmp->icmp_family == AF_INET6);
			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
			    sizeof (sin6_t), NULL, 0);
		}
		if (mp1 == NULL) {
			icmp_err_ack(q, mp, TSYSERR, ENOMEM);
			return;
		}

		/*
		 * Send ok_ack for T_CONN_REQ
		 */
		mp = mi_tpi_ok_ack_alloc(mp);
		if (mp == NULL) {
			/* Unable to reuse the T_CONN_REQ for the ack. */
			icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
			return;
		}
		putnext(connp->conn_rq, mp);
		putnext(connp->conn_rq, mp1);
	}
}
/*
 * Connect the endpoint to the destination described by `sa'/`len'.
 *
 * The endpoint must already be bound and have no operation pending.  The
 * destination is recorded in icmp_v6dst (a zero destination is recorded as
 * loopback; the caller's sockaddr itself is not modified), the endpoint is
 * moved to TS_DATA_XFER, and IP is asked to set up the connected binding.
 * rawip_post_ip_bind_connect() then completes or rolls back the
 * operation.
 *
 * Returns 0 on success, a positive errno (EINVAL, ENOMEM, EAFNOSUPPORT,
 * EADDRNOTAVAIL, or whatever the IP bind routine returned), or -TOUTSTATE.
 *
 * The address length is checked against the endpoint's family at run
 * time, not only by ASSERT: the sockaddr is later dereferenced according
 * to the family, so a mismatch (or an unexpected length) must not get past
 * the switch below.
 */
static int
rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
    cred_t *cr)
{
	icmp_t		*icmp;
	sin_t		*sin = NULL;
	sin6_t		*sin6 = NULL;
	mblk_t		*ire_mp;
	int		error;
	ipaddr_t	v4dst;
	in6_addr_t	v6dst;

	icmp = connp->conn_icmp;

	if (sa == NULL || !OK_32PTR((char *)sa)) {
		return (EINVAL);
	}

	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
	if (ire_mp == NULL)
		return (ENOMEM);
	DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE;

	ASSERT(sa != NULL && len != 0);

	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
	if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
		error = -TOUTSTATE;
		goto fail;
	}

	switch (len) {
	case sizeof (sin_t):
		if (icmp->icmp_family != AF_INET) {
			error = EAFNOSUPPORT;
			goto fail;
		}
		sin = (sin_t *)sa;

		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);

		v4dst = sin->sin_addr.s_addr;
		/*
		 * Interpret a zero destination to mean loopback.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
		}

		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
		    icmp->icmp_ip_snd_options_len;
		icmp->icmp_v6dst.sin6_addr = v6dst;
		icmp->icmp_v6dst.sin6_family = AF_INET6;
		icmp->icmp_v6dst.sin6_flowinfo = 0;
		icmp->icmp_v6dst.sin6_port = 0;

		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * use the address of that interface as our
		 * source address if no source address has been set.
		 */
		if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY &&
		    CLASSD(v4dst) &&
		    icmp->icmp_multicast_if_addr != INADDR_ANY) {
			IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr,
			    &icmp->icmp_v6src);
		}
		break;

	case sizeof (sin6_t):
		if (icmp->icmp_family != AF_INET6) {
			error = EAFNOSUPPORT;
			goto fail;
		}
		sin6 = (sin6_t *)sa;

		/* No support for mapped addresses on raw sockets */
		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			error = EADDRNOTAVAIL;
			goto fail;
		}

		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);

		icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;

		icmp->icmp_v6dst = *sin6;
		icmp->icmp_v6dst.sin6_port = 0;

		/*
		 * Interpret a zero destination to mean loopback.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) {
			icmp->icmp_v6dst.sin6_addr = ipv6_loopback;
		}
		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * then the ip bind logic will pick the correct source
		 * address (i.e. matching the outgoing multicast interface).
		 */
		break;

	default:
		/* Neither a sin_t nor a sin6_t. */
		error = EINVAL;
		goto fail;
	}

	icmp->icmp_pending_op = T_CONN_REQ;

	if (icmp->icmp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		icmp->icmp_v6src = icmp->icmp_bound_v6src;
		icmp->icmp_state = TS_IDLE;
	}

	icmp->icmp_state = TS_DATA_XFER;
	rw_exit(&icmp->icmp_rwlock);

	if (icmp->icmp_family == AF_INET6) {
		error = ip_proto_bind_connected_v6(connp, &ire_mp,
		    icmp->icmp_proto, &icmp->icmp_v6src, 0,
		    &icmp->icmp_v6dst.sin6_addr,
		    NULL, sin6->sin6_port, B_TRUE, B_TRUE, cr);
	} else {
		error = ip_proto_bind_connected_v4(connp, &ire_mp,
		    icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0,
		    V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port,
		    B_TRUE, B_TRUE, cr);
	}
	rawip_post_ip_bind_connect(icmp, ire_mp, error);
	return (error);

fail:
	/* Nothing has been committed yet: drop the lock and the IRE mblk. */
	rw_exit(&icmp->icmp_rwlock);
	freeb(ire_mp);
	return (error);
}
/*
 * Release every resource hanging off the icmp_t of `connp' (IP send
 * options, ICMPv6 filter, sticky headers, cached credentials and sticky
 * packet options) and then zero the whole structure, keeping only its
 * back pointer to the conn_t.
 */
static void
icmp_close_free(conn_t *connp)
{
	icmp_t	*icmp = connp->conn_icmp;

	/* Options associated with the stream, if any. */
	if (icmp->icmp_ip_snd_options != NULL) {
		mi_free((char *)icmp->icmp_ip_snd_options);
		icmp->icmp_ip_snd_options_len = 0;
		icmp->icmp_ip_snd_options = NULL;
	}

	/* ICMPv6 filter. */
	if (icmp->icmp_filter != NULL) {
		kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t));
		icmp->icmp_filter = NULL;
	}

	/* Memory backing the sticky options. */
	if (icmp->icmp_sticky_hdrs_len != 0) {
		kmem_free(icmp->icmp_sticky_hdrs, icmp->icmp_sticky_hdrs_len);
		icmp->icmp_sticky_hdrs_len = 0;
		icmp->icmp_sticky_hdrs = NULL;
	}

	/* Credential references held by the endpoint. */
	if (icmp->icmp_last_cred != NULL) {
		crfree(icmp->icmp_last_cred);
		icmp->icmp_last_cred = NULL;
	}
	if (icmp->icmp_effective_cred != NULL) {
		crfree(icmp->icmp_effective_cred);
		icmp->icmp_effective_cred = NULL;
	}

	ip6_pkt_free(&icmp->icmp_sticky_ipp);

	/*
	 * Reset every field the kmem_cache constructor would clear.  Only
	 * icmp_connp has to survive, so it is restored after the bzero.
	 * TBD: clearing the entire structure is more work than needed.
	 */
	ASSERT(icmp->icmp_connp == connp);
	bzero(icmp, sizeof (icmp_t));
	icmp->icmp_connp = connp;
}
/*
 * Tear down a raw IP endpoint: quiesce the conn, turn off queue
 * processing for STREAMS endpoints, free what hangs off the icmp_t, give
 * back the minor number (STREAMS) or the helper stream (non-STREAMS), and
 * finally destroy the conn_t.  Always returns 0.
 */
static int
rawip_do_close(conn_t *connp)
{
	icmp_t	*icmp;

	ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));

	ip_quiesce_conn(connp);

	if (!IPCL_IS_NONSTR(connp)) {
		qprocsoff(connp->conn_rq);
	}

	/* Nothing may still be queued from a socket fallback. */
	icmp = connp->conn_icmp;
	ASSERT(icmp->icmp_fallback_queue_head == NULL &&
	    icmp->icmp_fallback_queue_tail == NULL);

	icmp_close_free(connp);

	/*
	 * From here on we are truly single threaded on this stream, so the
	 * things hanging off the connp, and then the connp itself, can be
	 * deleted.  The connp has been removed from the fanout list and
	 * cannot be reached through the fanouts, and we have already
	 * waited for conn_ref to drop.  Being in close, no other thread
	 * can arrive from the top.  qprocsoff has completed, and the
	 * service routine has completed or will not run in future.
	 */
	ASSERT(connp->conn_ref == 1);

	if (IPCL_IS_NONSTR(connp)) {
		ip_free_helper_stream(connp);
	} else {
		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
	}

	connp->conn_ref--;
	ipcl_conn_destroy(connp);

	return (0);
}
/*
 * STREAMS close routine named in both icmp read-side qinit tables.
 *
 * When SO_FALLBACK is set in `flags' the stream is being closed while in
 * fallback, so only the minor number is released and queue processing is
 * turned off.  Otherwise the endpoint is closed through rawip_do_close().
 * In both cases the q_ptr of both queues is cleared.  Always returns 0.
 */
static int
icmp_close(queue_t *q, int flags)
{
	if ((flags & SO_FALLBACK) == 0) {
		conn_t	*connp = Q_TO_CONN(q);

		(void) rawip_do_close(connp);
	} else {
		/*
		 * stream is being closed while in fallback
		 * simply free the resources that were allocated
		 */
		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
		qprocsoff(q);
	}

	q->q_ptr = WR(q)->q_ptr = NULL;

	return (0);
}
/*
 * Disconnect a connected endpoint.  This handles each T_DISCON_REQ passed
 * to icmp as an indication that ICMP is no longer connected: the source
 * address reverts to the bound address, the endpoint returns to TS_IDLE,
 * and IP is asked to restore the binding to just the local address.
 *
 * The disconnect completes in rawip_post_ip_bind_connect.
 *
 * Returns 0 on success, a positive errno (ENOMEM, or whatever
 * icmp_build_hdrs() or the IP bind routine returned), or -TOUTSTATE when
 * the endpoint is not connected or another operation is pending.
 *
 * The IRE mblk is allocated before any endpoint state is touched, as
 * rawip_do_connect() does, so that an allocation failure cannot leave
 * icmp_pending_op set to T_DISCON_REQ (which would make every later
 * bind/connect/disconnect fail with TOUTSTATE).
 */
static int
icmp_do_disconnect(conn_t *connp)
{
	icmp_t	*icmp;
	mblk_t	*ire_mp;
	int	error;

	icmp = connp->conn_icmp;

	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
	if (ire_mp == NULL) {
		return (ENOMEM);
	}

	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
	if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) {
		rw_exit(&icmp->icmp_rwlock);
		freeb(ire_mp);
		return (-TOUTSTATE);
	}
	icmp->icmp_pending_op = T_DISCON_REQ;
	icmp->icmp_v6src = icmp->icmp_bound_v6src;
	icmp->icmp_state = TS_IDLE;

	if (icmp->icmp_family == AF_INET6) {
		/* Rebuild the header template */
		error = icmp_build_hdrs(icmp);
		if (error != 0) {
			icmp->icmp_pending_op = -1;
			rw_exit(&icmp->icmp_rwlock);
			freeb(ire_mp);
			return (error);
		}
	}

	rw_exit(&icmp->icmp_rwlock);

	if (icmp->icmp_family == AF_INET6) {
		error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto,
		    &icmp->icmp_bound_v6src, 0, B_TRUE);
	} else {
		error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto,
		    V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE);
	}

	rawip_post_ip_bind_connect(icmp, ire_mp, error);

	return (error);
}
/*
 * Process a T_DISCON_REQ received on the write queue.
 *
 * The request is first grown so it can be reused for the largest reply we
 * may need to send, then icmp_do_disconnect() does the work.  The reply is
 * a T_OK_ACK on success and a T_ERROR_ACK otherwise: negative results are
 * negated TLI errors, positive results are errnos reported as TSYSERR
 * (matching icmp_tpi_bind()).
 */
static void
icmp_tpi_disconnect(queue_t *q, mblk_t *mp)
{
	conn_t	*connp = Q_TO_CONN(q);
	mblk_t	*mp1;
	int	error;

	/*
	 * Allocate the largest primitive we need to send back
	 * T_error_ack is > than T_ok_ack
	 *
	 * The result goes into a separate pointer so that, if the
	 * reallocation fails, we still hold the original request and can
	 * hand it to the error-ack helper (the same pattern icmp_tpi_bind()
	 * uses) instead of dropping our only reference to it.
	 */
	mp1 = reallocb(mp, sizeof (struct T_error_ack), 1);
	if (mp1 == NULL) {
		/* Unable to grow the T_DISCON_REQ for the ack. */
		icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
		return;
	}
	mp = mp1;

	error = icmp_do_disconnect(connp);

	if (error != 0) {
		if (error > 0) {
			/* An errno has to travel as TSYSERR. */
			icmp_err_ack(q, mp, TSYSERR, error);
		} else {
			icmp_err_ack(q, mp, -error, 0);
		}
	} else {
		mp = mi_tpi_ok_ack_alloc(mp);
		ASSERT(mp != NULL);
		qreply(q, mp);
	}
}
/*
 * Disconnect entry point that reports failures as errnos only.
 * Turns off icmp_dgram_errind, runs icmp_do_disconnect(), and converts a
 * negated TLI error coming back from it with proto_tlitosyserr().
 * Returns 0 or a positive errno.
 */
static int
icmp_disconnect(conn_t *connp)
{
	int	rc;

	connp->conn_icmp->icmp_dgram_errind = B_FALSE;

	rc = icmp_do_disconnect(connp);

	return ((rc < 0) ? proto_tlitosyserr(-rc) : rc);
}
/*
 * Turn `mp' into a T_ERROR_ACK carrying `t_error'/`sys_error' and reply
 * with it on `q'.  Nothing is sent if mi_tpi_err_ack_alloc() returns NULL.
 */
static void
icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
{
	mblk_t	*ack_mp;

	ack_mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error);
	if (ack_mp == NULL)
		return;

	qreply(q, ack_mp);
}
/*
 * Like icmp_err_ack(), but the primitive being rejected is named by the
 * caller in `primitive'.  `mp' is passed to tpi_ack_alloc() to obtain an
 * M_PCPROTO T_ERROR_ACK; if that returns NULL nothing is sent.
 */
static void
icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
    t_scalar_t t_error, int sys_error)
{
	struct T_error_ack	*tea;
	mblk_t			*ack_mp;

	ack_mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
	    M_PCPROTO, T_ERROR_ACK);
	if (ack_mp == NULL)
		return;

	tea = (struct T_error_ack *)ack_mp->b_rptr;
	tea->ERROR_prim = primitive;
	tea->TLI_error = t_error;
	tea->UNIX_error = sys_error;

	qreply(q, ack_mp);
}
/*
 * icmp_icmp_error handles ICMP error messages that IP passes up; IPv6
 * packets are forwarded to icmp_icmp_error_ipv6().
 *
 * Only permanent (non-transient) errors are reported: a destination
 * unreachable message with code "port unreachable" or "protocol
 * unreachable" becomes ECONNREFUSED, and everything else is dropped.  The
 * error is reported only when icmp_dgram_errind is set.  STREAMS endpoints
 * receive a T_UDERROR_IND.  Non-STREAMS endpoints get a set_error upcall
 * when connected to the offending destination; when not connected, the
 * error and address are saved as the delayed error.
 *
 * Assumes that IP has pulled up everything up to and including
 * the ICMP header.  The message is always consumed.
 */
static void
icmp_icmp_error(conn_t *connp, mblk_t *mp)
{
	icmp_t		*icmp = connp->conn_icmp;
	ipha_t		*outer_ipha;
	ipha_t		*inner_ipha;
	icmph_t		*icmph;
	sin_t		sin;
	mblk_t		*ind_mp;
	int		error = 0;

	outer_ipha = (ipha_t *)mp->b_rptr;
	ASSERT(OK_32PTR(mp->b_rptr));

	if (IPH_HDR_VERSION(outer_ipha) != IPV4_VERSION) {
		ASSERT(IPH_HDR_VERSION(outer_ipha) == IPV6_VERSION);
		icmp_icmp_error_ipv6(connp, mp);
		return;
	}

	/*
	 * icmp does not support v4 mapped addresses
	 * so we can never be here for a V6 socket
	 * i.e. icmp_family == AF_INET6
	 */
	ASSERT((IPH_HDR_VERSION(outer_ipha) == IPV4_VERSION) &&
	    (icmp->icmp_family == AF_INET));
	ASSERT(icmp->icmp_family == AF_INET);

	/*
	 * The ICMP header follows the outer IP header; the header of the
	 * packet that triggered the error follows the ICMP header.
	 */
	icmph = (icmph_t *)(&mp->b_rptr[IPH_HDR_LENGTH(outer_ipha)]);
	inner_ipha = (ipha_t *)&icmph[1];

	/*
	 * Port and protocol unreachable are the only permanent errors.
	 * For "fragmentation needed" IP has already adjusted the path MTU;
	 * all remaining types and codes are transient.
	 */
	if (icmph->icmph_type == ICMP_DEST_UNREACHABLE &&
	    (icmph->icmph_code == ICMP_PORT_UNREACHABLE ||
	    icmph->icmph_code == ICMP_PROTOCOL_UNREACHABLE)) {
		error = ECONNREFUSED;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (error == 0 || !icmp->icmp_dgram_errind) {
		freemsg(mp);
		return;
	}

	sin = sin_null;
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = inner_ipha->ipha_dst;

	if (!IPCL_IS_NONSTR(connp)) {
		ind_mp = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL,
		    0, error);
		if (ind_mp != NULL)
			putnext(connp->conn_rq, ind_mp);
	} else {
		boolean_t	notify_upper = B_FALSE;

		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
		if (icmp->icmp_state == TS_DATA_XFER) {
			/* Connected: only our peer's errors matter. */
			if (sin.sin_addr.s_addr ==
			    V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) {
				notify_upper = B_TRUE;
			}
		} else {
			/* Not connected: remember the error for later. */
			icmp->icmp_delayed_error = error;
			*((sin_t *)&icmp->icmp_delayed_addr) = sin;
		}
		rw_exit(&icmp->icmp_rwlock);

		/* The upcall is made without holding icmp_rwlock. */
		if (notify_upper) {
			(*connp->conn_upcalls->su_set_error)
			    (connp->conn_upper_handle, error);
		}
	}

	ASSERT(!RW_ISWRITER(&icmp->icmp_rwlock));
	freemsg(mp);
}
/*
* icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6
* for IPv6 packets.
* Send permanent (non-transient) errors upstream.
* Assumes that IP has pulled up all the extension headers as well
* as the ICMPv6 header.
*/
static void
icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
{
icmp6_t *icmp6;
ip6_t *ip6h, *outer_ip6h;
uint16_t iph_hdr_length;
uint8_t *nexthdrp;
sin6_t sin6;
mblk_t *mp1;
int error = 0;
icmp_t *icmp = connp->conn_icmp;
outer_ip6h = (ip6_t *)mp->b_rptr;
if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
else
iph_hdr_length = IPV6_HDR_LEN;
icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
ip6h = (ip6_t *)&icmp6[1];
if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
freemsg(mp);
return;
}
switch (icmp6->icmp6_type) {
case ICMP6_DST_UNREACH:
switch (icmp6->icmp6_code) {
case ICMP6_DST_UNREACH_NOPORT:
error = ECONNREFUSED;
break;
case ICMP6_DST_UNREACH_ADMIN:
case ICMP6_DST_UNREACH_NOROUTE:
case ICMP6_DST_UNREACH_BEYONDSCOPE:
case ICMP6_DST_UNREACH_ADDR:
/* Transient errors */
break;
default:
break;
}
break;
case ICMP6_PACKET_TOO_BIG: {
struct T_unitdata_ind *tudi;
struct T_opthdr *toh;
size_t udi_size;
mblk_t *newmp;
t_scalar_t opt_length = sizeof (struct T_opthdr) +
sizeof (struct ip6_mtuinfo);
sin6_t *sin6;
struct ip6_mtuinfo *mtuinfo;
/*
* If the application has requested to receive path mtu
* information, send up an empty message containing an
* IPV6_PATHMTU ancillary data item.
*/
if (!icmp->icmp_ipv6_recvpathmtu)
break;
udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
opt_length;
if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors);
break;
}
/*
* newmp->b_cont is left to NULL on purpose. This is an
* empty message containing only ancillary data.
*/
newmp->b_datap->db_type = M_PROTO;
tudi = (struct T_unitdata_ind *)newmp->b_rptr;
newmp->b_wptr = (uchar_t *)tudi + udi_size;
tudi->PRIM_type = T_UNITDATA_IND;
tudi->SRC_length = sizeof (sin6_t);
tudi->SRC_offset = sizeof (struct T_unitdata_ind);
tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
tudi->OPT_length = opt_length;
sin6 = (sin6_t *)&tudi[1];
bzero(sin6, sizeof (sin6_t));
sin6->sin6_family = AF_INET6;
sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr;
toh = (struct T_opthdr *)&sin6[1];
toh->level = IPPROTO_IPV6;
toh->name = IPV6_PATHMTU;
toh->len = opt_length;
toh->status = 0;
mtuinfo = (struct ip6_mtuinfo *)&toh[1];
bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
mtuinfo->ip6m_addr.sin6_family = AF_INET6;
mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
/*
* We've consumed everything we need from the original
* message. Free it, then send our empty message.
*/
freemsg(mp);
icmp_ulp_recv(connp, newmp);
return;
}
case ICMP6_TIME_EXCEEDED:
/* Transient errors */
break;
case ICMP6_PARAM_PROB:
/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
(uchar_t *)ip6h + icmp6->icmp6_pptr ==
(uchar_t *)nexthdrp) {
error = ECONNREFUSED;
break;
}
break;
}
if (error == 0) {
freemsg(mp);
return;
}
/*
* Deliver T_UDERROR_IND when the application has asked for it.
* The socket layer enables this automatically when connected.
*/
if (!icmp->icmp_dgram_errind) {
freemsg(mp);
return;
}
sin6 = sin6_null;
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ip6h->ip6_dst;
sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
if (IPCL_IS_NONSTR(connp)) {
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
if (icmp->icmp_state == TS_DATA_XFER) {
if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
&icmp->icmp_v6dst.sin6_addr)) {
rw_exit(&icmp->icmp_rwlock);
(*connp->conn_upcalls->su_set_error)
(connp->conn_upper_handle, error);
goto done;
}
} else {
icmp->icmp_delayed_error = error;
*((sin6_t *)&icmp->icmp_delayed_addr) = sin6;
}
rw_exit(&icmp->icmp_rwlock);
} else {
mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
NULL, 0, error);
if (mp1 != NULL)
putnext(connp->conn_rq, mp1);
}
done:
ASSERT(!RW_ISWRITER(&icmp->icmp_rwlock));
freemsg(mp);
}
/*
 * This routine responds to T_ADDR_REQ messages.  It is called by icmp_wput.
 *
 * The request mblk is reallocated into a T_ADDR_ACK (M_PCPROTO) sized for
 * the worst case of two sin6_t addresses and sent back with qreply().
 * If the reallocation fails a TSYSERR/ENOMEM error ack is sent instead.
 *
 * The local address is filled in when the endpoint is not in TS_UNBND:
 * icmp_v6src is used when it is set, otherwise icmp_bound_v6src (which may
 * itself still be the wildcard address).  No remote address is written by
 * this routine; REMADDR_length/REMADDR_offset stay zero from the bzero().
 *
 * Note: the code assumes 32 bit alignment of basic data structures like
 * sin_t and struct T_addr_ack.
 */
static void
icmp_addr_req(queue_t *q, mblk_t *mp)
{
	icmp_t			*icmp = Q_TO_ICMP(q);
	struct T_addr_ack	*ack;
	mblk_t			*reply;

	/* Make it large enough for worst case */
	reply = reallocb(mp,
	    sizeof (struct T_addr_ack) + 2 * sizeof (sin6_t), 1);
	if (reply == NULL) {
		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	ack = (struct T_addr_ack *)reply->b_rptr;
	bzero(ack, sizeof (struct T_addr_ack));
	ack->PRIM_type = T_ADDR_ACK;
	reply->b_wptr = (uchar_t *)&ack[1];
	reply->b_datap->db_type = M_PCPROTO;

	rw_enter(&icmp->icmp_rwlock, RW_READER);
	if (icmp->icmp_state != TS_UNBND) {
		/* The local address goes directly behind the ack header. */
		ack->LOCADDR_offset = sizeof (*ack);

		if (icmp->icmp_family == AF_INET) {
			sin_t *local4 = (sin_t *)&ack[1];

			ack->LOCADDR_length = sizeof (sin_t);
			/* Start from zeroes, then set the non-zero fields */
			*local4 = sin_null;
			local4->sin_family = AF_INET;
			if (IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) ||
			    IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
				/*
				 * INADDR_ANY: icmp_v6src is not set, so we
				 * might be bound to broadcast/multicast.
				 * Report icmp_bound_v6src instead (which
				 * could also still be INADDR_ANY).
				 */
				IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src,
				    local4->sin_addr.s_addr);
			} else {
				IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src,
				    local4->sin_addr.s_addr);
			}
			reply->b_wptr = (uchar_t *)&local4[1];
		} else {
			sin6_t *local6 = (sin6_t *)&ack[1];

			ASSERT(icmp->icmp_family == AF_INET6);
			ack->LOCADDR_length = sizeof (sin6_t);
			/* Start from zeroes, then set the non-zero fields */
			*local6 = sin6_null;
			local6->sin6_family = AF_INET6;
			if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
				/*
				 * UNSPECIFIED: icmp_v6src is not set, so we
				 * might be bound to broadcast/multicast.
				 * Report icmp_bound_v6src instead (which
				 * could also still be UNSPECIFIED).
				 */
				local6->sin6_addr = icmp->icmp_bound_v6src;
			} else {
				local6->sin6_addr = icmp->icmp_v6src;
			}
			reply->b_wptr = (uchar_t *)&local6[1];
		}
	}
	rw_exit(&icmp->icmp_rwlock);

	ASSERT(reply->b_wptr <= reply->b_datap->db_lim);
	qreply(q, reply);
}
/*
 * Fill in a T_info_ack for the given endpoint: start from the template
 * icmp_g_t_info_ack, then override the address size (sin6_t for AF_INET6
 * endpoints, sin_t otherwise), the maximum option size and the current
 * TPI state.
 */
static void
icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp)
{
	*tap = icmp_g_t_info_ack;
	tap->ADDR_size = (icmp->icmp_family == AF_INET6) ?
	    sizeof (sin6_t) : sizeof (sin_t);
	tap->OPT_size = icmp_max_optsize;
	tap->CURRENT_state = icmp->icmp_state;
}
/*
 * Fill in the body of a T_capability_ack.  TC1_INFO is the only capability
 * handled here: when it is requested in cap_bits1 the embedded INFO_ack is
 * populated via icmp_copy_info() and TC1_INFO is reported back in
 * CAP_bits1.  Otherwise CAP_bits1 is left at zero.
 */
static void
icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap,
    t_uscalar_t cap_bits1)
{
	tcap->CAP_bits1 = 0;

	if ((cap_bits1 & TC1_INFO) == 0)
		return;

	icmp_copy_info(&tcap->INFO_ack, icmp);
	tcap->CAP_bits1 = TC1_INFO;
}
/*
 * This routine responds to T_CAPABILITY_REQ messages.  It is called by
 * icmp_wput.  The requested capability bits are read from the request, the
 * request mblk is converted into a T_CAPABILITY_ACK (keeping the request's
 * message type) by tpi_ack_alloc(), and the ack body is filled in by
 * icmp_do_capability_ack() before being sent back with qreply().
 * Nothing is sent if tpi_ack_alloc() returns NULL.
 */
static void
icmp_capability_req(queue_t *q, mblk_t *mp)
{
	icmp_t			*icmp = Q_TO_ICMP(q);
	struct T_capability_req	*req = (struct T_capability_req *)mp->b_rptr;
	t_uscalar_t		wanted = req->CAP_bits1;
	mblk_t			*ackmp;

	/* The requested bits were saved above; mp is handed over here. */
	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
	    mp->b_datap->db_type, T_CAPABILITY_ACK);
	if (ackmp == NULL)
		return;

	icmp_do_capability_ack(icmp,
	    (struct T_capability_ack *)ackmp->b_rptr, wanted);
	qreply(q, ackmp);
}
/*
 * This routine responds to T_INFO_REQ messages.  It is called by icmp_wput.
 * The request mblk is converted into an M_PCPROTO T_INFO_ACK by
 * tpi_ack_alloc(); icmp_copy_info() then fills it from icmp_g_t_info_ack
 * plus the endpoint's current state.  Nothing is sent if tpi_ack_alloc()
 * returns NULL.
 */
static void
icmp_info_req(queue_t *q, mblk_t *mp)
{
	icmp_t	*icmp = Q_TO_ICMP(q);
	mblk_t	*ackmp;

	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
	    T_INFO_ACK);
	if (ackmp == NULL)
		return;

	icmp_copy_info((struct T_info_ack *)ackmp->b_rptr, icmp);
	qreply(q, ackmp);
}
/*
 * Common STREAMS open routine behind icmp_openv4() and icmp_openv6();
 * family selects AF_INET or AF_INET6.
 *
 * Returns 0 on success (including a re-open of an already open stream),
 * EINVAL for a module open, EBUSY when no minor number can be allocated,
 * or the error reported by icmp_open() / icmp_build_hdrs().
 *
 * When flag has SO_FALLBACK set, no conn_t is created: the minor number is
 * stored in the read queue's q_ptr, the minor arena in the write queue's
 * q_ptr, and the write side is switched to icmp_fallback_sock_winit.
 *
 * Otherwise a conn_t is created, tied to both queues, and (for AF_INET6)
 * the transmit header template is built.  On any failure after the minor
 * number was allocated, the minor number is returned to the arena.
 */
static int
icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
    int family)
{
	conn_t		*connp;
	dev_t		conn_dev;
	icmp_stack_t	*is;
	int		error;

	/* If the stream is already open, return immediately. */
	if (q->q_ptr != NULL)
		return (0);

	if (sflag == MODOPEN)
		return (EINVAL);

	/*
	 * Since ICMP is not used so heavily, allocating from the small
	 * arena should be sufficient.
	 */
	if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
		return (EBUSY);
	}

	if (flag & SO_FALLBACK) {
		/*
		 * Non streams socket needs a stream to fallback to
		 */
		RD(q)->q_ptr = (void *)conn_dev;
		WR(q)->q_qinfo = &icmp_fallback_sock_winit;
		WR(q)->q_ptr = (void *)ip_minor_arena_sa;
		qprocson(q);
		return (0);
	}

	connp = icmp_open(family, credp, &error, KM_SLEEP);
	if (connp == NULL) {
		ASSERT(error != 0);
		/*
		 * There is no conn_t to consult here; release the minor
		 * number through the local copy.
		 */
		inet_minor_free(ip_minor_arena_sa, conn_dev);
		return (error);
	}

	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
	connp->conn_dev = conn_dev;
	connp->conn_minor_arena = ip_minor_arena_sa;

	is = connp->conn_icmp->icmp_is;

	/*
	 * Initialize the icmp_t structure for this stream.
	 */
	q->q_ptr = connp;
	WR(q)->q_ptr = connp;
	connp->conn_rq = q;
	connp->conn_wq = WR(q);

	if (connp->conn_icmp->icmp_family == AF_INET6) {
		/* Build initial header template for transmit */
		rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER);
		if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) {
			rw_exit(&connp->conn_icmp->icmp_rwlock);
			inet_minor_free(ip_minor_arena_sa, connp->conn_dev);
			ipcl_conn_destroy(connp);
			return (error);
		}
		rw_exit(&connp->conn_icmp->icmp_rwlock);
	}

	/* Seed the queue watermarks from the per-stack tunables. */
	q->q_hiwat = is->is_recv_hiwat;
	WR(q)->q_hiwat = is->is_xmit_hiwat;
	WR(q)->q_lowat = is->is_xmit_lowat;

	qprocson(q);

	/* Set the Stream head write offset. */
	(void) proto_set_tx_wroff(q, connp,
	    connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra);
	(void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat);

	/* Setup is complete; the conn is no longer incipient. */
	mutex_enter(&connp->conn_lock);
	connp->conn_state_flags &= ~CONN_INCIPIENT;
	mutex_exit(&connp->conn_lock);

	return (0);
}
/*
 * STREAMS open entry point for the AF_INET device (/dev/icmp4 per the
 * original comment); delegates to icmp_tpi_open() with the address family
 * fixed to AF_INET.
 */
static int
icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	const int family = AF_INET;

	return (icmp_tpi_open(q, devp, flag, sflag, credp, family));
}
/*
 * STREAMS open entry point for the AF_INET6 device (/dev/icmp6); delegates
 * to icmp_tpi_open() with the address family fixed to AF_INET6.
 */
static int
icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	const int family = AF_INET6;

	return (icmp_tpi_open(q, devp, flag, sflag, credp, family));
}
/*
 * Create and initialize the conn_t/icmp_t pair for a new raw IP endpoint.
 *
 *   family	AF_INET6 selects an IPv6 endpoint; anything else is set up
 *		as IPv4.
 *   credp	credentials of the opener; checked with
 *		secpolicy_net_icmpaccess() and held in conn_cred on success.
 *   err	set to the failure reason when NULL is returned.
 *   flags	KM_SLEEP or KM_NOSLEEP, passed to ipcl_conn_create().
 *
 * Returns the new conn_t, or NULL with *err set: the policy error if the
 * caller lacks the privilege, or ENOMEM if the conn_t could not be
 * allocated (possible with KM_NOSLEEP).
 */
/* ARGSUSED */
static conn_t *
icmp_open(int family, cred_t *credp, int *err, int flags)
{
	icmp_t		*icmp;
	conn_t		*connp;
	zoneid_t	zoneid;
	netstack_t	*ns;
	icmp_stack_t	*is;
	boolean_t	isv6 = B_FALSE;

	*err = secpolicy_net_icmpaccess(credp);
	if (*err != 0)
		return (NULL);

	if (family == AF_INET6)
		isv6 = B_TRUE;

	ns = netstack_find_by_cred(credp);
	ASSERT(ns != NULL);
	is = ns->netstack_icmp;
	ASSERT(is != NULL);

	/*
	 * For exclusive stacks we set the zoneid to zero
	 * to make ICMP operate as if in the global zone.
	 */
	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = crgetzoneid(credp);

	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);

	connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns);
	if (connp == NULL) {
		/*
		 * Allocation failed (KM_NOSLEEP).  Drop the hold taken by
		 * netstack_find_by_cred() and report the failure instead of
		 * dereferencing a NULL conn_t below.
		 */
		netstack_rele(ns);
		*err = ENOMEM;
		return (NULL);
	}
	icmp = connp->conn_icmp;
	icmp->icmp_v6dst = sin6_null;

	/*
	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
	 * done by netstack_find_by_cred()
	 */
	netstack_rele(ns);

	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
	ASSERT(connp->conn_ulp == IPPROTO_ICMP);
	ASSERT(connp->conn_icmp == icmp);
	ASSERT(icmp->icmp_connp == connp);

	/* Set the initial state of the stream and the privilege status. */
	icmp->icmp_state = TS_UNBND;
	if (isv6) {
		icmp->icmp_ipversion = IPV6_VERSION;
		icmp->icmp_family = AF_INET6;
		connp->conn_ulp = IPPROTO_ICMPV6;
		/* May be changed by a SO_PROTOTYPE socket option. */
		icmp->icmp_proto = IPPROTO_ICMPV6;
		icmp->icmp_checksum_off = 2;	/* Offset for icmp6_cksum */
		icmp->icmp_max_hdr_len = IPV6_HDR_LEN;
		icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit;
		connp->conn_af_isv6 = B_TRUE;
		connp->conn_flags |= IPCL_ISV6;
	} else {
		icmp->icmp_ipversion = IPV4_VERSION;
		icmp->icmp_family = AF_INET;
		/* May be changed by a SO_PROTOTYPE socket option. */
		icmp->icmp_proto = IPPROTO_ICMP;
		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH;
		icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl;
		connp->conn_af_isv6 = B_FALSE;
		connp->conn_flags &= ~IPCL_ISV6;
	}
	icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
	icmp->icmp_pending_op = -1;
	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
	connp->conn_zoneid = zoneid;

	/*
	 * If the caller has the process-wide flag set, then default to MAC
	 * exempt mode.  This allows read-down to unlabeled hosts.
	 */
	if (getpflags(NET_MAC_AWARE, credp) != 0)
		connp->conn_mac_exempt = B_TRUE;

	connp->conn_ulp_labeled = is_system_labeled();

	icmp->icmp_is = is;

	connp->conn_recv = icmp_input;
	crhold(credp);
	connp->conn_cred = credp;

	rw_exit(&icmp->icmp_rwlock);

	connp->conn_flow_cntrld = B_FALSE;
	return (connp);
}
/*
 * Reports which ICMP options may be set through T_UNITDATA_REQ.
 * No filtering is applied: every (level, name) pair is accepted and both
 * arguments are ignored.
 */
/* ARGSUSED */
static boolean_t
icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
{
	/* All options are allowed. */
	return (B_TRUE);
}
/*
 * This routine gets default values of certain options whose default
 * values are maintained by protocol specific code.
 *
 * The default is written to ptr and its size in bytes is returned:
 *   IPPROTO_IP     IP_MULTICAST_TTL, IP_MULTICAST_LOOP       (uchar_t)
 *   IPPROTO_IPV6   IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP,
 *                  IPV6_UNICAST_HOPS                          (int)
 *   IPPROTO_ICMPV6 ICMP6_FILTER, as "pass all"                (icmp6_filter_t)
 * Any other level/name combination returns -1 and leaves ptr untouched.
 */
/* ARGSUSED */
int
icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
{
	icmp_t		*icmp = Q_TO_ICMP(q);
	icmp_stack_t	*is = icmp->icmp_is;
	int		*ival = (int *)ptr;

	if (level == IPPROTO_IP) {
		if (name == IP_MULTICAST_TTL) {
			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
			return (sizeof (uchar_t));
		}
		if (name == IP_MULTICAST_LOOP) {
			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
			return (sizeof (uchar_t));
		}
		return (-1);
	}

	if (level == IPPROTO_IPV6) {
		if (name == IPV6_MULTICAST_HOPS) {
			*ival = IP_DEFAULT_MULTICAST_TTL;
			return (sizeof (int));
		}
		if (name == IPV6_MULTICAST_LOOP) {
			*ival = IP_DEFAULT_MULTICAST_LOOP;
			return (sizeof (int));
		}
		if (name == IPV6_UNICAST_HOPS) {
			/* Per-stack tunable rather than a constant. */
			*ival = is->is_ipv6_hoplimit;
			return (sizeof (int));
		}
		return (-1);
	}

	if (level == IPPROTO_ICMPV6 && name == ICMP6_FILTER) {
		/* Make it look like "pass all" */
		ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
		return (sizeof (icmp6_filter_t));
	}

	return (-1);
}
/*
 * This routine retrieves the current status of socket options.
 *
 * The value of option (level, name) is written to ptr.  The caller must
 * hold icmp_rwlock (asserted with RW_READ_HELD) and must supply a buffer
 * large enough for the biggest option; no length is passed in.
 *
 * Returns the size in bytes of the value stored at ptr, 0 when there is
 * nothing to report for the option, -1 when the option is unknown or not
 * valid for this endpoint's family/protocol, or -EINVAL for
 * IP_RECVPKTINFO, which is left to IP.
 *
 * Options whose case ends in "break" are int sized; the shared tail sets
 * the return value to sizeof (int).
 */
int
icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
{
	icmp_t		*icmp = connp->conn_icmp;
	icmp_stack_t	*is = icmp->icmp_is;
	int		*i1 = (int *)ptr;
	ip6_pkt_t	*ipp = &icmp->icmp_sticky_ipp;
	int		ret = 0;

	ASSERT(RW_READ_HELD(&icmp->icmp_rwlock));
	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_DEBUG:
			*i1 = icmp->icmp_debug;
			break;
		case SO_TYPE:
			*i1 = SOCK_RAW;
			break;
		case SO_PROTOTYPE:
			*i1 = icmp->icmp_proto;
			break;
		case SO_REUSEADDR:
			*i1 = icmp->icmp_reuseaddr;
			break;

		/*
		 * The following three items are available here,
		 * but are only meaningful to IP.
		 */
		case SO_DONTROUTE:
			*i1 = icmp->icmp_dontroute;
			break;
		case SO_USELOOPBACK:
			*i1 = icmp->icmp_useloopback;
			break;
		case SO_BROADCAST:
			*i1 = icmp->icmp_broadcast;
			break;

		case SO_SNDBUF:
			ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX);
			*i1 = icmp->icmp_xmit_hiwat;
			break;
		case SO_RCVBUF:
			ASSERT(icmp->icmp_recv_hiwat <= INT_MAX);
			*i1 = icmp->icmp_recv_hiwat;
			break;
		case SO_DGRAM_ERRIND:
			*i1 = icmp->icmp_dgram_errind;
			break;
		case SO_TIMESTAMP:
			*i1 = icmp->icmp_timestamp;
			break;
		case SO_MAC_EXEMPT:
			*i1 = connp->conn_mac_exempt;
			break;
		case SO_DOMAIN:
			*i1 = icmp->icmp_family;
			break;

		/*
		 * Following four not meaningful for icmp
		 * Action is same as "default" to which we fallthrough
		 * so we keep them in comments.
		 * case SO_LINGER:
		 * case SO_KEEPALIVE:
		 * case SO_OOBINLINE:
		 * case SO_ALLZONES:
		 */
		default:
			ret = -1;
			goto done;
		}
		break;
	case IPPROTO_IP:
		/*
		 * Only allow IPv4 option processing on IPv4 sockets.
		 */
		if (icmp->icmp_family != AF_INET) {
			ret = -1;
			goto done;
		}

		switch (name) {
		case IP_OPTIONS:
		case T_IP_OPTIONS:
			/* Options are passed up with each packet */
			ret = 0;
			goto done;
		case IP_HDRINCL:
			*i1 = (int)icmp->icmp_hdrincl;
			break;
		case IP_TOS:
		case T_IP_TOS:
			*i1 = (int)icmp->icmp_type_of_service;
			break;
		case IP_TTL:
			*i1 = (int)icmp->icmp_ttl;
			break;
		case IP_MULTICAST_IF:
			/* 0 address if not set */
			*(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr;
			ret = sizeof (ipaddr_t);
			goto done;
		case IP_MULTICAST_TTL:
			*(uchar_t *)ptr = icmp->icmp_multicast_ttl;
			ret = sizeof (uchar_t);
			goto done;
		case IP_MULTICAST_LOOP:
			*ptr = connp->conn_multicast_loop;
			ret = sizeof (uint8_t);
			goto done;
		case IP_BOUND_IF:
			/* Zero if not set */
			*i1 = icmp->icmp_bound_if;
			break;	/* goto sizeof (int) option return */
		case IP_UNSPEC_SRC:
			/*
			 * Reported as sizeof (int), so store a whole int;
			 * writing a single byte would leave the rest of the
			 * value uninitialised.
			 */
			*i1 = icmp->icmp_unspec_source;
			break;	/* goto sizeof (int) option return */
		case IP_RECVIF:
			/* Stored as a whole int, as for IP_UNSPEC_SRC. */
			*i1 = icmp->icmp_recvif;
			break;	/* goto sizeof (int) option return */
		case IP_BROADCAST_TTL:
			*(uchar_t *)ptr = connp->conn_broadcast_ttl;
			ret = sizeof (uchar_t);
			goto done;
		case IP_RECVPKTINFO:
			/*
			 * This also handles IP_PKTINFO.
			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
			 * Differentiation is based on the size of the argument
			 * passed in.
			 * This option is handled in IP which will return an
			 * error for IP_PKTINFO as it's not supported as a
			 * sticky option.
			 */
			ret = -EINVAL;
			goto done;
		/*
		 * Cannot "get" the value of following options
		 * at this level. Action is same as "default" to
		 * which we fallthrough so we keep them in comments.
		 *
		 * case IP_ADD_MEMBERSHIP:
		 * case IP_DROP_MEMBERSHIP:
		 * case IP_BLOCK_SOURCE:
		 * case IP_UNBLOCK_SOURCE:
		 * case IP_ADD_SOURCE_MEMBERSHIP:
		 * case IP_DROP_SOURCE_MEMBERSHIP:
		 * case MCAST_JOIN_GROUP:
		 * case MCAST_LEAVE_GROUP:
		 * case MCAST_BLOCK_SOURCE:
		 * case MCAST_UNBLOCK_SOURCE:
		 * case MCAST_JOIN_SOURCE_GROUP:
		 * case MCAST_LEAVE_SOURCE_GROUP:
		 * case MRT_INIT:
		 * case MRT_DONE:
		 * case MRT_ADD_VIF:
		 * case MRT_DEL_VIF:
		 * case MRT_ADD_MFC:
		 * case MRT_DEL_MFC:
		 * case MRT_VERSION:
		 * case MRT_ASSERT:
		 * case IP_SEC_OPT:
		 * case IP_NEXTHOP:
		 */
		default:
			ret = -1;
			goto done;
		}
		break;
	case IPPROTO_IPV6:
		/*
		 * Only allow IPv6 option processing on native IPv6 sockets.
		 */
		if (icmp->icmp_family != AF_INET6) {
			ret = -1;
			goto done;
		}
		switch (name) {
		case IPV6_UNICAST_HOPS:
			*i1 = (unsigned int)icmp->icmp_ttl;
			break;
		case IPV6_MULTICAST_IF:
			/* 0 index if not set */
			*i1 = icmp->icmp_multicast_if_index;
			break;
		case IPV6_MULTICAST_HOPS:
			*i1 = icmp->icmp_multicast_ttl;
			break;
		case IPV6_MULTICAST_LOOP:
			*i1 = connp->conn_multicast_loop;
			break;
		case IPV6_BOUND_IF:
			/* Zero if not set */
			*i1 = icmp->icmp_bound_if;
			break;
		case IPV6_UNSPEC_SRC:
			*i1 = icmp->icmp_unspec_source;
			break;
		case IPV6_CHECKSUM:
			/*
			 * Return offset or -1 if no checksum offset.
			 * Does not apply to IPPROTO_ICMPV6
			 */
			if (icmp->icmp_proto == IPPROTO_ICMPV6) {
				ret = -1;
				goto done;
			}

			if (icmp->icmp_raw_checksum) {
				*i1 = icmp->icmp_checksum_off;
			} else {
				*i1 = -1;
			}
			break;
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
			/* cannot "get" the value for these */
			ret = -1;
			goto done;
		case IPV6_RECVPKTINFO:
			*i1 = icmp->icmp_ip_recvpktinfo;
			break;
		case IPV6_RECVTCLASS:
			*i1 = icmp->icmp_ipv6_recvtclass;
			break;
		case IPV6_RECVPATHMTU:
			*i1 = icmp->icmp_ipv6_recvpathmtu;
			break;
		case IPV6_V6ONLY:
			*i1 = 1;
			break;
		case IPV6_RECVHOPLIMIT:
			*i1 = icmp->icmp_ipv6_recvhoplimit;
			break;
		case IPV6_RECVHOPOPTS:
			*i1 = icmp->icmp_ipv6_recvhopopts;
			break;
		case IPV6_RECVDSTOPTS:
			*i1 = icmp->icmp_ipv6_recvdstopts;
			break;
		case _OLD_IPV6_RECVDSTOPTS:
			*i1 = icmp->icmp_old_ipv6_recvdstopts;
			break;
		case IPV6_RECVRTHDRDSTOPTS:
			*i1 = icmp->icmp_ipv6_recvrtdstopts;
			break;
		case IPV6_RECVRTHDR:
			*i1 = icmp->icmp_ipv6_recvrthdr;
			break;
		case IPV6_PKTINFO: {
			/* XXX assumes that caller has room for max size! */
			struct in6_pktinfo *pkti;

			pkti = (struct in6_pktinfo *)ptr;
			if (ipp->ipp_fields & IPPF_IFINDEX)
				pkti->ipi6_ifindex = ipp->ipp_ifindex;
			else
				pkti->ipi6_ifindex = 0;
			if (ipp->ipp_fields & IPPF_ADDR)
				pkti->ipi6_addr = ipp->ipp_addr;
			else
				pkti->ipi6_addr = ipv6_all_zeros;
			ret = sizeof (struct in6_pktinfo);
			goto done;
		}
		case IPV6_NEXTHOP: {
			sin6_t *sin6 = (sin6_t *)ptr;

			if (!(ipp->ipp_fields & IPPF_NEXTHOP)) {
				ret = 0;
				goto done;
			}
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_addr = ipp->ipp_nexthop;
			ret = (sizeof (sin6_t));
			goto done;
		}
		case IPV6_HOPOPTS:
			if (!(ipp->ipp_fields & IPPF_HOPOPTS)) {
				ret = 0;
				goto done;
			}
			/*
			 * The first icmp_label_len_v6 bytes of the stored
			 * hop-by-hop options are skipped; if nothing
			 * remains there is nothing to report.
			 */
			if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) {
				ret = 0;
				goto done;
			}
			bcopy((char *)ipp->ipp_hopopts +
			    icmp->icmp_label_len_v6, ptr,
			    ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
			if (icmp->icmp_label_len_v6 > 0) {
				/*
				 * Rewrite the two leading header bytes:
				 * byte 0 is copied from the stored header,
				 * byte 1 becomes the shortened length in
				 * 8-byte units, minus one.
				 */
				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
				ptr[1] = (ipp->ipp_hopoptslen -
				    icmp->icmp_label_len_v6 + 7) / 8 - 1;
			}
			ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
			goto done;
		case IPV6_RTHDRDSTOPTS:
			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) {
				ret = 0;
				goto done;
			}
			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
			ret = ipp->ipp_rtdstoptslen;
			goto done;
		case IPV6_RTHDR:
			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
				ret = 0;
				goto done;
			}
			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
			ret = ipp->ipp_rthdrlen;
			goto done;
		case IPV6_DSTOPTS:
			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
				ret = 0;
				goto done;
			}
			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
			ret = ipp->ipp_dstoptslen;
			goto done;
		case IPV6_PATHMTU:
			if (!(ipp->ipp_fields & IPPF_PATHMTU)) {
				ret = 0;
			} else {
				ret = ip_fill_mtuinfo(
				    &icmp->icmp_v6dst.sin6_addr, 0,
				    (struct ip6_mtuinfo *)ptr,
				    is->is_netstack);
			}
			goto done;
		case IPV6_TCLASS:
			if (ipp->ipp_fields & IPPF_TCLASS)
				*i1 = ipp->ipp_tclass;
			else
				*i1 = IPV6_FLOW_TCLASS(
				    IPV6_DEFAULT_VERS_AND_FLOW);
			break;
		default:
			ret = -1;
			goto done;
		}
		break;
	case IPPROTO_ICMPV6:
		/*
		 * Only allow IPv6 option processing on native IPv6 sockets
		 * whose protocol is ICMPv6.  Bail out right away; falling
		 * through to the switch would let ICMP6_FILTER overwrite
		 * the failure.
		 */
		if (icmp->icmp_family != AF_INET6 ||
		    icmp->icmp_proto != IPPROTO_ICMPV6) {
			ret = -1;
			goto done;
		}

		switch (name) {
		case ICMP6_FILTER:
			if (icmp->icmp_filter == NULL) {
				/* Make it look like "pass all" */
				ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
			} else {
				(void) bcopy(icmp->icmp_filter, ptr,
				    sizeof (icmp6_filter_t));
			}
			ret = sizeof (icmp6_filter_t);
			goto done;
		default:
			ret = -1;
			goto done;
		}
	default:
		ret = -1;
		goto done;
	}
	/* Every case that reaches here stored an int through i1. */
	ret = sizeof (int);
done:
	return (ret);
}
/*
 * TPI (STREAMS) front end for icmp_opt_get(): looks up the conn_t behind
 * queue q, takes icmp_rwlock as reader around the call, and hands back
 * whatever icmp_opt_get() returned (size of the option value, or one of
 * its non-positive status values).
 */
int
icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
{
	conn_t	*connp = Q_TO_CONN(q);
	icmp_t	*icmp = connp->conn_icmp;
	int	optlen;

	rw_enter(&icmp->icmp_rwlock, RW_READER);
	optlen = icmp_opt_get(connp, level, name, ptr);
	rw_exit(&icmp->icmp_rwlock);

	return (optlen);
}
int
icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
void *thisdg_attrs, boolean_t checkonly)
{
int *i1 = (int *)invalp;
boolean_t onoff = (*i1 == 0) ? 0 : 1;
icmp_t *icmp = connp->conn_icmp;
icmp_stack_t *is = icmp->icmp_is;
int error;
ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock));
/*
* For fixed length options, no sanity check
* of passed in length is done. It is assumed *_optcom_req()
* routines do the right thing.
*/
switch (level) {
case SOL_SOCKET:
switch (name) {
case SO_DEBUG:
if (!checkonly)
icmp->icmp_debug = onoff;
break;
case SO_PROTOTYPE:
if ((*i1 & 0xFF) != IPPROTO_ICMP &&
(*i1 & 0xFF) != IPPROTO_ICMPV6 &&
secpolicy_net_rawaccess(cr) != 0) {
*outlenp = 0;
return (EACCES);
}
/* Can't use IPPROTO_RAW with IPv6 */
if ((*i1 & 0xFF) == IPPROTO_RAW &&
icmp->icmp_family == AF_INET6) {
*outlenp = 0;
return (EPROTONOSUPPORT);
}
if (checkonly) {
/* T_CHECK case */
*(int *)outvalp = (*i1 & 0xFF);
break;
}
icmp->icmp_proto = *i1 & 0xFF;
if ((icmp->icmp_proto == IPPROTO_RAW ||
icmp->icmp_proto == IPPROTO_IGMP) &&
icmp->icmp_family == AF_INET)
icmp->icmp_hdrincl = 1;
else
icmp->icmp_hdrincl = 0;
if (icmp->icmp_family == AF_INET6 &&
icmp->icmp_proto == IPPROTO_ICMPV6) {
/* Set offset for icmp6_cksum */
icmp->icmp_raw_checksum = 0;
icmp->icmp_checksum_off = 2;
}
if (icmp->icmp_proto == IPPROTO_UDP ||
icmp->icmp_proto == IPPROTO_TCP ||
icmp->icmp_proto == IPPROTO_SCTP) {
icmp->icmp_no_tp_cksum = 1;
icmp->icmp_sticky_ipp.ipp_fields |=
IPPF_NO_CKSUM;
} else {
icmp->icmp_no_tp_cksum = 0;
icmp->icmp_sticky_ipp.ipp_fields &=
~IPPF_NO_CKSUM;
}
if (icmp->icmp_filter != NULL &&
icmp->icmp_proto != IPPROTO_ICMPV6) {
kmem_free(icmp->icmp_filter,
sizeof (icmp6_filter_t));
icmp->icmp_filter = NULL;
}
/* Rebuild the header template */
error = icmp_build_hdrs(icmp);
if (error != 0) {
*outlenp = 0;
return (error);
}
/*
* For SCTP, we don't use icmp_bind_proto() for
* raw socket binding. Note that we do not need
* to set *outlenp.
* FIXME: how does SCTP work?
*/
if (icmp->icmp_proto == IPPROTO_SCTP)
return (0);
*outlenp = sizeof (int);
*(int *)outvalp = *i1 & 0xFF;
/* Drop lock across the bind operation */
rw_exit(&icmp->icmp_rwlock);
(void) icmp_bind_proto(connp);
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
return (0);
case SO_REUSEADDR:
if (!checkonly) {
icmp->icmp_reuseaddr = onoff;
PASS_OPT_TO_IP(connp);
}
break;
/*
* The following three items are available here,
* but are only meaningful to IP.
*/
case SO_DONTROUTE:
if (!checkonly) {
icmp->icmp_dontroute = onoff;
PASS_OPT_TO_IP(connp);
}
break;
case SO_USELOOPBACK:
if (!checkonly) {
icmp->icmp_useloopback = onoff;
PASS_OPT_TO_IP(connp);
}
break;
case SO_BROADCAST:
if (!checkonly) {
icmp->icmp_broadcast = onoff;
PASS_OPT_TO_IP(connp);
}
break;
case SO_SNDBUF:
if (*i1 > is->is_max_buf) {
*outlenp = 0;
return (ENOBUFS);
}
if (!checkonly) {
if (!IPCL_IS_NONSTR(connp)) {
connp->conn_wq->q_hiwat = *i1;
}
icmp->icmp_xmit_hiwat = *i1;
}
break;
case SO_RCVBUF:
if (*i1 > is->is_max_buf) {
*outlenp = 0;
return (ENOBUFS);
}
if (!checkonly) {
icmp->icmp_recv_hiwat = *i1;
rw_exit(&icmp->icmp_rwlock);
(void) proto_set_rx_hiwat(connp->conn_rq, connp,
*i1);
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
}
break;
case SO_DGRAM_ERRIND:
if (!checkonly)
icmp->icmp_dgram_errind = onoff;
break;
case SO_ALLZONES:
/*
* "soft" error (negative)
* option not handled at this level
* Note: Do not modify *outlenp
*/
return (-EINVAL);
case SO_TIMESTAMP:
if (!checkonly) {
icmp->icmp_timestamp = onoff;
}
break;
case SO_MAC_EXEMPT:
/*
* "soft" error (negative)
* option not handled at this level
* Note: Do not modify *outlenp
*/
return (-EINVAL);
case SO_RCVTIMEO:
case SO_SNDTIMEO:
/*
* Pass these two options in order for third part
* protocol usage. Here just return directly.
*/
return (0);
/*
* Following three not meaningful for icmp
* Action is same as "default" so we keep them
* in comments.
* case SO_LINGER:
* case SO_KEEPALIVE:
* case SO_OOBINLINE:
*/
default:
*outlenp = 0;
return (EINVAL);
}
break;
case IPPROTO_IP:
/*
* Only allow IPv4 option processing on IPv4 sockets.
*/
if (icmp->icmp_family != AF_INET) {
*outlenp = 0;
return (ENOPROTOOPT);
}
switch (name) {
case IP_OPTIONS:
case T_IP_OPTIONS:
/* Save options for use by IP. */
if ((inlen & 0x3) ||
inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) {
*outlenp = 0;
return (EINVAL);
}
if (checkonly)
break;
if (!tsol_option_set(&icmp->icmp_ip_snd_options,
&icmp->icmp_ip_snd_options_len,
icmp->icmp_label_len, invalp, inlen)) {
*outlenp = 0;
return (ENOMEM);
}
icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
icmp->icmp_ip_snd_options_len;
rw_exit(&icmp->icmp_rwlock);
(void) proto_set_tx_wroff(connp->conn_rq == NULL ? NULL:
RD(connp->conn_rq), connp,
icmp->icmp_max_hdr_len + is->is_wroff_extra);
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
break;
case IP_HDRINCL:
if (!checkonly)
icmp->icmp_hdrincl = onoff;
break;
case IP_TOS:
case T_IP_TOS:
if (!checkonly) {
icmp->icmp_type_of_service = (uint8_t)*i1;
}
break;
case IP_TTL:
if (!checkonly) {
icmp->icmp_ttl = (uint8_t)*i1;
}
break;
case IP_MULTICAST_IF:
/*
* TODO should check OPTMGMT reply and undo this if
* there is an error.
*/
if (!checkonly) {
icmp->icmp_multicast_if_addr = *i1;
PASS_OPT_TO_IP(connp);
}
break;
case IP_MULTICAST_TTL:
if (!checkonly)
icmp->icmp_multicast_ttl = *invalp;
break;
case IP_MULTICAST_LOOP:
if (!checkonly) {
connp->conn_multicast_loop =
(*invalp == 0) ? 0 : 1;
PASS_OPT_TO_IP(connp);
}
break;
case IP_BOUND_IF:
if (!checkonly) {
icmp->icmp_bound_if = *i1;
PASS_OPT_TO_IP(connp);
}
break;
case IP_UNSPEC_SRC:
if (!checkonly) {
icmp->icmp_unspec_source = onoff;
PASS_OPT_TO_IP(connp);
}
break;
case IP_BROADCAST_TTL:
if (!checkonly)
connp->conn_broadcast_ttl = *invalp;
break;
case IP_RECVIF:
if (!checkonly) {
icmp->icmp_recvif = onoff;
}
/*
* pass to ip
*/
return (-EINVAL);
case IP_PKTINFO: {
/*
* This also handles IP_RECVPKTINFO.
* IP_PKTINFO and IP_RECVPKTINFO have the same value.
* Differentiation is based on the size of the argument
* passed in.
*/
struct in_pktinfo *pktinfop;
ip4_pkt_t *attr_pktinfop;
if (checkonly)
break;
if (inlen == sizeof (int)) {
/*
* This is IP_RECVPKTINFO option.
* Keep a local copy of wether this option is
* set or not and pass it down to IP for
* processing.
*/
icmp->icmp_ip_recvpktinfo = onoff;
return (-EINVAL);
}
if (inlen != sizeof (struct in_pktinfo)) {
return (EINVAL);
}
if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs)
== NULL) {
/*
* sticky option is not supported
*/
return (EINVAL);
}
pktinfop = (struct in_pktinfo *)invalp;
/*
* Atleast one of the values should be specified
*/
if (pktinfop->ipi_ifindex == 0 &&
pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) {
return (EINVAL);
}
attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr;
attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex;
}
break;
case IP_ADD_MEMBERSHIP:
case IP_DROP_MEMBERSHIP:
case IP_BLOCK_SOURCE:
case IP_UNBLOCK_SOURCE:
case IP_ADD_SOURCE_MEMBERSHIP:
case IP_DROP_SOURCE_MEMBERSHIP:
case MCAST_JOIN_GROUP:
case MCAST_LEAVE_GROUP:
case MCAST_BLOCK_SOURCE:
case MCAST_UNBLOCK_SOURCE:
case MCAST_JOIN_SOURCE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MRT_INIT:
case MRT_DONE:
case MRT_ADD_VIF:
case MRT_DEL_VIF:
case MRT_ADD_MFC:
case MRT_DEL_MFC:
case MRT_VERSION:
case MRT_ASSERT:
case IP_SEC_OPT:
case IP_NEXTHOP:
/*
* "soft" error (negative)
* option not handled at this level
* Note: Do not modify *outlenp
*/
return (-EINVAL);
default:
*outlenp = 0;
return (EINVAL);
}
break;
case IPPROTO_IPV6: {
ip6_pkt_t *ipp;
boolean_t sticky;
if (icmp->icmp_family != AF_INET6) {
*outlenp = 0;
return (ENOPROTOOPT);
}
/*
* Deal with both sticky options and ancillary data
*/
if (thisdg_attrs == NULL) {
/* sticky options, or none */
ipp = &icmp->icmp_sticky_ipp;
sticky = B_TRUE;
} else {
/* ancillary data */
ipp = (ip6_pkt_t *)thisdg_attrs;
sticky = B_FALSE;
}
switch (name) {
case IPV6_MULTICAST_IF:
if (!checkonly) {
icmp->icmp_multicast_if_index = *i1;
PASS_OPT_TO_IP(connp);
}
break;
case IPV6_UNICAST_HOPS:
/* -1 means use default */
if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
*outlenp = 0;
return (EINVAL);
}
if (!checkonly) {
if (*i1 == -1) {
icmp->icmp_ttl = ipp->ipp_unicast_hops =
is->is_ipv6_hoplimit;
ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
/* Pass modified value to IP. */
*i1 = ipp->ipp_hoplimit;
} else {
icmp->icmp_ttl = ipp->ipp_unicast_hops =
(uint8_t)*i1;
ipp->ipp_fields |= IPPF_UNICAST_HOPS;
}
/* Rebuild the header template */
error = icmp_build_hdrs(icmp);
if (error != 0) {
*outlenp = 0;
return (error);
}
}
break;
case IPV6_MULTICAST_HOPS:
/* -1 means use default */
if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
*outlenp = 0;
return (EINVAL);
}
if (!checkonly) {
if (*i1 == -1) {
icmp->icmp_multicast_ttl =
ipp->ipp_multicast_hops =
IP_DEFAULT_MULTICAST_TTL;
ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
/* Pass modified value to IP. */
*i1 = icmp->icmp_multicast_ttl;
} else {
icmp->icmp_multicast_ttl =
ipp->ipp_multicast_hops =
(uint8_t)*i1;
ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
}
}
break;
case IPV6_MULTICAST_LOOP:
if (*i1 != 0 && *i1 != 1) {
*outlenp = 0;
return (EINVAL);
}
if (!checkonly) {
connp->conn_multicast_loop = *i1;
PASS_OPT_TO_IP(connp);
}
break;
case IPV6_CHECKSUM:
/*
* Integer offset into the user data of where the
* checksum is located.
* Offset of -1 disables option.
* Does not apply to IPPROTO_ICMPV6.
*/
if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) {
*outlenp = 0;
return (EINVAL);
}
if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) {
/* Negative or not 16 bit aligned offset */
*outlenp = 0;
return (EINVAL);
}
if (checkonly)
break;
if (*i1 == -1) {
icmp->icmp_raw_checksum = 0;
ipp->ipp_fields &= ~IPPF_RAW_CKSUM;
} else {
icmp->icmp_raw_checksum = 1;
icmp->icmp_checksum_off = *i1;
ipp->ipp_fields |= IPPF_RAW_CKSUM;
}
/* Rebuild the header template */
error = icmp_build_hdrs(icmp);
if (error != 0) {
*outlenp = 0;
return (error);
}
break;
case IPV6_JOIN_GROUP:
case IPV6_LEAVE_GROUP:
case MCAST_JOIN_GROUP:
case MCAST_LEAVE_GROUP:
case MCAST_BLOCK_SOURCE:
case MCAST_UNBLOCK_SOURCE:
case MCAST_JOIN_SOURCE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
/*
* "soft" error (negative)
* option not handled at this level
* Note: Do not modify *outlenp
*/
return (-EINVAL);
case IPV6_BOUND_IF:
if (!checkonly) {
icmp->icmp_bound_if = *i1;
PASS_OPT_TO_IP(connp);
}
break;
case IPV6_UNSPEC_SRC:
if (!checkonly) {
icmp->icmp_unspec_source = onoff;
PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVTCLASS:
if (!checkonly) {
icmp->icmp_ipv6_recvtclass = onoff;
PASS_OPT_TO_IP(connp);
}
break;
/*
* Set boolean switches for ancillary data delivery
*/
case IPV6_RECVPKTINFO:
if (!checkonly) {
icmp->icmp_ip_recvpktinfo = onoff;
PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVPATHMTU:
if (!checkonly) {
icmp->icmp_ipv6_recvpathmtu = onoff;
PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVHOPLIMIT:
if (!checkonly) {
icmp->icmp_ipv6_recvhoplimit = onoff;
PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVHOPOPTS:
if (!checkonly) {
icmp->icmp_ipv6_recvhopopts = onoff;
PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVDSTOPTS:
if (!checkonly) {
icmp->icmp_ipv6_recvdstopts = onoff;
PASS_OPT_TO_IP(connp);
}
break;
case _OLD_IPV6_RECVDSTOPTS:
if (!checkonly)
icmp->icmp_old_ipv6_recvdstopts = onoff;
break;
case IPV6_RECVRTHDRDSTOPTS:
if (!checkonly) {
icmp->icmp_ipv6_recvrtdstopts = onoff;
PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVRTHDR:
if (!checkonly) {
icmp->icmp_ipv6_recvrthdr = onoff;
PASS_OPT_TO_IP(connp);
}
break;
/*
* Set sticky options or ancillary data.
* If sticky options, (re)build any extension headers
* that might be needed as a result.
*/
case IPV6_PKTINFO:
/*
* The source address and ifindex are verified
* in ip_opt_set(). For ancillary data the
* source address is checked in ip_wput_v6.
*/
if (inlen != 0 && inlen !=
sizeof (struct in6_pktinfo)) {
return (EINVAL);
}
if (checkonly)
break;
if (inlen == 0) {
ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
ipp->ipp_sticky_ignored |=
(IPPF_IFINDEX|IPPF_ADDR);
} else {
struct in6_pktinfo *pkti;