blob: 2cc88ffeb14883d40a1ab56fd9796c0dcde8d676 [file] [log] [blame]
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 1990 Mentat Inc.
*/
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsun.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/kobj.h>
#include <sys/zone.h>
#include <sys/neti.h>
#include <sys/hook.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/vtrace.h>
#include <sys/isa_defs.h>
#include <sys/atomic.h>
#include <sys/policy.h>
#include <sys/mac.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/if_dl.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/sctp.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/optcom.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/arp.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <inet/ip6_asp.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/udp_impl.h>
#include <inet/ipp_common.h>
#include <inet/ip_multi.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ip_rts.h>
#include <inet/ip_ndp.h>
#include <net/pfkeyv2.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>
#include <inet/iptun/iptun_impl.h>
#include <inet/sctp_ip.h>
#include <sys/pattr.h>
#include <inet/ipclassifier.h>
#include <inet/ipsecah.h>
#include <inet/rawip_impl.h>
#include <inet/rts_impl.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
/* Temporary; for CR 6451644 work-around */
#include <sys/ethernet.h>
/*
* Naming conventions:
* These rules should be judiciously applied
* if there is a need to identify something as IPv6 versus IPv4
* IPv6 functions will end with _v6 in the ip module.
* IPv6 functions will end with _ipv6 in the transport modules.
* IPv6 macros:
* Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
* Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
* And then there are ..V4_PART_OF_V6.
* The intent is that macros in the ip module end with _V6.
* IPv6 global variables will start with ipv6_
* IPv6 structures will start with ipv6
* IPv6 defined constants should start with IPV6_
* (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
*/
/*
* ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
* We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
* from IANA. This mechanism will remain in effect until an official
* number is obtained.
*/
uchar_t ip6opt_ls;

/* The all-zeros and all-ones addresses look the same in either byte order. */
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };

/*
 * Well-known addresses.  Each initializer is written as four 32-bit words,
 * so every literal is spelled byte-swapped on little-endian machines; that
 * keeps the bytes in memory the same on both kinds of host.
 *
 *	ipv6_unspecified_group		ff00::
 *	ipv6_loopback			::1
 *	ipv6_all_hosts_mcast		ff02::1
 *	ipv6_all_rtrs_mcast		ff02::2
 *	ipv6_all_v2rtrs_mcast		ff02::16
 *	ipv6_solicited_node_mcast	ff02::1:ff00:0
 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
const in6_addr_t ipv6_solicited_node_mcast =
	{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else /* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
const in6_addr_t ipv6_solicited_node_mcast =
	{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif /* _BIG_ENDIAN */
/*
* Forward declarations of the file-local (static) ICMPv6 helper routines,
* so that they can be referenced before their definitions.
*/
static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
static void icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
static void icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
ip_recv_attr_t *);
static void icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
ip_recv_attr_t *);
static void icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
in6_addr_t *, ip_recv_attr_t *);
static void icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
ip_recv_attr_t *);
static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
/*
 * Inbound processing for the ICMPv6 messages that IP handles itself.
 *
 * The return value is the mblk that the caller should deliver to
 * IPPROTO_ICMPV6 raw sockets.  NULL means there is nothing to deliver:
 * either IP consumed the message, or the message was malformed (too
 * short, etc).
 *
 * All error messages are passed to the matching transport stream.
 *
 * See comment for icmp_inbound_v4() on how IPsec is handled.
 */
mblk_t *
icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;	/* Outer header */
	icmp6_t		*icmp6;
	int		ip_hdr_length;		/* Outer header length */
	boolean_t	interested;
	mblk_t		*copy;
	mblk_t		*mp_ret = NULL;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);

	/* ndp_input needs ira_l2src; fill it in if that was not done yet. */
	if ((ira->ira_flags & IRAF_L2SRC_SET) == 0)
		ip_setl2src(mp, ira, ira->ira_rill);

	ip_hdr_length = ira->ira_ip_hdr_length;
	if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
		/*
		 * The first mblk does not hold the minimal ICMPv6 header.
		 * Either the packet as a whole is too short, or the header
		 * is spread over several mblks and must be pulled up.
		 */
		if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
			freemsg(mp);
			return (NULL);
		}
		ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
		if (ip6h == NULL) {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
			freemsg(mp);
			return (NULL);
		}
	}

	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
	DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
	ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
	    icmp6->icmp6_code));

	/*
	 * "interested" means a copy should go to the transport, which is
	 * the case for error messages (the informational bit is clear).
	 */
	interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);

	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
		break;

	case ICMP6_TIME_EXCEEDED:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
		break;

	case ICMP6_PARAM_PROB:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
		break;

	case ICMP6_PACKET_TOO_BIG:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
		break;

	case ICMP6_ECHO_REQUEST:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
		/* Multicast echo requests are answered only when enabled. */
		if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
		    !ipst->ips_ipv6_resp_echo_mcast)
			break;

		/*
		 * The request is turned into the reply in place, which
		 * requires an mblk nobody else references.  Work on a
		 * private copy when the data block is shared.
		 */
		if (mp->b_datap->db_ref > 1) {
			copy = copymsg(mp);
			if (copy == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards - copymsg",
				    mp, ill);
				freemsg(mp);
				return (NULL);
			}
			freemsg(mp);
			mp = copy;
			ip6h = (ip6_t *)mp->b_rptr;
			icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
		}

		icmp6->icmp6_type = ICMP6_ECHO_REPLY;
		icmp_send_reply_v6(mp, ip6h, icmp6, ira);
		return (NULL);

	case ICMP6_ECHO_REPLY:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
		break;

	case ND_ROUTER_SOLICIT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
		break;

	case ND_ROUTER_ADVERT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
		break;

	case ND_NEIGHBOR_SOLICIT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
		ndp_input(mp, ira);
		return (NULL);

	case ND_NEIGHBOR_ADVERT:
		BUMP_MIB(ill->ill_icmp6_mib,
		    ipv6IfIcmpInNeighborAdvertisements);
		ndp_input(mp, ira);
		return (NULL);

	case ND_REDIRECT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
		/*
		 * Unless redirects are being ignored we process this one,
		 * and a RAW socket is allowed to receive it as well.
		 */
		if (!ipst->ips_ipv6_ignore_redirect)
			interested = B_TRUE;
		break;

	/*
	 * The three MLD message types are handled by mld_input.  Whatever
	 * it hands back is passed up to any process(es) listening on a raw
	 * ICMP socket.
	 */
	case MLD_LISTENER_QUERY:
	case MLD_LISTENER_REPORT:
	case MLD_LISTENER_REDUCTION:
		return (mld_input(mp, ira));

	default:
		break;
	}

	if (!interested) {
		/*
		 * IP does not need this message.  Hand it back for delivery
		 * to RAW sockets when an ICMP client exists; otherwise
		 * nobody wants it and it is dropped right here.
		 */
		if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head !=
		    NULL)
			return (mp);
		freemsg(mp);
		return (NULL);
	}

	/*
	 * IP wants the message.  A copy for the raw sockets is made only
	 * when there is an ICMP client, which avoids a needless
	 * copymsg/freemsg otherwise.  If the copy fails we still process
	 * the original ourselves.
	 */
	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
		mp_ret = copymsg(mp);
		if (mp_ret == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
		}
	}

	/*
	 * ICMP error or redirect packet.  It might get modified below, so
	 * pull up the whole message and make sure db_ref == 1.
	 */
	if (mp->b_cont != NULL && ip_pullup(mp, -1, ira) == NULL) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		ip_drop_input("ipIfStatsInDiscards - ip_pullup", mp, ill);
		freemsg(mp);
		return (mp_ret);
	}

	if (mp->b_datap->db_ref > 1) {
		copy = copymsg(mp);
		if (copy == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
			freemsg(mp);
			return (mp_ret);
		}
		freemsg(mp);
		mp = copy;
	}

	/*
	 * mp may have changed above, so recompute the header pointers and
	 * verify the message before processing it any further.
	 */
	ip6h = (ip6_t *)mp->b_rptr;
	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
	if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
		freemsg(mp);
		return (mp_ret);
	}

	if (icmp6->icmp6_type == ND_REDIRECT) {
		icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
	} else {
		/* Update DCE and adjust MTU in the icmp header if needed */
		if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG)
			icmp_inbound_too_big_v6(icmp6, ira);
		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
	}
	return (mp_ret);
}
/*
 * Send an ICMP echo reply.
 *
 * The caller has already updated the payload part of the packet (mp).
 * This routine strips extension headers, swaps the addresses, primes the
 * ICMP checksum field, selects the source address handling and then feeds
 * the packet into ip_output_simple.
 */
static void
icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
    ip_recv_attr_t *ira)
{
	uint_t		ip_hdr_length = ira->ira_ip_hdr_length;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ip_xmit_attr_t	ixas;
	in6_addr_t	origsrc;

	/*
	 * Remove any extension headers (do not reverse a source route)
	 * and clear the flow id (keep traffic class for now).
	 */
	if (ip_hdr_length != IPV6_HDR_LEN) {
		uint_t	ext_len = ip_hdr_length - IPV6_HDR_LEN;
		int	idx;
		int	plen;

		/*
		 * Slide the fixed header up against the ICMPv6 header.
		 * Source and destination may overlap, so the bytes are
		 * copied from the last one down to the first.
		 */
		for (idx = IPV6_HDR_LEN - 1; idx >= 0; idx--)
			mp->b_rptr[idx + ext_len] = mp->b_rptr[idx];

		mp->b_rptr += ext_len;
		ip6h = (ip6_t *)mp->b_rptr;
		ip6h->ip6_nxt = IPPROTO_ICMPV6;

		/* The payload no longer includes the extension headers. */
		plen = ntohs(ip6h->ip6_plen);
		plen -= ext_len;
		ip6h->ip6_plen = htons(plen);

		ip_hdr_length = IPV6_HDR_LEN;
		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
	}
	ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;

	/* Reverse the source and destination addresses. */
	origsrc = ip6h->ip6_src;
	ip6h->ip6_src = ip6h->ip6_dst;
	ip6h->ip6_dst = origsrc;

	/* set the hop limit */
	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;

	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_output
	 */
	icmp6->icmp6_cksum = ip6h->ip6_plen;

	bzero(&ixas, sizeof (ixas));
	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
	ixas.ixa_ipst = ipst;
	ixas.ixa_zoneid = ira->ira_zoneid;
	ixas.ixa_ifindex = 0;
	ixas.ixa_cred = kcred;
	ixas.ixa_cpid = NOPID;
	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;

	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
		/* The request was protected; protect the reply likewise. */
		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			/* Note: mp already consumed and ip_drop_packet done */
			return;
		}
	} else {
		/*
		 * This packet should go out the same way as it
		 * came in i.e in clear, independent of the IPsec
		 * policy for transmitting packets.
		 */
		ixas.ixa_flags |= IXAF_NO_IPSEC;
	}

	/* Was the destination (now source) link-local? Send out same group */
	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
		ixas.ixa_flags |= IXAF_SCOPEID_SET;
		ixas.ixa_scopeid = IS_UNDER_IPMP(ill) ?
		    ill_get_upper_ifindex(ill) :
		    ill->ill_phyint->phyint_ifindex;
	}

	if (ira->ira_flags & IRAF_MULTIBROADCAST) {
		/*
		 * Not one of our addresses (IRE_LOCALs), thus we let
		 * ip_output_simple pick the source.
		 */
		ip6h->ip6_src = ipv6_all_zeros;
		ixas.ixa_flags |= IXAF_SET_SOURCE;
	}

	/* Should we send using dce_pmtu? */
	if (ipst->ips_ipv6_icmp_return_pmtu)
		ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;

	(void) ip_output_simple(mp, &ixas);
	ixa_cleanup(&ixas);
}
/*
 * Sanity-check an inbound ICMPv6 error or redirect before it is acted on.
 * The caller should have fully pulled up the message.
 *
 * For a redirect only length checks are made.  For an error the packet
 * embedded after the ICMPv6 header is inspected: it must be IPv6, its
 * header chain must parse, and enough of the upper-layer header must be
 * present.  For TCP a matching connection must also exist and, when that
 * connection supplies a conn_verifyicmp callback, the callback must
 * accept the message.
 *
 * Returns B_TRUE when the message may be processed.  On B_FALSE a MIB
 * counter has been bumped; the caller keeps ownership of mp.
 *
 * Called before icmp_inbound_error_fanout_v6 is called.
 *
 * NOTE(review): redirects are length-checked relative to the end of the
 * ICMPv6 header, not its start.  If nd_redirect_t already includes that
 * header, a redirect without options would be rejected as truncated --
 * confirm whether that is intended.
 */
static boolean_t
icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ip6_t		*ip6h = (ip6_t *)&icmp6[1];	/* Inner header */
	boolean_t	is_redirect;
	uint16_t	hdr_length;
	uint8_t		*nexthdrp;
	uint8_t		nexthdr;
	uchar_t		*ulp;		/* Start of the inner ULP header */
	conn_t		*connp;

	/* Nothing in the message may be read before this length check. */
	if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
		goto truncated;

	is_redirect = (icmp6->icmp6_type == ND_REDIRECT);
	if (is_redirect) {
		hdr_length = sizeof (nd_redirect_t);
	} else if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) {
		goto discard_pkt;
	} else {
		hdr_length = IPV6_HDR_LEN;
	}

	if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
		goto truncated;

	/* A redirect needs no further checking here. */
	if (is_redirect)
		return (B_TRUE);

	/*
	 * ICMP errors only from here on: walk the inner header chain to
	 * find the upper-layer protocol and where its header starts.
	 */
	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
		goto discard_pkt;
	nexthdr = *nexthdrp;
	ulp = (uchar_t *)ip6h + hdr_length;

	switch (nexthdr) {
	case IPPROTO_UDP:
	case IPPROTO_SCTP:
		/*
		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
		 * transport header.
		 */
		if (ulp + ICMP_MIN_TP_HDR_LEN > mp->b_wptr)
			goto truncated;
		break;

	case IPPROTO_TCP: {
		tcpha_t		*tcpha = (tcpha_t *)ulp;
		boolean_t	accepted;

		/*
		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
		 * transport header.
		 */
		if (ulp + ICMP_MIN_TP_HDR_LEN > mp->b_wptr)
			goto truncated;

		/*
		 * With IPMP we need to match across group, which we do
		 * since we have the upper ill from ira_ill.
		 */
		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
		    ill->ill_phyint->phyint_ifindex, ipst);
		if (connp == NULL)
			goto discard_pkt;

		/* No callback means the connection accepts the error. */
		accepted = (connp->conn_verifyicmp == NULL) ||
		    connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira);
		CONN_DEC_REF(connp);
		if (!accepted)
			goto discard_pkt;
		break;
	}

	case IPPROTO_ESP:
	case IPPROTO_AH:
		break;

	case IPPROTO_ENCAP:
	case IPPROTO_IPV6: {
		/*
		 * Possibly a self-encapsulated packet that caused the
		 * error; the encapsulated IP header has to be present.
		 */
		size_t	inner_len;

		if (nexthdr == IPPROTO_ENCAP)
			inner_len = sizeof (ipha_t);
		else
			inner_len = sizeof (ip6_t);
		if (ulp + inner_len > mp->b_wptr)
			goto truncated;
		break;
	}

	default:
		break;
	}

	return (B_TRUE);

discard_pkt:
	/* Bogus ICMP error. */
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
	return (B_FALSE);

truncated:
	/* We pulled up everything already. Must be truncated */
	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
	return (B_FALSE);
}
/*
 * Process a received IPv6 ICMP Packet Too Big message.
 *
 * The caller is responsible for validating the packet before passing it
 * in, and for fanning the ICMP error out to any matching transport conns
 * afterwards.  The message is assumed to be fully pulled up.
 *
 * Before getting here, the caller has called icmp_inbound_verify_v6(),
 * which should have verified with the ULP so that a bogus message cannot
 * undo the changes we are about to make to the DCE.  For example, TCP
 * might have verified that the packet which generated the error is in
 * the send window.
 *
 * The path MTU is recorded in the destination cache entry for the final
 * destination of the inner packet.  The MTU field of the ICMP header is
 * rewritten with the value the ULP should use; the caller should pass
 * the message to the matching ULP after this returns.
 */
static void
icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;	/* Upper ill if IPMP */
	ip_stack_t	*ipst = ill->ill_ipst;
	ip6_t		*ip6h = (ip6_t *)&icmp6[1];	/* Inner IP header */
	in6_addr_t	final_dst;
	dce_t		*dce;
	uint32_t	mtu;
	int		old_max_frag;
	boolean_t	too_small;

	/* Caller has already pulled up everything. */
	final_dst = ip_get_dst_v6(ip6h, NULL, NULL);

	/*
	 * For link local destinations matching simply on address is not
	 * sufficient. Same link local addresses for different ILL's is
	 * possible.  Hence the interface index is part of the lookup for
	 * those, and zero otherwise.
	 */
	dce = dce_lookup_and_add_v6(&final_dst,
	    IN6_IS_ADDR_LINKSCOPE(&final_dst) ?
	    ill->ill_phyint->phyint_ifindex : 0, ipst);
	if (dce == NULL) {
		/* Couldn't add a unique one - ENOMEM */
		if (ip_debug > 2) {
			/* ip1dbg */
			pr_addr_dbg("icmp_inbound_too_big_v6:"
			    "no dce for dst %s\n", AF_INET6,
			    &final_dst);
		}
		return;
	}

	mtu = ntohl(icmp6->icmp6_mtu);
	too_small = (mtu < IPV6_MIN_MTU);

	mutex_enter(&dce->dce_lock);

	/* The new PMTU may never exceed what we were already using. */
	old_max_frag = (dce->dce_flags & DCEF_PMTU) ?
	    dce->dce_pmtu : ill->ill_mtu;

	if (too_small) {
		ip1dbg(("Received mtu less than IPv6 "
		    "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
		mtu = IPV6_MIN_MTU;
		/*
		 * If an mtu less than IPv6 min mtu is received,
		 * we must include a fragment header in
		 * subsequent packets.
		 */
		dce->dce_flags |= DCEF_TOO_SMALL_PMTU;
	} else {
		dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU;
	}
	ip1dbg(("Received mtu from router: %d\n", mtu));
	dce->dce_pmtu = MIN(old_max_frag, mtu);

	/* Prepare to send the new max frag size for the ULP. */
	if (too_small) {
		/*
		 * A fragment header is needed in every packet, so make
		 * sure the ULP takes it into account when computing the
		 * payload size.
		 */
		icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t));
	} else {
		icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
	}

	/* We now have a PMTU for sure */
	dce->dce_flags |= DCEF_PMTU;
	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
	mutex_exit(&dce->dce_lock);

	/*
	 * After dropping the lock the new value is visible to everyone.
	 * Then we bump the generation number so any cached values reinspect
	 * the dce_t.
	 */
	dce_increment_generation(dce);
	dce_refrele(dce);
}
/*
* Fanout received ICMPv6 error packets to the transports.
*
* mp is the received message and icmp6 points at the ICMPv6 header inside
* it; the packet that triggered the error is expected to start right after
* that header. The message must already be pulled up into a single mblk
* (asserted below: mp->b_cont == NULL and the inner IPv6 header fits).
*
* The caller must have called icmp_inbound_verify_v6.
*
* The next-header value of the inner packet selects the path:
* - UDP, TCP and SCTP are handed to their fanout/lookup routines.
* - ESP and AH are first handed to IPsec; if a message comes back it is
* re-verified and this function recurses on it.
* - Self-encapsulated IPv6 has the duplicated inner header removed, is
* re-verified, and this function recurses on the result.
* - IP-in-IP (and IPv6 that is not self-encapsulated) is offered to a
* matching IP tunnel conn, if one exists.
* - Anything left over goes to ip_fanout_proto_v6.
*
* IRAF_ICMP_ERROR is set in ira->ira_flags around each hand-off to a
* transport and cleared again afterwards. On the drop_pkt path mp is freed
* here.
*/
void
icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
{
uint16_t *up; /* Pointer to ports in ULP header */
uint32_t ports; /* reversed ports for fanout */
ip6_t rip6h; /* With reversed addresses */
ip6_t *ip6h; /* Inner IP header */
uint16_t hdr_length; /* Inner IP header length */
uint8_t *nexthdrp;
uint8_t nexthdr;
tcpha_t *tcpha;
conn_t *connp;
ill_t *ill = ira->ira_ill; /* Upper in the case of IPMP */
ip_stack_t *ipst = ill->ill_ipst;
ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
/* Caller has already pulled up everything. */
ip6h = (ip6_t *)&icmp6[1];
ASSERT(mp->b_cont == NULL);
ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
/* Find the inner packet's upper-layer protocol and header length. */
if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
goto drop_pkt;
nexthdr = *nexthdrp;
ira->ira_protocol = nexthdr;
/*
* We need a separate IP header with the source and destination
* addresses reversed to do fanout/classification because the ip6h in
* the ICMPv6 error is in the form we sent it out.
* Only the address and next-header fields of rip6h are filled in.
*/
rip6h.ip6_src = ip6h->ip6_dst;
rip6h.ip6_dst = ip6h->ip6_src;
rip6h.ip6_nxt = nexthdr;
/* Try to pass the ICMP message to clients who need it */
switch (nexthdr) {
case IPPROTO_UDP: {
/* Attempt to find a client stream based on port. */
up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
/* Note that we send error to all matches. */
ira->ira_flags |= IRAF_ICMP_ERROR;
ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
ira->ira_flags &= ~IRAF_ICMP_ERROR;
return;
}
case IPPROTO_TCP: {
/*
* Attempt to find a client stream based on port.
* Note that we do a reverse lookup since the header is
* in the form we sent it out.
*/
tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
/*
* With IPMP we need to match across group, which we do
* since we have the upper ill from ira_ill.
*/
connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
if (connp == NULL) {
goto drop_pkt;
}
/*
* Apply the inbound IPsec policy check when the conn has a policy
* or the packet arrived protected. A NULL return means the
* message is gone, so only the conn reference is left to release.
*/
if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
(ira->ira_flags & IRAF_IPSEC_SECURE)) {
mp = ipsec_check_inbound_policy(mp, connp,
NULL, ip6h, ira);
if (mp == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
/* Note that mp is NULL */
ip_drop_input("ipIfStatsInDiscards", mp, ill);
CONN_DEC_REF(connp);
return;
}
}
ira->ira_flags |= IRAF_ICMP_ERROR;
if (IPCL_IS_TCP(connp)) {
/*
* Queue the error to the conn's squeue. No CONN_DEC_REF follows
* on this path; presumably the squeue takes over the reference
* -- verify against SQUEUE_ENTER_ONE.
*/
SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
connp->conn_recvicmp, connp, ira, SQ_FILL,
SQTAG_TCP6_INPUT_ICMP_ERR);
} else {
/* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
/*
* ira_ill and ira_rill are cleared for the duration of the
* conn_recv call and restored afterwards.
*/
ill_t *rill = ira->ira_rill;
ira->ira_ill = ira->ira_rill = NULL;
(connp->conn_recv)(connp, mp, NULL, ira);
CONN_DEC_REF(connp);
ira->ira_ill = ill;
ira->ira_rill = rill;
}
ira->ira_flags &= ~IRAF_ICMP_ERROR;
return;
}
case IPPROTO_SCTP:
up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
/* Find a SCTP client stream for this packet. */
/* Swap the two 16-bit ports to match the reversed addresses. */
((uint16_t *)&ports)[0] = up[1];
((uint16_t *)&ports)[1] = up[0];
ira->ira_flags |= IRAF_ICMP_ERROR;
ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
ira->ira_flags &= ~IRAF_ICMP_ERROR;
return;
case IPPROTO_ESP:
case IPPROTO_AH:
if (!ipsec_loaded(ipss)) {
ip_proto_not_sup(mp, ira);
return;
}
if (nexthdr == IPPROTO_ESP)
mp = ipsecesp_icmp_error(mp, ira);
else
mp = ipsecah_icmp_error(mp, ira);
/* NULL means IPsec kept or disposed of the message. */
if (mp == NULL)
return;
/* Just in case ipsec didn't preserve the NULL b_cont */
if (mp->b_cont != NULL) {
if (!pullupmsg(mp, -1))
goto drop_pkt;
}
/*
* If successful, the mp has been modified to not include
* the ESP/AH header so we can fanout to the ULP's icmp
* error handler.
*/
if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
goto drop_pkt;
/* From here on ip6h is the OUTER header of the returned message. */
ip6h = (ip6_t *)mp->b_rptr;
/* Don't call hdr_length_v6() unless you have to. */
if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
hdr_length = ip_hdr_length_v6(mp, ip6h);
else
hdr_length = IPV6_HDR_LEN;
/* Verify the modified message before any further processes. */
icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
freemsg(mp);
return;
}
icmp_inbound_error_fanout_v6(mp, icmp6, ira);
return;
case IPPROTO_IPV6: {
/* Look for self-encapsulated packets that caused an error */
ip6_t *in_ip6h;
in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
/* Self-encapsulated means both headers carry the same addresses. */
if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
/*
* Self-encapsulated case. As in the ipv4 case,
* we need to strip the 2nd IP header. Since mp
* is already pulled-up, we can simply bcopy
* the 3rd header + data over the 2nd header.
*/
uint16_t unused_len;
/*
* Make sure we don't do recursion more than once.
*/
if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
&unused_len, &nexthdrp) ||
*nexthdrp == IPPROTO_IPV6) {
goto drop_pkt;
}
/*
* Copy the 3rd header + remaining data on top
* of the 2nd header.
* The source and destination ranges overlap here.
*/
bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
/*
* Subtract length of the 2nd header.
*/
mp->b_wptr -= hdr_length;
/* From here on ip6h is the OUTER header of the message. */
ip6h = (ip6_t *)mp->b_rptr;
/* Don't call hdr_length_v6() unless you have to. */
if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
hdr_length = ip_hdr_length_v6(mp, ip6h);
else
hdr_length = IPV6_HDR_LEN;
/*
* Verify the modified message before any further
* processes.
*/
icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
freemsg(mp);
return;
}
/*
* Now recurse, and see what I _really_ should be
* doing here.
*/
icmp_inbound_error_fanout_v6(mp, icmp6, ira);
return;
}
/* FALLTHRU */
}
case IPPROTO_ENCAP:
/* Offer the error to an IP tunnel matching the reversed addresses. */
if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
&rip6h.ip6_dst, ipst)) != NULL) {
ira->ira_flags |= IRAF_ICMP_ERROR;
connp->conn_recvicmp(connp, mp, NULL, ira);
CONN_DEC_REF(connp);
ira->ira_flags &= ~IRAF_ICMP_ERROR;
return;
}
/*
* No IP tunnel is interested, fallthrough and see
* if a raw socket will want it.
*/
/* FALLTHRU */
default:
ira->ira_flags |= IRAF_ICMP_ERROR;
ASSERT(ira->ira_protocol == nexthdr);
ip_fanout_proto_v6(mp, &rip6h, ira);
ira->ira_flags &= ~IRAF_ICMP_ERROR;
return;
}
/* NOTREACHED */
drop_pkt:
/* Malformed inner packet or no matching TCP conn: count it and free mp. */
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
freemsg(mp);
}
/*
* Process received IPv6 ICMP Redirect messages.
* Assumes the caller has verified that the headers are in the pulled up mblk.
* Consumes mp: every path through this function ends in freemsg(mp).
*
* ip6h is the IPv6 header of the received packet and rd the redirect
* message inside mp. The steps are:
* 1. Validate the redirect (link-local source, maximum hop limit, code 0,
* sufficient length, acceptable destination and target, well-formed
* options).
* 2. Check that we have a route to the destination whose gateway is the
* sender of the redirect.
* 3. If a target link-layer address option is present, create or update
* the neighbor cache entry for the new first hop.
* 4. Add a dynamic host route for the destination, notify routing sockets,
* and delete an older dynamic host route via the sender, if any.
*
* Failures bump ipv6IfIcmpInBadRedirects where the code below does so and
* otherwise just release what is held.
*/
/* ARGSUSED */
static void
icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
ip_recv_attr_t *ira)
{
ire_t *ire, *nire;
ire_t *prev_ire = NULL;
ire_t *redir_ire;
in6_addr_t *src, *dst, *gateway;
nd_opt_hdr_t *opt;
nce_t *nce;
int ncec_flags = 0;
int err = 0;
boolean_t redirect_to_router = B_FALSE;
int len;
int optlen;
/*
* NOTE(review): ill and rill both start out as ira_rill; ill is used
* below for statistics and drop accounting -- confirm that ira_ill was
* not intended for ill.
*/
ill_t *ill = ira->ira_rill;
ill_t *rill = ira->ira_rill;
ip_stack_t *ipst = ill->ill_ipst;
/*
* Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
* and make it be the IPMP upper to avoid being confused by a packet
* addressed to a unicast address on a different ill.
*/
if (IS_UNDER_IPMP(rill)) {
rill = ipmp_ill_hold_ipmp_ill(rill);
if (rill == NULL) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
mp, ill);
freemsg(mp);
return;
}
/* rill is now a held ill; it is released at fail_redirect. */
ASSERT(rill != ira->ira_rill);
}
/* Length of the redirect message, from its header to the end of mp. */
len = mp->b_wptr - (uchar_t *)rd;
src = &ip6h->ip6_src;
dst = &rd->nd_rd_dst;
gateway = &rd->nd_rd_target;
/* Verify if it is a valid redirect */
if (!IN6_IS_ADDR_LINKLOCAL(src) ||
(ip6h->ip6_hops != IPV6_MAX_HOPS) ||
(rd->nd_rd_code != 0) ||
(len < sizeof (nd_redirect_t)) ||
(IN6_IS_ADDR_V4MAPPED(dst)) ||
(IN6_IS_ADDR_MULTICAST(dst))) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
goto fail_redirect;
}
/* The target must be link-local (a router) or the destination itself. */
if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
IN6_ARE_ADDR_EQUAL(gateway, dst))) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
mp, ill);
goto fail_redirect;
}
/* Whatever follows the fixed redirect header is options. */
optlen = len - sizeof (nd_redirect_t);
if (optlen != 0) {
if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
ip_drop_input("ipv6IfIcmpInBadRedirects - options",
mp, ill);
goto fail_redirect;
}
}
/* A target that differs from the destination is a better router. */
if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
redirect_to_router = B_TRUE;
ncec_flags |= NCE_F_ISROUTER;
} else {
gateway = dst; /* Add nce for dst */
}
/*
* Verify that the IP source address of the redirect is
* the same as the current first-hop router for the specified
* ICMP destination address.
* Also, Make sure we had a route for the dest in question and
* that route was pointing to the old gateway (the source of the
* redirect packet.)
* We do longest match and then compare ire_gateway_addr_v6 below.
*/
prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill,
ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL);
/*
* Check that
* the redirect was not from ourselves
* old gateway is still directly reachable
*/
if (prev_ire == NULL ||
(prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
(prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
!IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
goto fail_redirect;
}
ASSERT(prev_ire->ire_ill != NULL);
if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
ncec_flags |= NCE_F_NONUD;
/* Look for a target link-layer address option among the options. */
opt = (nd_opt_hdr_t *)&rd[1];
opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
if (opt != NULL) {
err = nce_lookup_then_add_v6(rill,
(uchar_t *)&opt[1], /* Link layer address */
rill->ill_phys_addr_length,
gateway, ncec_flags, ND_STALE, &nce);
switch (err) {
case 0:
/* A new entry was created; drop the reference returned in nce. */
nce_refrele(nce);
break;
case EEXIST:
/*
* Check to see if link layer address has changed and
* process the ncec_state accordingly.
*/
nce_process(nce->nce_common,
(uchar_t *)&opt[1], 0, B_FALSE);
nce_refrele(nce);
break;
default:
ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
err));
goto fail_redirect;
}
}
if (redirect_to_router) {
ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
/*
* Create a Route Association. This will allow us to remember
* a router told us to use the particular gateway.
*/
ire = ire_create_v6(
dst,
&ipv6_all_ones, /* mask */
gateway, /* gateway addr */
IRE_HOST,
prev_ire->ire_ill,
ALL_ZONES,
(RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
NULL,
ipst);
} else {
ipif_t *ipif;
in6_addr_t gw;
/*
* Just create an on link entry, i.e. interface route.
* The gateway field is our link-local on the ill.
*/
/* Pick the first non-condemned ipif that has a link-local address. */
mutex_enter(&rill->ill_lock);
for (ipif = rill->ill_ipif; ipif != NULL;
ipif = ipif->ipif_next) {
if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
break;
}
if (ipif == NULL) {
/* We have no link-local address! */
mutex_exit(&rill->ill_lock);
goto fail_redirect;
}
/* Copy the address while ill_lock is still held. */
gw = ipif->ipif_v6lcl_addr;
mutex_exit(&rill->ill_lock);
ire = ire_create_v6(
dst, /* gateway == dst */
&ipv6_all_ones, /* mask */
&gw, /* gateway addr */
rill->ill_net_type, /* IF_[NO]RESOLVER */
prev_ire->ire_ill,
ALL_ZONES,
(RTF_DYNAMIC | RTF_HOST),
NULL,
ipst);
}
if (ire == NULL)
goto fail_redirect;
nire = ire_add(ire);
/* Check if it was a duplicate entry */
if (nire != NULL && nire != ire) {
/*
* ire_add returned a different ire than the one passed in, i.e. an
* identical entry already existed. Delete it and treat the
* redirect as not applied.
*/
ASSERT(nire->ire_identical_ref > 1);
ire_delete(nire);
ire_refrele(nire);
nire = NULL;
}
ire = nire;
if (ire != NULL) {
ire_refrele(ire); /* Held in ire_add */
/* tell routing sockets that we received a redirect */
ip_rts_change_v6(RTM_REDIRECT,
&rd->nd_rd_dst,
&rd->nd_rd_target,
&ipv6_all_ones, 0, src,
(RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
(RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
/*
* Delete any existing IRE_HOST type ires for this destination.
* This together with the added IRE has the effect of
* modifying an existing redirect.
* The lookup matches on the sender of the redirect as gateway, and
* only an entry flagged RTF_DYNAMIC is deleted.
*/
redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
prev_ire->ire_ill, ALL_ZONES, NULL,
(MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
NULL);
if (redir_ire != NULL) {
if (redir_ire->ire_flags & RTF_DYNAMIC)
ire_delete(redir_ire);
ire_refrele(redir_ire);
}
}
ire_refrele(prev_ire);
prev_ire = NULL;
/*
* Common exit, also reached by falling through on success: release
* prev_ire if it is still held, free the message, and drop the hold on
* the IPMP ill if one was taken at the top.
*/
fail_redirect:
if (prev_ire != NULL)
ire_refrele(prev_ire);
freemsg(mp);
if (rill != ira->ira_rill)
ill_refrele(rill);
}
/*
 * Build and ship an IPv6 ICMP message using the packet data in mp,
 * and the ICMP header pointed to by "stuff". (May be called as
 * writer.)
 * Note: assumes that icmp_pkt_err_ok_v6 has been called to
 * verify that an icmp error packet can be sent.
 *
 *	mp		offending packet. It is either freed here or handed
 *			to ip_output_simple() behind a freshly allocated
 *			IPv6 + ICMPv6 header block.
 *	stuff		ICMPv6 header (plus any options) that is copied in
 *			right after the new IPv6 header.
 *	len		number of bytes at "stuff".
 *	v6src_ptr	source address to use, or NULL to derive one.
 *	ira		receive attributes of the offending packet.
 *
 * If v6src_ptr is set use it as a source. Otherwise, if the offending
 * packet was sent to one of our local/loopback addresses that address is
 * used; failing that ip_output_simple selects the source
 * (IXAF_SET_SOURCE).
 *
 * The on-stack transmit attributes are passed through ixa_cleanup() on
 * every return taken after they have been fully set up, including the
 * adjmsg()/allocb() failure returns.
 */
static void
icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
    const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
{
	ip6_t		*ip6h;
	in6_addr_t	v6dst;
	size_t		len_needed;
	size_t		msg_len;
	mblk_t		*mp1;
	icmp6_t		*icmp6;
	in6_addr_t	v6src;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ip_xmit_attr_t	ixas;

	ip6h = (ip6_t *)mp->b_rptr;

	bzero(&ixas, sizeof (ixas));
	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
	ixas.ixa_zoneid = ira->ira_zoneid;
	ixas.ixa_ifindex = 0;
	ixas.ixa_ipst = ipst;
	ixas.ixa_cred = kcred;
	ixas.ixa_cpid = NOPID;
	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;

	/*
	 * If the source of the original packet was link-local, then
	 * make sure we send on the same ill (group) as we received it on.
	 */
	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
		ixas.ixa_flags |= IXAF_SCOPEID_SET;
		if (IS_UNDER_IPMP(ill))
			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
		else
			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
	}

	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
		/*
		 * Apply IPsec based on how IPsec was applied to
		 * the packet that had the error.
		 *
		 * If it was an outbound packet that caused the ICMP
		 * error, then the caller will have setup the IRA
		 * appropriately.
		 */
		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
			/* Note: mp already consumed and ip_drop_packet done */
			/*
			 * NOTE(review): this return is left as it was; the
			 * state of ixas after a failed ipsec_in_to_out() is
			 * not visible here - confirm no cleanup is needed.
			 */
			return;
		}
	} else {
		/*
		 * This is in clear. The icmp message we are building
		 * here should go out in clear, independent of our policy.
		 */
		ixas.ixa_flags |= IXAF_NO_IPSEC;
	}

	/*
	 * If the caller specified the source we use that.
	 * Otherwise, if the packet was for one of our unicast addresses, make
	 * sure we respond with that as the source. Otherwise
	 * have ip_output_simple pick the source address.
	 */
	if (v6src_ptr != NULL) {
		v6src = *v6src_ptr;
	} else {
		ire_t *ire;
		uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;

		/* Link-local addresses only make sense on the arrival ill */
		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
			match_flags |= MATCH_IRE_ILL;

		ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
		    (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL,
		    match_flags, 0, ipst, NULL);
		if (ire != NULL) {
			v6src = ip6h->ip6_dst;
			ire_refrele(ire);
		} else {
			v6src = ipv6_all_zeros;
			ixas.ixa_flags |= IXAF_SET_SOURCE;
		}
	}
	v6dst = ip6h->ip6_src;

	/*
	 * Limit how much of the offending packet is returned.
	 * NOTE(review): this is unsigned arithmetic; it assumes
	 * ips_ipv6_icmp_return >= IPV6_HDR_LEN + len - confirm.
	 */
	len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
	msg_len = msgdsize(mp);
	if (msg_len > len_needed) {
		/* Negative length: trim from the tail of the message */
		if (!adjmsg(mp, len_needed - msg_len)) {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
			freemsg(mp);
			/* Don't strand anything attached to ixas above */
			ixa_cleanup(&ixas);
			return;
		}
		msg_len = len_needed;
	}

	/* New leading block for the IPv6 header and the ICMPv6 header */
	mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
	if (mp1 == NULL) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
		freemsg(mp);
		/* Don't strand anything attached to ixas above */
		ixa_cleanup(&ixas);
		return;
	}
	mp1->b_cont = mp;
	mp = mp1;

	/*
	 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
	 * node generates be accepted in peace by all on-host destinations.
	 * If we do NOT assume that all on-host destinations trust
	 * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
	 * (Look for IXAF_TRUSTED_ICMP).
	 */
	ixas.ixa_flags |= IXAF_TRUSTED_ICMP;

	ip6h = (ip6_t *)mp->b_rptr;
	mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_nxt = IPPROTO_ICMPV6;
	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
	ip6h->ip6_dst = v6dst;
	ip6h->ip6_src = v6src;

	msg_len += IPV6_HDR_LEN + len;
	if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
		(void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
		msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
	}
	ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));

	icmp6 = (icmp6_t *)&ip6h[1];
	bcopy(stuff, (char *)icmp6, len);

	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_output_wire_v6.
	 */
	icmp6->icmp6_cksum = ip6h->ip6_plen;

	/* Redirects go out with the maximum hop limit */
	if (icmp6->icmp6_type == ND_REDIRECT) {
		ip6h->ip6_hops = IPV6_MAX_HOPS;
	}

	(void) ip_output_simple(mp, &ixas);
	ixa_cleanup(&ixas);
}
/*
 * Account for an outgoing ICMPv6 message in the ICMPv6 MIB of "ill".
 * The total message counter is always bumped; the counter matching the
 * message type in "icmp6" is bumped as well when one exists.
 */
void
icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
{
	/* Every outgoing ICMPv6 message counts, whatever its type */
	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);

	switch (icmp6->icmp6_type) {
	/* Error messages */
	case ICMP6_DST_UNREACH:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
		/* "Administratively prohibited" has a counter of its own */
		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) {
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpOutAdminProhibs);
		}
		break;
	case ICMP6_PACKET_TOO_BIG:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
		break;
	case ICMP6_TIME_EXCEEDED:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
		break;
	case ICMP6_PARAM_PROB:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
		break;

	/* Echo */
	case ICMP6_ECHO_REQUEST:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
		break;
	case ICMP6_ECHO_REPLY:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
		break;

	/* Neighbor discovery */
	case ND_ROUTER_SOLICIT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
		break;
	case ND_ROUTER_ADVERT:
		BUMP_MIB(ill->ill_icmp6_mib,
		    ipv6IfIcmpOutRouterAdvertisements);
		break;
	case ND_NEIGHBOR_SOLICIT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
		break;
	case ND_NEIGHBOR_ADVERT:
		BUMP_MIB(ill->ill_icmp6_mib,
		    ipv6IfIcmpOutNeighborAdvertisements);
		break;
	case ND_REDIRECT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
		break;

	/* Multicast listener discovery */
	case MLD_LISTENER_QUERY:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
		break;
	case MLD_LISTENER_REPORT:
	case MLD_V2_LISTENER_REPORT:
		BUMP_MIB(ill->ill_icmp6_mib,
		    ipv6IfIcmpOutGroupMembResponses);
		break;
	case MLD_LISTENER_REDUCTION:
		BUMP_MIB(ill->ill_icmp6_mib,
		    ipv6IfIcmpOutGroupMembReductions);
		break;

	default:
		/* No per-type counter for anything else */
		break;
	}
}
/*
 * Decide whether an ICMPv6 error may be generated in response to the
 * IPv6 packet in mp.
 *
 * Returns mp when an error may be sent. Otherwise the message is freed
 * and NULL is returned (a NULL mp is simply passed back as NULL).
 *
 * mcast_ok lifts the restriction on packets that were multicast at the
 * IP layer or multicast/broadcast at the link layer.
 */
static mblk_t *
icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ip6_t		*ip6h;
	boolean_t	l2_mcbc;

	if (mp == NULL)
		return (NULL);

	ip6h = (ip6_t *)mp->b_rptr;

	/* Link-layer multicast and broadcast are treated alike */
	l2_mcbc = (ira->ira_flags &
	    (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;

	/* The source address must uniquely identify a host */
	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
	    IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
	    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src))
		goto drop;

	if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
		size_t	min_len = IPV6_HDR_LEN + ICMP6_MINLEN;
		icmp6_t	*icmp6;

		/* Need the ICMPv6 header contiguous with the IPv6 header */
		if (mp->b_wptr - mp->b_rptr < min_len) {
			if (!pullupmsg(mp, min_len)) {
				BUMP_MIB(ill->ill_icmp6_mib,
				    ipv6IfIcmpInErrors);
				goto drop;
			}
			/* The pullup may have moved the data */
			ip6h = (ip6_t *)mp->b_rptr;
		}
		icmp6 = (icmp6_t *)&ip6h[1];

		/*
		 * Never answer an ICMPv6 error with another error, and
		 * explicitly never generate errors for redirects.
		 */
		if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
		    icmp6->icmp6_type == ND_REDIRECT)
			goto drop;
	}

	/*
	 * Unless mcast_ok is set (the Packet Too Big exception), the
	 * destination must not be multicast and the packet must not have
	 * arrived as a link-layer broadcast or multicast.
	 */
	if (!mcast_ok &&
	    (l2_mcbc || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)))
		goto drop;

	/*
	 * On a labeled system, only reply when we are allowed to send a
	 * response to this particular sender.
	 */
	if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
		goto drop;
	}

	/*
	 * Only send ICMP error packets every so often.
	 * This should be done on a per port/source basis,
	 * but for now this will suffice.
	 */
	if (icmp_err_rate_limit(ipst))
		goto drop;

	return (mp);

drop:
	freemsg(mp);
	return (NULL);
}
/*
 * Called when a packet was sent out the same link that it arrived on.
 * Check if it is ok to send a redirect and then send it.
 *
 * mp itself is never consumed here; a copy is made for the redirect.
 * "ire" is the route the packet is being forwarded with.
 */
void
ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
    ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ire_t		*nhop_ire = NULL;
	ire_t		*src_ire;
	in6_addr_t	*v6targ;
	mblk_t		*copy;

	/* No redirects while forwarding a source routed packet */
	if (ip_source_routed_v6(ip6h, mp, ipst))
		return;

	if (!(ire->ire_type & IRE_ONLINK)) {
		/* Determine the most specific IRE used to send the packets */
		nhop_ire = ire_nexthop(ire);
		if (nhop_ire == NULL)
			return;

		/*
		 * We won't send redirects to a router
		 * that doesn't have a link local
		 * address, but will forward.
		 */
		if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			goto done;
		}
		v6targ = &nhop_ire->ire_addr_v6;
	} else {
		/* Target is directly connected */
		v6targ = &ip6h->ip6_dst;
	}

	/* Only redirect senders that are on-link for the outgoing ill */
	src_ire = ire_ftable_lookup_v6(&ip6h->ip6_src,
	    NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL,
	    MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
	if (src_ire != NULL) {
		/* The source is directly connected; redirect using a copy */
		copy = copymsg(mp);
		if (copy != NULL) {
			icmp_send_redirect_v6(copy, v6targ, &ip6h->ip6_dst,
			    ira);
		}
		ire_refrele(src_ire);
	}

done:
	if (nhop_ire != NULL)
		ire_refrele(nhop_ire);
}
/*
 * Generate an ICMPv6 redirect message.
 * Include target link layer address option if it exists.
 * Always include redirect header.
 *
 *	mp	copy of the packet that triggered the redirect; consumed.
 *	targetp	better first hop (redirect target address).
 *	dest	destination address being redirected.
 *	ira	receive attributes of the triggering packet. Note that
 *		ira_zoneid is overwritten with GLOBAL_ZONEID before the
 *		message is handed to icmp_pkt_v6().
 *
 * The ICMPv6 part is assembled in a temporary buffer laid out as
 * nd_redirect_t, an optional target link-layer address option and the
 * redirected-header option; icmp_pkt_v6() copies it in front of the
 * (trimmed) triggering packet. The buffer is zeroed first so that option
 * padding never carries uninitialized memory onto the wire.
 */
static void
icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
    ip_recv_attr_t *ira)
{
	nd_redirect_t	*rd;
	nd_opt_rd_hdr_t	*rdh;
	uchar_t		*buf;
	ncec_t		*ncec = NULL;
	nd_opt_hdr_t	*opt;
	int		len;
	int		ll_opt_len = 0;
	int		max_redir_hdr_data_len;
	int		pkt_len;
	in6_addr_t	*srcp;
	ill_t		*ill;
	boolean_t	need_refrele;
	ip_stack_t	*ipst = ira->ira_ill->ill_ipst;

	/* A redirect is subject to the same checks as an ICMPv6 error */
	mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
	if (mp == NULL)
		return;

	/* When under IPMP, work with the (held) IPMP ill instead */
	if (IS_UNDER_IPMP(ira->ira_ill)) {
		ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
		if (ill == NULL) {
			ill = ira->ira_ill;
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
			ip_drop_output("no IPMP ill for sending redirect",
			    mp, ill);
			freemsg(mp);
			return;
		}
		need_refrele = B_TRUE;
	} else {
		ill = ira->ira_ill;
		need_refrele = B_FALSE;
	}

	/*
	 * The target link-layer address option is only included when a
	 * usable link-layer address is known for the target. Its length
	 * is the option header plus the address, rounded up to a multiple
	 * of 8 bytes.
	 */
	ncec = ncec_lookup_illgrp_v6(ill, targetp);
	if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
	    ncec->ncec_lladdr != NULL) {
		ll_opt_len = (sizeof (nd_opt_hdr_t) +
		    ill->ill_phys_addr_length + 7)/8 * 8;
	}
	len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
	ASSERT(len % 4 == 0);
	buf = kmem_alloc(len, KM_NOSLEEP);
	if (buf == NULL) {
		if (ncec != NULL)
			ncec_refrele(ncec);
		if (need_refrele)
			ill_refrele(ill);
		freemsg(mp);
		return;
	}

	/*
	 * kmem_alloc() does not clear the memory. Not every byte below is
	 * written explicitly (e.g. the padding that rounds the link-layer
	 * option up to 8 bytes), and the whole buffer is copied into the
	 * outgoing packet, so start from zeroes.
	 */
	bzero(buf, len);

	rd = (nd_redirect_t *)buf;
	rd->nd_rd_type = (uint8_t)ND_REDIRECT;
	rd->nd_rd_code = 0;
	rd->nd_rd_reserved = 0;
	rd->nd_rd_target = *targetp;
	rd->nd_rd_dst = *dest;

	/* Optional target link-layer address option */
	opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
	if (ncec != NULL && ll_opt_len != 0) {
		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
		opt->nd_opt_len = ll_opt_len/8;
		bcopy((char *)ncec->ncec_lladdr, &opt[1],
		    ill->ill_phys_addr_length);
	}
	if (ncec != NULL)
		ncec_refrele(ncec);

	/* Redirected-header option follows the (possibly absent) option */
	rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
	rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
	/* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
	max_redir_hdr_data_len =
	    (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
	pkt_len = msgdsize(mp);
	/* Make sure mp is 8 byte aligned */
	if (pkt_len > max_redir_hdr_data_len) {
		rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
		    sizeof (nd_opt_rd_hdr_t))/8;
		(void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
	} else {
		rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
		(void) adjmsg(mp, -(pkt_len % 8));
	}
	rdh->nd_opt_rh_reserved1 = 0;
	rdh->nd_opt_rh_reserved2 = 0;

	/* ipif_v6lcl_addr contains the link-local source address */
	srcp = &ill->ill_ipif->ipif_v6lcl_addr;

	/* Redirects sent by router, and router is global zone */
	ASSERT(ira->ira_zoneid == ALL_ZONES);
	ira->ira_zoneid = GLOBAL_ZONEID;
	icmp_pkt_v6(mp, buf, len, srcp, ira);

	/* icmp_pkt_v6() copied what it needed out of buf */
	kmem_free(buf, len);
	if (need_refrele)
		ill_refrele(ill);
}
/*
 * Generate an ICMPv6 Time Exceeded message with the given code in
 * response to the packet in mp. (May be called as writer.)
 * mp is consumed, whether or not a message ends up being sent.
 */
void
icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
    ip_recv_attr_t *ira)
{
	icmp6_t	hdr;

	/* Frees mp and hands back NULL when no error may be sent */
	if ((mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira)) == NULL)
		return;

	bzero(&hdr, sizeof (hdr));
	hdr.icmp6_code = code;
	hdr.icmp6_type = ICMP6_TIME_EXCEEDED;
	icmp_pkt_v6(mp, &hdr, sizeof (hdr), NULL, ira);
}
/*
 * Generate an ICMPv6 Destination Unreachable message with the given
 * code in response to the packet in mp. mp is consumed.
 * When called from ip_output side a minimal ip_recv_attr_t needs to be
 * constructed by the caller.
 */
void
icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
    ip_recv_attr_t *ira)
{
	icmp6_t	hdr;

	/* Frees mp and hands back NULL when no error may be sent */
	if ((mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira)) == NULL)
		return;

	bzero(&hdr, sizeof (hdr));
	hdr.icmp6_code = code;
	hdr.icmp6_type = ICMP6_DST_UNREACH;
	icmp_pkt_v6(mp, &hdr, sizeof (hdr), NULL, ira);
}
/*
 * Generate an ICMPv6 Packet Too Big message advertising "mtu" (host
 * byte order on entry) in response to the packet in mp. mp is consumed.
 * When called from ip_output side a minimal ip_recv_attr_t needs to be
 * constructed by the caller.
 */
void
icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
    ip_recv_attr_t *ira)
{
	icmp6_t	hdr;

	/* Frees mp and hands back NULL when no error may be sent */
	if ((mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira)) == NULL)
		return;

	bzero(&hdr, sizeof (hdr));
	hdr.icmp6_mtu = htonl(mtu);
	hdr.icmp6_code = 0;
	hdr.icmp6_type = ICMP6_PACKET_TOO_BIG;
	icmp_pkt_v6(mp, &hdr, sizeof (hdr), NULL, ira);
}
/*
 * Generate an ICMPv6 Parameter Problem message. (May be called as
 * writer.) mp is consumed.
 * 'offset' is the offset from the beginning of the packet in error; it
 * is given in host byte order and stored in the pointer field in
 * network byte order.
 * When called from ip_output side a minimal ip_recv_attr_t needs to be
 * constructed by the caller.
 */
static void
icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
    boolean_t mcast_ok, ip_recv_attr_t *ira)
{
	icmp6_t	hdr;

	/* Frees mp and hands back NULL when no error may be sent */
	if ((mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira)) == NULL)
		return;

	bzero(&hdr, sizeof (hdr));
	hdr.icmp6_pptr = htonl(offset);
	hdr.icmp6_code = code;
	hdr.icmp6_type = ICMP6_PARAM_PROB;
	icmp_pkt_v6(mp, &hdr, sizeof (hdr), NULL, ira);
}
/*
 * Generate an ICMPv6 Parameter Problem message with code
 * ICMP6_PARAMPROB_NEXTHEADER, pointing at the offending next header
 * field as located by ip_hdr_length_nexthdr_v6(). mp is consumed; if
 * the headers cannot be walked the packet is counted as an input
 * discard and freed without sending anything.
 */
void
icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
    ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
	uint8_t		*bad_nexthdr;
	uint16_t	hdrs_len;

	/* Determine the offset of the bad nexthdr value */
	if (ip_hdr_length_nexthdr_v6(mp, ip6h, &hdrs_len, &bad_nexthdr)) {
		uint32_t where = bad_nexthdr - mp->b_rptr;

		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, where,
		    mcast_ok, ira);
		return;
	}

	/* Malformed packet */
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
	ip_drop_input("ipIfStatsInDiscards", mp, ill);
	freemsg(mp);
}
/*
 * Verify whether or not the IP address is a valid local address.
 * Could be a unicast, including one for a down interface.
 * If allow_mcbc then a multicast or broadcast address is also
 * acceptable.
 *
 * In the case of a multicast address, however, the
 * upper protocol is expected to reset the src address
 * to zero when we return IPVL_MCAST so that
 * no packets are emitted with multicast address as
 * source address.
 * The addresses valid for bind are:
 *	(1) - in6addr_any (callers must not pass it here, see the ASSERT)
 *	(2) - IP address of an UP interface	-> IPVL_UNICAST_UP
 *	(3) - IP address of a DOWN interface	-> IPVL_UNICAST_DOWN
 *	(4) - a multicast address		-> IPVL_MCAST
 *		In this case the conn will only receive packets destined
 *		to the specified multicast address. Note: the application
 *		still has to issue an IPV6_JOIN_GROUP socket option.
 * Anything else yields IPVL_BAD.
 *
 * In all the above cases, the bound address must be valid in the current
 * zone. When the address is loopback or multicast, there might be many
 * matching IREs so bind has to look up based on the zone.
 *
 * A non-zero scopeid restricts the route lookup to the ill with that
 * ifindex; if no such ill exists the address is IPVL_BAD.
 */
ip_laddr_t
ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
    ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
{
	uint_t		match_flags = MATCH_IRE_ZONEONLY;
	ill_t		*scope_ill = NULL;
	ire_t		*ire;
	ipif_t		*ipif;
	ip_laddr_t	verdict;

	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));

	if (scopeid != 0) {
		scope_ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
		if (scope_ill == NULL)
			return (IPVL_BAD);
		match_flags |= MATCH_IRE_ILL;
	}

	ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
	    scope_ill, zoneid, NULL, match_flags, 0, ipst, NULL);

	/* The ill was only needed to scope the lookup */
	if (scope_ill != NULL)
		ill_refrele(scope_ill);

	/* Whatever was found, the reference is not needed past this point */
	if (ire != NULL) {
		boolean_t is_ours =
		    (ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) != 0;

		ire_refrele(ire);

		/* (2) Bind to address of local UP interface */
		if (is_ours)
			return (IPVL_UNICAST_UP);
	}

	/*
	 * (4) bind to multicast address.
	 * Note: caller should take IPV6_MULTICAST_IF
	 * into account when selecting a real source address.
	 */
	if (IN6_IS_ADDR_MULTICAST(v6src))
		return (allow_mcbc ? IPVL_MCAST : IPVL_BAD);

	/*
	 * (3) Bind to address of local DOWN interface?
	 * (ipif_lookup_addr() looks up all interfaces
	 * but we do not get here for UP interfaces
	 * - case (2) above)
	 */
	ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
	if (ipif == NULL)
		return (IPVL_BAD);

	/* Not a useful source? */
	if ((ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) != 0)
		verdict = IPVL_BAD;
	else
		verdict = IPVL_UNICAST_DOWN;

	ipif_refrele(ipif);
	return (verdict);
}
/*
* Verify that both the source and destination addresses are valid. If
* IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
* i.e. have no route to it. Protocols like TCP want to verify destination
* reachability, while tunnels do not.
*
* Determine the route, the interface, and (optionally) the source address
* to use to reach a given destination.
* Note that we allow connect to broadcast and multicast addresses when
* IPDF_ALLOW_MCBC is set.
* first_hop and dst_addr are normally the same, but if source routing
* they will differ; in that case the first_hop is what we'll use for the
* routing lookup but the dce and label checks will be done on dst_addr.
*
* If uinfo is set, then we fill in the best available information
* we have for the destination. This is based on (in priority order) any
* metrics and path MTU stored in a dce_t, route metrics, and finally the
* ill_mtu.
*
* Tsol note: If we have a source route then dst_addr != firsthop. But we
* always do the label check on dst_addr.
*
* Assumes that the caller has set ixa_scopeid for link-local communication.
*
* Results are cached in ixa: ixa_ire, ixa_nce, ixa_dce, their generation
* numbers, ixa_postfragfn, ixa_fragsize and (with IXAF_VERIFY_PMTU)
* ixa_pmtu. With IPDF_SELECT_SRC the chosen source is written to
* *src_addrp.
*
* Returns 0 on success, otherwise an errno value: whatever
* tsol_check_dest(), ip_select_route_v6() or ip_select_source_v6()
* report, ENETUNREACH/EHOSTUNREACH for reject/blackhole routes when
* IPDF_VERIFY_DST is set, ENETUNREACH for a broadcast/multicast route
* without IPDF_ALLOW_MCBC, or EADDRNOTAVAIL for an unusable loopback source.
* Note that ixa may have been updated even when an error is returned.
*/
int
ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
uint32_t flags, uint_t mac_mode)
{
ire_t *ire;
int error = 0;
in6_addr_t setsrc; /* RTF_SETSRC */
zoneid_t zoneid = ixa->ixa_zoneid; /* Honors SO_ALLZONES */
ip_stack_t *ipst = ixa->ixa_ipst;
dce_t *dce;
uint_t pmtu;
uint_t ifindex;
uint_t generation;
nce_t *nce;
ill_t *ill = NULL;
boolean_t multirt = B_FALSE;
ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));
ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
/*
* We never send to zero; the ULPs map it to the loopback address.
* We can't allow it since we use zero to mean uninitialized in some
* places.
*/
ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));
/* Label check on the final destination; may replace the label in ixa */
if (is_system_labeled()) {
ts_label_t *tsl = NULL;
error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION,
mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl);
if (error != 0)
return (error);
if (tsl != NULL) {
/* Update the label */
ip_xmit_attr_replace_tsl(ixa, tsl);
}
}
setsrc = ipv6_all_zeros;
/*
* Select a route; For IPMP interfaces, we would only select
* a "hidden" route (i.e., going through a specific under_ill)
* if ixa_ifindex has been specified.
*/
ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation,
&setsrc, &error, &multirt);
ASSERT(ire != NULL); /* IRE_NOROUTE if none found */
if (error != 0)
goto bad_addr;
/*
* ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
* If IPDF_VERIFY_DST is set, the destination must be reachable.
* Otherwise the destination needn't be reachable.
*
* If we match on a reject or black hole, then we've got a
* local failure. May as well fail out the connect() attempt,
* since it's never going to succeed.
*/
if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
/*
* If we're verifying destination reachability, we always want
* to complain here.
*
* If we're not verifying destination reachability but the
* destination has a route, we still want to fail on the
* temporary address and broadcast address tests.
*
* In both cases we let the code continue so some reasonable
* information is returned to the caller. That enables the
* caller to use (and even cache) the IRE. conn_ip_output will
* use the generation mismatch path to check for the unreachable
* case thereby avoiding any specific check in the main path.
*/
ASSERT(generation == IRE_GENERATION_VERIFY);
if (flags & IPDF_VERIFY_DST) {
/*
* Set errno but continue to set up ixa_ire to be
* the RTF_REJECT|RTF_BLACKHOLE IRE.
* That allows callers to use ip_output to get an
* ICMP error back.
*/
if (!(ire->ire_type & IRE_HOST))
error = ENETUNREACH;
else
error = EHOSTUNREACH;
}
}
/*
* A broadcast/multicast route that the caller did not ask for is
* swapped for the reject IRE; processing continues with error set.
*/
if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
!(flags & IPDF_ALLOW_MCBC)) {
ire_refrele(ire);
ire = ire_reject(ipst, B_FALSE);
generation = IRE_GENERATION_VERIFY;
error = ENETUNREACH;
}
/* Cache things */
if (ixa->ixa_ire != NULL)
ire_refrele_notr(ixa->ixa_ire);
/*
* DEBUG only: take a _notr hold and drop the original hold, so that
* the cached reference pairs with the ire_refrele_notr() used above
* when it is eventually replaced.
*/
#ifdef DEBUG
ire_refhold_notr(ire);
ire_refrele(ire);
#endif
/* From here on the ire reference belongs to ixa_ire */
ixa->ixa_ire = ire;
ixa->ixa_ire_generation = generation;
/*
* For multicast with multirt we have a flag passed back from
* ire_lookup_multi_ill_v6 since we don't have an IRE for each
* possible multicast address.
* We also need a flag for multicast since we can't check
* whether RTF_MULTIRT is set in ixa_ire for multicast.
*/
if (multirt) {
ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
} else {
ixa->ixa_postfragfn = ire->ire_postfragfn;
ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
}
if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
/* Get an nce to cache. */
nce = ire_to_nce(ire, NULL, firsthop);
if (nce == NULL) {
/* Allocation failure? */
ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
} else {
if (ixa->ixa_nce != NULL)
nce_refrele(ixa->ixa_nce);
ixa->ixa_nce = nce;
}
}
/*
* If the source address is a loopback address, the
* destination had best be local or multicast.
* If we are sending to an IRE_LOCAL using a loopback source then
* it had better be the same zoneid.
*/
if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
ire = NULL; /* Stored in ixa_ire */
error = EADDRNOTAVAIL;
goto bad_addr;
}
if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
ire = NULL; /* Stored in ixa_ire */
error = EADDRNOTAVAIL;
goto bad_addr;
}
}
/*
* Does the caller want us to pick a source address?
*/
if (flags & IPDF_SELECT_SRC) {
in6_addr_t src_addr;
/*
* We use ire_nexthop_ill to avoid the under ipmp
* interface for source address selection. Note that for ipmp
* probe packets, ixa_ifindex would have been specified, and
* the ip_select_route() invocation would have picked an ire
* with ire_ill pointing at an under interface.
*/
ill = ire_nexthop_ill(ire);
/* If unreachable we have no ill but need some source */
if (ill == NULL) {
src_addr = ipv6_loopback;
/* Make sure we look for a better source address */
generation = SRC_GENERATION_VERIFY;
} else {
error = ip_select_source_v6(ill, &setsrc, dst_addr,
zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
&src_addr, &generation, NULL);
if (error != 0) {
ire = NULL; /* Stored in ixa_ire */
goto bad_addr;
}
}
/*
* We allow the source address to be down.
* However, we check that we don't use the loopback address
* as a source when sending out on the wire.
*/
if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
!(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
ire = NULL; /* Stored in ixa_ire */
error = EADDRNOTAVAIL;
goto bad_addr;
}
*src_addrp = src_addr;
ixa->ixa_src_generation = generation;
}
/*
* Make sure we don't leave an unreachable ixa_nce in place
* since ip_select_route is used when we unplumb i.e., remove
* references on ixa_ire, ixa_nce, and ixa_dce.
*/
nce = ixa->ixa_nce;
if (nce != NULL && nce->nce_is_condemned) {
nce_refrele(nce);
ixa->ixa_nce = NULL;
ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
}
/*
* Pick the ifindex for the DCE lookup. Note that ill is only set
* when IPDF_SELECT_SRC was requested (and a nexthop ill was found).
*/
ifindex = 0;
if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
/* If we are creating a DCE we'd better have an ifindex */
if (ill != NULL)
ifindex = ill->ill_phyint->phyint_ifindex;
else
flags &= ~IPDF_UNIQUE_DCE;
}
if (flags & IPDF_UNIQUE_DCE) {
/* Fallback to the default dce if allocation fails */
dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
if (dce != NULL) {
generation = dce->dce_generation;
} else {
dce = dce_lookup_v6(dst_addr, ifindex, ipst,
&generation);
}
} else {
dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
}
ASSERT(dce != NULL);
if (ixa->ixa_dce != NULL)
dce_refrele_notr(ixa->ixa_dce);
/* DEBUG only: same _notr hold swap as done for the ire above */
#ifdef DEBUG
dce_refhold_notr(dce);
dce_refrele(dce);
#endif
ixa->ixa_dce = dce;
ixa->ixa_dce_generation = generation;
/*
* Note that IPv6 multicast supports PMTU discovery unlike IPv4
* multicast. But pmtu discovery is only enabled for connected
* sockets in general.
*/
/*
* Set initial value for fragmentation limit. Either conn_ip_output
* or ULP might update it when there are routing changes.
* Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
*/
pmtu = ip_get_pmtu(ixa);
ixa->ixa_fragsize = pmtu;
/* Make sure ixa_fragsize and ixa_pmtu remain identical */
if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
ixa->ixa_pmtu = pmtu;
/*
* Extract information useful for some transports.
* First we look for DCE metrics. Then we take what we have in
* the metrics in the route, where the offlink is used if we have
* one.
*/
if (uinfo != NULL) {
bzero(uinfo, sizeof (*uinfo));
if (dce->dce_flags & DCEF_UINFO)
*uinfo = dce->dce_uinfo;
rts_merge_metrics(uinfo, &ire->ire_metrics);
/* Allow ire_metrics to decrease the path MTU from above */
if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
uinfo->iulp_mtu = pmtu;
uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
}
if (ill != NULL)
ill_refrele(ill);
/* error may be non-zero here (reject/blackhole or mcast/bcast cases) */
return (error);
bad_addr:
/*
* ire is NULL here whenever its reference has already been handed
* over to ixa_ire; it is only released when still owned locally.
*/
if (ire != NULL)
ire_refrele(ire);
if (ill != NULL)
ill_refrele(ill);
/*
* Make sure we don't leave an unreachable ixa_nce in place
* since ip_select_route is used when we unplumb i.e., remove
* references on ixa_ire, ixa_nce, and ixa_dce.
*/
nce = ixa->ixa_nce;
if (nce != NULL && nce->nce_is_condemned) {
nce_refrele(nce);
ixa->ixa_nce = NULL;
ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
}
return (error);
}
/*
* Handle protocols with which IP is less intimate. There
* can be more than one stream bound to a particular
* protocol. When this is the case, normally each one gets a copy
* of any incoming packets.
*
* The conns hashed under ira->ira_protocol are walked; every additional
* matching conn receives a duplicate of mp and the first match receives
* mp itself, last. If no conn matches, an ICMPv6 parameter problem
* (unrecognized next header) is generated via ip_fanout_send_icmp_v6().
* mp is consumed in all cases.
*
* Zones notes:
* Packets will be distributed to conns in all zones. This is really only
* useful for ICMPv6 as only applications in the global zone can create raw
* sockets for other protocols.
*/
void
ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
{
mblk_t *mp1;
in6_addr_t laddr = ip6h->ip6_dst;
conn_t *connp, *first_connp, *next_connp;
connf_t *connfp;
ill_t *ill = ira->ira_ill;
ip_stack_t *ipst = ill->ill_ipst;
connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
mutex_enter(&connfp->connf_lock);
connp = connfp->connf_head;
/* Find the first conn that wants the packet (and passes the label check) */
for (connp = connfp->connf_head; connp != NULL;
connp = connp->conn_next) {
/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
(!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
break;
}
if (connp == NULL) {
/*
* No conn is interested in this protocol. Drop the
* fanout lock and report an unrecognized next header
* back to the sender.
*/
mutex_exit(&connfp->connf_lock);
ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
ICMP6_PARAMPROB_NEXTHEADER, ira);
return;
}
ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
/* Hold the first match; it is served last, with the original mp */
CONN_INC_REF(connp);
first_connp = connp;
/*
* XXX: Fix the multiple protocol listeners case. We should not
* be walking the conn->conn_next list here.
*/
connp = connp->conn_next;
for (;;) {
while (connp != NULL) {
/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
(!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
tsol_receive_local(mp, &laddr, IPV6_VERSION,
ira, connp)))
break;
connp = connp->conn_next;
}
if (connp == NULL) {
/* No more interested clients */
connp = first_connp;
break;
}
/* Try a cheap dupmsg() first and fall back to a full copy */
if (((mp1 = dupmsg(mp)) == NULL) &&
((mp1 = copymsg(mp)) == NULL)) {
/*
* Memory allocation failed. Stop fanning out; the
* original mp is still delivered to the first match.
*/
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ip_drop_input("ipIfStatsInDiscards", mp, ill);
connp = first_connp;
break;
}
/*
* Deliver the copy without holding the fanout lock; the
* reference taken here is held across the call.
*/
CONN_INC_REF(connp);
mutex_exit(&connfp->connf_lock);
ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
ira);
mutex_enter(&connfp->connf_lock);
/* Follow the next pointer before releasing the conn. */
next_connp = connp->conn_next;
CONN_DEC_REF(connp);
connp = next_connp;
}
/* Last one. Send it upstream. */
mutex_exit(&connfp->connf_lock);
ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
/* Drop the hold taken on the first match above */
CONN_DEC_REF(connp);
}
/*
 * Called when it is conceptually a ULP that would send the packet
 * e.g., port unreachable and nexthdr unknown. Check that the packet
 * would have passed the IPsec global policy before sending the error.
 *
 * Send an ICMP error after patching up the packet appropriately.
 * Uses ip_drop_input and bumps the appropriate MIB.
 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
 *
 * Only ICMP6_DST_UNREACH (code ICMP6_DST_UNREACH_NOPORT) and
 * ICMP6_PARAM_PROB (code ICMP6_PARAMPROB_NEXTHEADER) are expected as
 * icmp_type; anything else panics on DEBUG kernels and is silently
 * dropped otherwise. mp is consumed.
 */
void
ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
    ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	netstack_t	*ns = ipst->ips_netstack;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;

	/*
	 * We are generating an icmp error for some inbound packet.
	 * Called from all ip_fanout_(udp, tcp, proto) functions.
	 * Before we generate an error, check with global policy
	 * to see whether this is allowed to enter the system. As
	 * there is no "conn", we are checking with global policy.
	 */
	if ((ira->ira_flags & IRAF_IPSEC_SECURE) ||
	    ipss->ipsec_inbound_v6_policy_present) {
		mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
		if (mp == NULL)
			return;
	}

	/* We never send errors for protocols that we do implement */
	if (ira->ira_protocol == IPPROTO_ICMPV6) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
		freemsg(mp);
		return;
	}

	if (icmp_type == ICMP6_DST_UNREACH) {
		/* Nobody is listening on the destination port */
		ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
		BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
		ip_drop_input("ipIfStatsNoPorts", mp, ill);
		icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
	} else if (icmp_type == ICMP6_PARAM_PROB) {
		/* Nobody handles the next header value */
		ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
		ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
		/* Let the system determine the offset for this one */
		icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
	} else {
#ifdef DEBUG
		panic("ip_fanout_send_icmp_v6: wrong type");
		/*NOTREACHED*/
#else
		freemsg(mp);
#endif
	}
}
/*
* Fanout for UDP packets that are multicast or ICMP errors.
* (Unicast fanout is handled in ip_input_v6.)
*
* The caller passes the local and foreign ports in lport and fport.
* ira_flags must have IRAF_MULTIBROADCAST or IRAF_ICMP_ERROR set.
*
* If SO_REUSEADDR is set on the first matching conn, the packet
* will be delivered to all matching conns bound to the same port.
*
* Zones notes:
* Earlier in ip_input on a system with multiple shared-IP zones we
* duplicate the multicast and broadcast packets and send them up
* with each explicit zoneid that exists on that ill.
* This means that here we can match the zoneid with SO_ALLZONES being special.
*/
void
ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
    ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	in6_addr_t	laddr;
	in6_addr_t	faddr;
	connf_t		*connfp;
	conn_t		*connp;

	ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));

	laddr = ip6h->ip6_dst;
	faddr = ip6h->ip6_src;

	/*
	 * Scan the UDP fanout bucket selected by the local port for the
	 * first conn that matches the addresses and ports, wants the packet
	 * and, on a labeled system, passes the label check.
	 */
	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		if (!(IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)))
			continue;
		if (!conn_wantpacket_v6(connp, ira, ip6h))
			continue;
		if ((ira->ira_flags & IRAF_SYSTEM_LABELED) &&
		    !tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))
			continue;
		break;
	}

	if (connp == NULL) {
		mutex_exit(&connfp->connf_lock);
		/*
		 * Nothing is bound to this port. Hand the packet to a
		 * client registered for the UDP protocol as a whole if one
		 * exists, otherwise answer with a port unreachable.
		 */
		if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head !=
		    NULL) {
			ASSERT(ira->ira_protocol == IPPROTO_UDP);
			ip_fanout_proto_v6(mp, ip6h, ira);
		} else {
			ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
			    ICMP6_DST_UNREACH_NOPORT, ira);
		}
		return;
	}

	/* Hold the first taker; it receives the original mblk at the end. */
	CONN_INC_REF(connp);

	if (connp->conn_reuseaddr) {
		conn_t	*first_taker = connp;
		conn_t	*following;
		mblk_t	*copy;

		connp = connp->conn_next;
		for (;;) {
			/* Advance to the next conn that qualifies. */
			for (; connp != NULL; connp = connp->conn_next) {
				if (!(IPCL_UDP_MATCH_V6(connp, lport, laddr,
				    fport, faddr)))
					continue;
				if (!conn_wantpacket_v6(connp, ira, ip6h))
					continue;
				if ((ira->ira_flags & IRAF_SYSTEM_LABELED) &&
				    !tsol_receive_local(mp, &laddr,
				    IPV6_VERSION, ira, connp))
					continue;
				break;
			}
			if (connp == NULL) {
				/* The bucket holds no further takers. */
				break;
			}

			/* Prefer a dup; fall back to a full copy. */
			copy = dupmsg(mp);
			if (copy == NULL)
				copy = copymsg(mp);
			if (copy == NULL) {
				/* Neither allocation worked; stop fanning. */
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards", mp, ill);
				break;
			}

			/*
			 * Take a reference on the conn so it can be used
			 * while the bucket lock is dropped for delivery.
			 */
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);

			IP6_STAT(ipst, ip6_udp_fanmb);
			ip_fanout_udp_conn(connp, copy, NULL,
			    (ip6_t *)copy->b_rptr, ira);

			mutex_enter(&connfp->connf_lock);
			/*
			 * Read the successor while our reference is still
			 * held, then let go of the conn. Note that the
			 * statistic is bumped a second time here.
			 */
			following = connp->conn_next;
			IP6_STAT(ipst, ip6_udp_fanmb);
			CONN_DEC_REF(connp);
			connp = following;
		}
		/* Whichever way the loop ended, finish with the first taker. */
		connp = first_taker;
	}

	/* Final delivery: the original message goes to the held conn. */
	mutex_exit(&connfp->connf_lock);

	IP6_STAT(ipst, ip6_udp_fanmb);
	ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
	CONN_DEC_REF(connp);
}
/*
* int ip_find_hdr_v6()
*
* This routine is used by the upper layer protocols, iptun, and IPsec:
* - Set extension header pointers to appropriate locations
* - Determine IPv6 header length and return it
* - Return a pointer to the last nexthdr value
*
* The caller must initialize ipp_fields.
* The upper layer protocols normally set label_separate which makes the
* routine put the TX label in ipp_label_v6. If this is not set then
* the hop-by-hop options including the label are placed in ipp_hopopts.
*
* NOTE: If multiple extension headers of the same type are present,
* ip_find_hdr_v6() will set the respective extension header pointers
* to the first one that it encounters in the IPv6 header. Fragment
* headers are stepped over as well, with the first one recorded in
* ipp_fraghdr. This routine deals with malformed packets of various
* sorts, in which case the returned length is up to the malformed part.
*/
int
ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp,
    uint8_t *nexthdrp)
{
	uint_t	hdrs_len;
	uint_t	ehdrlen;
	uint8_t	nexthdr;
	uint8_t	*cur;
	uint8_t	*end;

	/* Values that come straight from the fixed IPv6 header. */
	ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
	ipp->ipp_hoplimit = ip6h->ip6_hops;
	ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
	ipp->ipp_addr = ip6h->ip6_dst;

	hdrs_len = IPV6_HDR_LEN;
	cur = (uint8_t *)&ip6h[1];	/* first byte past the fixed header */
	end = mp->b_wptr;
	nexthdr = ip6h->ip6_nxt;

	while (cur < end) {
		/* Stop unless a minimal extension header still fits. */
		if (cur + MIN_EHDR_LEN > end)
			break;

		switch (nexthdr) {
		case IPPROTO_HOPOPTS: {
			ip6_hbh_t	*hbh = (ip6_hbh_t *)cur;
			uchar_t		*secopt;
			uchar_t		*after_secopt;
			boolean_t	hbh_needed;

			ehdrlen = 8 * (hbh->ip6h_len + 1);
			if (cur + ehdrlen > end)
				goto done;
			nexthdr = hbh->ip6h_nxt;

			if (label_separate) {
				/*
				 * Look for a security option. The return
				 * value is ignored on purpose: packets with
				 * bad options were already dropped in
				 * ip6_input.
				 */
				(void) tsol_find_secopt_v6(cur, ehdrlen,
				    &secopt, &after_secopt, &hbh_needed);
			} else {
				secopt = NULL;
				after_secopt = cur;
			}

			if (secopt == NULL || after_secopt - cur <= 0) {
				/* No label split out; report the whole hbh. */
				ipp->ipp_label_len_v6 = 0;
				after_secopt = cur;
				hbh_needed = B_TRUE;
			} else {
				ipp->ipp_fields |= IPPF_LABEL_V6;
				ipp->ipp_label_v6 = secopt;
				ipp->ipp_label_len_v6 = after_secopt - cur;
			}

			/* Only the first hop-by-hop header is reported. */
			if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) {
				ipp->ipp_fields |= IPPF_HOPOPTS;
				ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt;
				ipp->ipp_hopoptslen = ehdrlen -
				    ipp->ipp_label_len_v6;
			}
			break;
		}
		case IPPROTO_DSTOPTS: {
			ip6_dest_t	*dopts = (ip6_dest_t *)cur;

			ehdrlen = 8 * (dopts->ip6d_len + 1);
			if (cur + ehdrlen > end)
				goto done;
			nexthdr = dopts->ip6d_nxt;

			/*
			 * Record it as the post-routing-header destination
			 * options for now; if a routing header shows up
			 * later it is reclassified there.
			 */
			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
				ipp->ipp_fields |= IPPF_DSTOPTS;
				ipp->ipp_dstopts = dopts;
				ipp->ipp_dstoptslen = ehdrlen;
			}
			break;
		}
		case IPPROTO_ROUTING: {
			ip6_rthdr_t	*rth = (ip6_rthdr_t *)cur;

			ehdrlen = 8 * (rth->ip6r_len + 1);
			if (cur + ehdrlen > end)
				goto done;
			nexthdr = rth->ip6r_nxt;

			/* Only the first routing header is reported. */
			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
				ipp->ipp_fields |= IPPF_RTHDR;
				ipp->ipp_rthdr = rth;
				ipp->ipp_rthdrlen = ehdrlen;
			}

			/*
			 * Destination options recorded so far preceded this
			 * routing header, so move them to the pre-rthdr slot
			 * and clear the post-rthdr one.
			 */
			if (ipp->ipp_fields & IPPF_DSTOPTS) {
				ipp->ipp_fields = (ipp->ipp_fields &
				    ~IPPF_DSTOPTS) | IPPF_RTHDRDSTOPTS;
				ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
				ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
				ipp->ipp_dstopts = NULL;
				ipp->ipp_dstoptslen = 0;
			}
			break;
		}
		case IPPROTO_FRAGMENT: {
			ip6_frag_t	*frag = (ip6_frag_t *)cur;

			/* The fragment header has a fixed size. */
			ehdrlen = sizeof (ip6_frag_t);
			if (cur + ehdrlen > end)
				goto done;
			nexthdr = frag->ip6f_nxt;

			/* Only the first fragment header is reported. */
			if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
				ipp->ipp_fields |= IPPF_FRAGHDR;
				ipp->ipp_fraghdr = frag;
				ipp->ipp_fraghdrlen = ehdrlen;
			}
			break;
		}
		case IPPROTO_NONE:
		default:
			/* Not an extension header handled here; stop. */
			goto done;
		}

		/* Step over the extension header just processed. */
		hdrs_len += ehdrlen;
		cur += ehdrlen;
	}

done:
	if (nexthdrp != NULL)
		*nexthdrp = nexthdr;
	return (hdrs_len);
}
/*
* Try to determine the length of the IPv6 header (including extension
* headers) and a pointer to the nexthdr value for the upper layer
* protocol (or an unknown next hdr).
*
* The results are returned through hdr_length_ptr and nexthdrpp.
* Must handle malformed packets of various sorts.
* Function returns failure (B_FALSE) for malformed cases.
*/
boolean_t
ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
    uint8_t **nexthdrpp)
{
	uint16_t	hdrs_len;
	uint_t		ehdrlen;
	uint8_t		*nxtp;
	uint8_t		*cur;
	uint8_t		*end;

	ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);

	hdrs_len = IPV6_HDR_LEN;
	cur = (uint8_t *)&ip6h[1];	/* first byte past the fixed header */
	end = mp->b_wptr;
	nxtp = &ip6h->ip6_nxt;

	/* Keep going while a minimal extension header still fits. */
	while (cur < end && cur + MIN_EHDR_LEN <= end) {
		uint8_t	proto = *nxtp;

		if (proto == IPPROTO_HOPOPTS || proto == IPPROTO_DSTOPTS) {
			/*
			 * Hop-by-hop and destination options are parsed
			 * through the same structure; this assumes their
			 * layouts are identical.
			 */
			ip6_dest_t	*opts = (ip6_dest_t *)cur;

			ehdrlen = 8 * (opts->ip6d_len + 1);
			if (cur + ehdrlen > end)
				return (B_FALSE);
			nxtp = &opts->ip6d_nxt;
		} else if (proto == IPPROTO_ROUTING) {
			ip6_rthdr_t	*rth = (ip6_rthdr_t *)cur;

			ehdrlen = 8 * (rth->ip6r_len + 1);
			if (cur + ehdrlen > end)
				return (B_FALSE);
			nxtp = &rth->ip6r_nxt;
		} else if (proto == IPPROTO_FRAGMENT) {
			ip6_frag_t	*frag = (ip6_frag_t *)cur;

			/* The fragment header has a fixed size. */
			ehdrlen = sizeof (ip6_frag_t);
			if (cur + ehdrlen > end)
				return (B_FALSE);
			nxtp = &frag->ip6f_nxt;
		} else {
			/*
			 * IPPROTO_NONE or anything that is not one of the
			 * extension headers above ends the walk; the common
			 * exit below reports success for it.
			 */
			break;
		}

		/* Step over the header and publish the progress so far. */
		hdrs_len += ehdrlen;
		cur += ehdrlen;
		*hdr_length_ptr = hdrs_len;
		*nexthdrpp = nxtp;
	}

	/*
	 * If a known extension header is still pending, it did not fit in
	 * this mblk: the packet is malformed (or the IP headers are not all
	 * in the same mblk, which should never happen).
	 */
	if (*nxtp == IPPROTO_HOPOPTS || *nxtp == IPPROTO_DSTOPTS ||
	    *nxtp == IPPROTO_ROUTING || *nxtp == IPPROTO_FRAGMENT)
		return (B_FALSE);

	/*
	 * All of the IP headers were in this mblk, even if the ULP header
	 * lives in the next one.
	 */
	*hdr_length_ptr = hdrs_len;
	*nexthdrpp = nxtp;
	return (B_TRUE);
}
/*
* Return the length of the IPv6 related headers (including extension headers)
* Returns a length even if the packet is malformed.
*/
int
ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
{
uint16_t