blob: 686e2ad94e40cc13a319d01ee96f8c6a658d4ef4 [file] [log] [blame]
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 1990 Mentat Inc.
*/
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsun.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/kobj.h>
#include <sys/zone.h>
#include <sys/neti.h>
#include <sys/hook.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/vtrace.h>
#include <sys/isa_defs.h>
#include <sys/atomic.h>
#include <sys/iphada.h>
#include <sys/policy.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/if_dl.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/sctp.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/optcom.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/arp.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <inet/ip6_asp.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/udp_impl.h>
#include <inet/ipp_common.h>
#include <inet/ip_multi.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ip_rts.h>
#include <inet/ip_ndp.h>
#include <net/pfkeyv2.h>
#include <inet/ipsec_info.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>
#include <inet/tun.h>
#include <inet/sctp_ip.h>
#include <sys/pattr.h>
#include <inet/ipclassifier.h>
#include <inet/ipsecah.h>
#include <inet/rawip_impl.h>
#include <inet/rts_impl.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
#include <rpc/pmap_prot.h>
/* Temporary; for CR 6451644 work-around */
#include <sys/ethernet.h>
extern int ip_squeue_flag;
/*
* Naming conventions:
* These rules should be judiciously applied
* if there is a need to identify something as IPv6 versus IPv4
* IPv6 functions will end with _v6 in the ip module.
* IPv6 functions will end with _ipv6 in the transport modules.
* IPv6 macros:
* Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
* Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
* And then there are ..V4_PART_OF_V6.
* The intent is that macros in the ip module end with _V6.
* IPv6 global variables will start with ipv6_
* IPv6 structures will start with ipv6
* IPv6 defined constants should start with IPV6_
* (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
*/
/*
* ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
* We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
* from IANA. This mechanism will remain in effect until an official
* number is obtained.
*/
uchar_t		ip6opt_ls;

/*
 * Well-known IPv6 addresses, expressed as four 32-bit words.
 *
 * The all-ones and all-zeros addresses read the same in either byte order.
 * Every other constant is spelled out twice, once per host byte order, with
 * the words byte-swapped between the two variants.
 */
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };

#if defined(_BIG_ENDIAN)

const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
const in6_addr_t ipv6_solicited_node_mcast =
	{ 0xff020000U, 0, 0x00000001U, 0xff000000U };

#else	/* _BIG_ENDIAN */

const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
const in6_addr_t ipv6_solicited_node_mcast =
	{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };

#endif	/* _BIG_ENDIAN */
/*
* OK_RESOLVER_MP_V6(mp) is true when mp is non-NULL and the data between
* b_rptr and b_wptr of that mblk is at least 2 * IPV6_ADDR_LEN bytes long.
* Leave room for ip_newroute to tack on the src and target addresses
*/
#define OK_RESOLVER_MP_V6(mp) \
((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN))
/*
* NOTE(review): status codes; their users are not visible in this part of
* the file. Judging by the names: success, header error, length error.
*/
#define IP6_MBLK_OK 0
#define IP6_MBLK_HDR_ERR 1
#define IP6_MBLK_LEN_ERR 2
/*
* Forward declarations for functions that are private to this file.
*/
static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *, ill_t *,
boolean_t, zoneid_t);
static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *);
static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
static int ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *,
uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
boolean_t, boolean_t, cred_t *);
static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *,
iulp_t *, ip_stack_t *);
static void ip_bind_post_handling_v6(conn_t *, mblk_t *, boolean_t,
boolean_t, ip_stack_t *);
static int ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t,
const in6_addr_t *, uint16_t, boolean_t);
static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
ill_t *, uint_t, uint_t, boolean_t, zoneid_t);
static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t,
ill_t *, ill_t *, uint_t, boolean_t, zoneid_t);
static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *,
uint8_t *, uint_t, uint8_t, ip_stack_t *);
static mblk_t *ip_rput_frag_v6(ill_t *, ill_t *, mblk_t *, ip6_t *,
ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *);
static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int,
conn_t *, int, int, zoneid_t);
static boolean_t ipif_lookup_testaddr_v6(ill_t *, const in6_addr_t *,
ipif_t **);
/*
* A template for an IPv6 AR_ENTRY_QUERY
*
* All offsets are measured from the start of the areq_t. The layout the
* initializer describes is: the areq_t itself, then the target address
* (IPV6_ADDR_LEN bytes), then the sender address (IPV6_ADDR_LEN bytes),
* then the name.
*/
static areq_t ipv6_areq_template = {
AR_ENTRY_QUERY, /* cmd */
sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */
sizeof (areq_t), /* name len (filled by ill_arp_alloc) */
IP6_DL_SAP, /* protocol, from arps perspective */
sizeof (areq_t), /* target addr offset */
IPV6_ADDR_LEN, /* target addr_length */
0, /* flags */
sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */
IPV6_ADDR_LEN, /* sender addr length */
6, /* xmit_count */
1000, /* (re)xmit_interval in milliseconds */
4 /* max # of requests to buffer */
/* anything else filled in by the code */
};
/*
 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in.
 * The message has already been checksummed and, if needed,
 * a copy has been made to be sent to any interested ICMP client (conn).
 * Note that this is different than icmp_inbound() which does the fanout
 * to conn's as well as local processing of the ICMP packets.
 *
 * All error messages are passed to the matching transport stream.
 *
 * Arguments:
 *	q		queue the packet arrived on; WR(q) is used to send
 *			the echo reply.
 *	mp		the message; when mctl_present is set this is the
 *			IPSEC_IN M_CTL and the packet is mp->b_cont.
 *	ill		ill used for MIB accounting and IPPF processing.
 *	inill		ill handed to ndp_input() and to the error fanout.
 *	hdr_length	offset of the ICMPv6 header from the start of the
 *			IPv6 header (i.e. IPv6 header plus extension headers).
 *	mctl_present	B_TRUE when an IPSEC_IN M_CTL precedes the packet.
 *	flags		consulted only for IP6_IN_IPP().
 *	zoneid		zone in whose context the packet is processed.
 *	dl_mp		passed through untouched to ndp_input().
 *
 * Zones notes:
 * The packet is only processed in the context of the specified zone: typically
 * only this zone will reply to an echo request. This means that the caller must
 * call icmp_inbound_v6() for each relevant zone.
 */
static void
icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill,
    uint_t hdr_length, boolean_t mctl_present, uint_t flags, zoneid_t zoneid,
    mblk_t *dl_mp)
{
	icmp6_t		*icmp6;
	ip6_t		*ip6h;
	boolean_t	interested;
	in6_addr_t	origsrc;
	mblk_t		*first_mp;
	ipsec_in_t	*ii = NULL;
	ip_stack_t	*ipst;

	/* Check ill before it is dereferenced for the first time. */
	ASSERT(ill != NULL);
	ipst = ill->ill_ipst;

	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		ASSERT(mp != NULL);

		ii = (ipsec_in_t *)first_mp->b_rptr;
		ASSERT(ii->ipsec_in_type == IPSEC_IN);
	}

	ip6h = (ip6_t *)mp->b_rptr;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);

	/* Make sure the IPv6 header(s) and the minimal ICMPv6 header are flat */
	if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) {
		if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) {
			ip1dbg(("icmp_inbound_v6: pullupmsg failed\n"));
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
			freemsg(first_mp);
			return;
		}
		/* pullupmsg may have moved the data */
		ip6h = (ip6_t *)mp->b_rptr;
	}
	if (ipst->ips_icmp_accept_clear_messages == 0) {
		first_mp = ipsec_check_global_policy(first_mp, NULL,
		    NULL, ip6h, mctl_present, ipst->ips_netstack);
		if (first_mp == NULL)
			return;
	}

	/*
	 * On a labeled system, we have to check whether the zone itself is
	 * permitted to receive raw traffic.
	 */
	if (is_system_labeled()) {
		if (zoneid == ALL_ZONES)
			zoneid = tsol_packet_to_zoneid(mp);
		if (!tsol_can_accept_raw(mp, B_FALSE)) {
			ip1dbg(("icmp_inbound_v6: zone %d can't receive raw",
			    zoneid));
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
			freemsg(first_mp);
			return;
		}
	}

	icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
	ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
	    icmp6->icmp6_code));
	/* Only messages without the informational bit go to the error fanout */
	interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);

	/* Initiate IPPF processing here */
	if (IP6_IN_IPP(flags, ipst)) {

		/*
		 * If the ifindex changes due to SIOCSLIFINDEX
		 * packet may return to IP on the wrong ill.
		 */
		ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex);
		if (mp == NULL) {
			if (mctl_present) {
				freeb(first_mp);
			}
			return;
		}
	}

	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
		break;

	case ICMP6_TIME_EXCEEDED:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
		break;

	case ICMP6_PARAM_PROB:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
		break;

	case ICMP6_PACKET_TOO_BIG:
		icmp_inbound_too_big_v6(q, first_mp, ill, inill, mctl_present,
		    zoneid);
		return;

	case ICMP6_ECHO_REQUEST:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
		if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
		    !ipst->ips_ipv6_resp_echo_mcast)
			break;

		/*
		 * We must have exclusive use of the mblk to convert it to
		 * a response.
		 * If not, we copy it.
		 */
		if (mp->b_datap->db_ref > 1) {
			mblk_t	*mp1;

			mp1 = copymsg(mp);
			freemsg(mp);
			if (mp1 == NULL) {
				BUMP_MIB(ill->ill_icmp6_mib,
				    ipv6IfIcmpInErrors);
				if (mctl_present)
					freeb(first_mp);
				return;
			}
			mp = mp1;
			ip6h = (ip6_t *)mp->b_rptr;
			icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
			if (mctl_present)
				first_mp->b_cont = mp;
			else
				first_mp = mp;
		}

		/*
		 * Turn the echo into an echo reply.
		 * Remove any extension headers (do not reverse a source route)
		 * and clear the flow id (keep traffic class for now).
		 */
		if (hdr_length != IPV6_HDR_LEN) {
			int	i;

			/*
			 * Slide the fixed IPv6 header up against the ICMPv6
			 * header, copying from the last byte backwards since
			 * the source and destination ranges may overlap.
			 */
			for (i = 0; i < IPV6_HDR_LEN; i++)
				mp->b_rptr[hdr_length - i - 1] =
				    mp->b_rptr[IPV6_HDR_LEN - i - 1];
			mp->b_rptr += (hdr_length - IPV6_HDR_LEN);
			ip6h = (ip6_t *)mp->b_rptr;
			ip6h->ip6_nxt = IPPROTO_ICMPV6;
			hdr_length = IPV6_HDR_LEN;
		}
		ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
		icmp6->icmp6_type = ICMP6_ECHO_REPLY;

		ip6h->ip6_plen =
		    htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
		origsrc = ip6h->ip6_src;
		/*
		 * Reverse the source and destination addresses.
		 * If the return address is a multicast, zero out the source
		 * (ip_wput_v6 will set an address).
		 */
		if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
			ip6h->ip6_src = ipv6_all_zeros;
			ip6h->ip6_dst = origsrc;
		} else {
			ip6h->ip6_src = ip6h->ip6_dst;
			ip6h->ip6_dst = origsrc;
		}

		/* set the hop limit */
		ip6h->ip6_hops = ipst->ips_ipv6_def_hops;

		/*
		 * Prepare for checksum by putting icmp length in the icmp
		 * checksum field. The checksum is calculated in ip_wput_v6.
		 */
		icmp6->icmp6_cksum = ip6h->ip6_plen;

		if (!mctl_present) {
			/*
			 * This packet should go out the same way as it
			 * came in i.e in clear. To make sure that global
			 * policy will not be applied to this in ip_wput,
			 * we attach a IPSEC_IN mp and clear ipsec_in_secure.
			 */
			ASSERT(first_mp == mp);
			first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack);
			if (first_mp == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				freemsg(mp);
				return;
			}
			ii = (ipsec_in_t *)first_mp->b_rptr;

			/* This is not a secure packet */
			ii->ipsec_in_secure = B_FALSE;
			first_mp->b_cont = mp;
		}
		ii->ipsec_in_zoneid = zoneid;
		ASSERT(zoneid != ALL_ZONES);
		if (!ipsec_in_to_out(first_mp, NULL, ip6h)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			return;
		}
		put(WR(q), first_mp);
		return;

	case ICMP6_ECHO_REPLY:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
		break;

	case ND_ROUTER_SOLICIT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
		break;

	case ND_ROUTER_ADVERT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
		break;

	case ND_NEIGHBOR_SOLICIT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
		if (mctl_present)
			freeb(first_mp);
		/* XXX may wish to pass first_mp up to ndp_input someday. */
		ndp_input(inill, mp, dl_mp);
		return;

	case ND_NEIGHBOR_ADVERT:
		BUMP_MIB(ill->ill_icmp6_mib,
		    ipv6IfIcmpInNeighborAdvertisements);
		if (mctl_present)
			freeb(first_mp);
		/* XXX may wish to pass first_mp up to ndp_input someday. */
		ndp_input(inill, mp, dl_mp);
		return;

	case ND_REDIRECT: {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);

		if (ipst->ips_ipv6_ignore_redirect)
			break;

		/*
		 * As there is no upper client to deliver, we don't
		 * need the first_mp any more.
		 */
		if (mctl_present)
			freeb(first_mp);
		if (!pullupmsg(mp, -1)) {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
			/*
			 * The M_CTL (if any) has already been released above,
			 * so we must not fall out to the common tail, which
			 * would free first_mp a second time and leak mp.
			 * Free what is left of the message right here.
			 */
			freemsg(mp);
			return;
		}
		icmp_redirect_v6(q, mp, ill);
		return;
	}

	/*
	 * The next three icmp messages will be handled by MLD.
	 * Pass all valid MLD packets up to any process(es)
	 * listening on a raw ICMP socket. MLD messages are
	 * freed by mld_input function.
	 */
	case MLD_LISTENER_QUERY:
	case MLD_LISTENER_REPORT:
	case MLD_LISTENER_REDUCTION:
		if (mctl_present)
			freeb(first_mp);
		mld_input(q, mp, ill);
		return;
	default:
		break;
	}
	if (interested) {
		icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill,
		    inill, mctl_present, zoneid);
	} else {
		freemsg(first_mp);
	}
}
/*
* Process received IPv6 ICMP Packet too big.
* After updating any IRE it does the fanout to any matching transport streams.
* Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
*/
/* ARGSUSED */
static void
icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill,
boolean_t mctl_present, zoneid_t zoneid)
{
ip6_t *ip6h;
ip6_t *inner_ip6h;
icmp6_t *icmp6;
uint16_t hdr_length;
uint32_t mtu;
ire_t *ire, *first_ire;
mblk_t *first_mp;
ip_stack_t *ipst = ill->ill_ipst;
first_mp = mp;
if (mctl_present)
mp = first_mp->b_cont;
/*
* We must have exclusive use of the mblk to update the MTU
* in the packet.
* If not, we copy it.
*
* If there's an M_CTL present, we know that allocated first_mp
* earlier in this function, so we know first_mp has refcnt of one.
*/
ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1);
if (mp->b_datap->db_ref > 1) {
mblk_t *mp1;
mp1 = copymsg(mp);
freemsg(mp);
if (mp1 == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
if (mctl_present)
freeb(first_mp);
return;
}
mp = mp1;
if (mctl_present)
first_mp->b_cont = mp;
else
first_mp = mp;
}
ip6h = (ip6_t *)mp->b_rptr;
if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
hdr_length = ip_hdr_length_v6(mp, ip6h);
else
hdr_length = IPV6_HDR_LEN;
icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN);
inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */
if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) {
if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
freemsg(first_mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length];
inner_ip6h = (ip6_t *)&icmp6[1];
}
/*
* For link local destinations matching simply on IRE type is not
* sufficient. Same link local addresses for different ILL's is
* possible.
*/
if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) {
first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL,
IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL,
MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst);
if (first_ire == NULL) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("icmp_inbound_too_big_v6:"
"no ire for dst %s\n", AF_INET6,
&inner_ip6h->ip6_dst);
}
freemsg(first_mp);
return;
}
mtu = ntohl(icmp6->icmp6_mtu);
rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER);
for (ire = first_ire; ire != NULL &&
IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst);
ire = ire->ire_next) {
mutex_enter(&ire->ire_lock);
if (mtu < IPV6_MIN_MTU) {
ip1dbg(("Received mtu less than IPv6 "
"min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
mtu = IPV6_MIN_MTU;
/*
* If an mtu less than IPv6 min mtu is received,
* we must include a fragment header in
* subsequent packets.
*/
ire->ire_frag_flag |= IPH_FRAG_HDR;
}
ip1dbg(("Received mtu from router: %d\n", mtu));
ire->ire_max_frag = MIN(ire->ire_max_frag, mtu);
/* Record the new max frag size for the ULP. */
if (ire->ire_frag_flag & IPH_FRAG_HDR) {
/*
* If we need a fragment header in every packet
* (above case or multirouting), make sure the
* ULP takes it into account when computing the
* payload size.
*/
icmp6->icmp6_mtu = htonl(ire->ire_max_frag -
sizeof (ip6_frag_t));
} else {
icmp6->icmp6_mtu = htonl(ire->ire_max_frag);
}
mutex_exit(&ire->ire_lock);
}
rw_exit(&first_ire->ire_bucket->irb_lock);
ire_refrele(first_ire);
} else {
irb_t *irb = NULL;
/*
* for non-link local destinations we match only on the IRE type
*/
ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL,
IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE,
ipst);
if (ire == NULL) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("icmp_inbound_too_big_v6:"
"no ire for dst %s\n",
AF_INET6, &inner_ip6h->ip6_dst);
}
freemsg(first_mp);
return;
}
irb = ire->ire_bucket;
ire_refrele(ire);
rw_enter(&irb->irb_lock, RW_READER);
for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
&inner_ip6h->ip6_dst)) {
mtu = ntohl(icmp6->icmp6_mtu);
mutex_enter(&ire->ire_lock);
if (mtu < IPV6_MIN_MTU) {
ip1dbg(("Received mtu less than IPv6"
"min mtu %d: %d\n",
IPV6_MIN_MTU, mtu));
mtu = IPV6_MIN_MTU;
/*
* If an mtu less than IPv6 min mtu is
* received, we must include a fragment
* header in subsequent packets.
*/
ire->ire_frag_flag |= IPH_FRAG_HDR;
}
ip1dbg(("Received mtu from router: %d\n", mtu));
ire->ire_max_frag = MIN(ire->ire_max_frag, mtu);
/* Record the new max frag size for the ULP. */
if (ire->ire_frag_flag & IPH_FRAG_HDR) {
/*
* If we need a fragment header in
* every packet (above case or
* multirouting), make sure the ULP
* takes it into account when computing
* the payload size.
*/
icmp6->icmp6_mtu =
htonl(ire->ire_max_frag -
sizeof (ip6_frag_t));
} else {
icmp6->icmp6_mtu =
htonl(ire->ire_max_frag);
}
mutex_exit(&ire->ire_lock);
}
}
rw_exit(&irb->irb_lock);
}
icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, inill,
mctl_present, zoneid);
}
/*
* Fanout received ICMPv6 error packets to the transports.
* Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
*
* Arguments:
* q queue passed through to the fanout routines.
* mp the whole message; the IPSEC_IN M_CTL comes first when
* mctl_present is set, with the packet in b_cont.
* ip6h outer IPv6 header of the ICMPv6 message (inside mp).
* icmp6 ICMPv6 header of the message (inside mp).
* ill used for MIB accounting, the TCP lookup and IPsec ill indices.
* inill receiving ill handed to the fanout routines.
* mctl_present B_TRUE when an IPSEC_IN M_CTL precedes the packet.
* zoneid zone passed through to the fanout routines.
*
* The dispatch is keyed on the next-header value of the packet embedded in
* the ICMPv6 error (the "packet in error"). The message is either handed to
* a fanout/IPsec routine or freed at drop_pkt.
*/
void
icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h,
icmp6_t *icmp6, ill_t *ill, ill_t *inill, boolean_t mctl_present,
zoneid_t zoneid)
{
uint16_t *up; /* Pointer to ports in ULP header */
uint32_t ports; /* reversed ports for fanout */
ip6_t rip6h; /* With reversed addresses */
uint16_t hdr_length;
uint8_t *nexthdrp;
uint8_t nexthdr;
mblk_t *first_mp;
ipsec_in_t *ii;
tcpha_t *tcpha;
conn_t *connp;
ip_stack_t *ipst = ill->ill_ipst;
first_mp = mp;
if (mctl_present) {
mp = first_mp->b_cont;
ASSERT(mp != NULL);
ii = (ipsec_in_t *)first_mp->b_rptr;
ASSERT(ii->ipsec_in_type == IPSEC_IN);
} else {
ii = NULL;
}
/* Offset of the ICMPv6 header from the start of the outer IPv6 header */
hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h);
ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN);
/*
* Need to pullup everything in order to use
* ip_hdr_length_nexthdr_v6()
*/
if (mp->b_cont != NULL) {
if (!pullupmsg(mp, -1)) {
ip1dbg(("icmp_inbound_error_fanout_v6: "
"pullupmsg failed\n"));
goto drop_pkt;
}
/* Re-derive the header pointers from the pulled-up data */
ip6h = (ip6_t *)mp->b_rptr;
icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
}
/*
* From here on ip6h refers to the embedded packet that triggered the
* error, and hdr_length (set below) to that packet's header length.
*/
ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */
if ((uchar_t *)&ip6h[1] > mp->b_wptr)
goto drop_pkt;
if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
goto drop_pkt;
nexthdr = *nexthdrp;
/* Set message type, must be done after pullups */
mp->b_datap->db_type = M_CTL;
/* Try to pass the ICMP message to clients who need it */
switch (nexthdr) {
case IPPROTO_UDP: {
/*
* Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
* UDP header to get the port information.
*/
if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
mp->b_wptr) {
break;
}
/*
* Attempt to find a client stream based on port.
* Note that we do a reverse lookup since the header is
* in the form we sent it out.
* The rip6h header is only used for the IPCL_UDP_MATCH_V6
* and we only set the src and dst addresses and nexthdr.
*/
up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
rip6h.ip6_src = ip6h->ip6_dst;
rip6h.ip6_dst = ip6h->ip6_src;
rip6h.ip6_nxt = nexthdr;
/* Swap the two 16-bit port fields into ports */
((uint16_t *)&ports)[0] = up[1];
((uint16_t *)&ports)[1] = up[0];
ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, inill,
IP6_NO_IPPOLICY, mctl_present, zoneid);
return;
}
case IPPROTO_TCP: {
/*
* Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
* the TCP header to get the port information.
*/
if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
mp->b_wptr) {
break;
}
/*
* Attempt to find a client stream based on port.
* Note that we do a reverse lookup since the header is
* in the form we sent it out.
* The rip6h header is only used for the IP_TCP_*MATCH_V6 and
* we only set the src and dst addresses and nexthdr.
*
* NOTE(review): rip6h is not populated or used in this case;
* the lookup below takes the embedded header (ip6h) directly.
*/
tcpha = (tcpha_t *)((char *)ip6h + hdr_length);
connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
if (connp == NULL) {
goto drop_pkt;
}
SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, tcp_input, connp,
SQ_FILL, SQTAG_TCP6_INPUT_ICMP_ERR);
return;
}
case IPPROTO_SCTP:
/*
* Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
* the SCTP header to get the port information.
*/
if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
mp->b_wptr) {
break;
}
up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
/* Swap the two 16-bit port fields into ports */
((uint16_t *)&ports)[0] = up[1];
((uint16_t *)&ports)[1] = up[0];
ip_fanout_sctp(first_mp, inill, (ipha_t *)ip6h, ports, 0,
mctl_present, IP6_NO_IPPOLICY, zoneid);
return;
case IPPROTO_ESP:
case IPPROTO_AH: {
int ipsec_rc;
ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
/*
* We need a IPSEC_IN in the front to fanout to AH/ESP.
* We will re-use the IPSEC_IN if it is already present as
* AH/ESP will not affect any fields in the IPSEC_IN for
* ICMP errors. If there is no IPSEC_IN, allocate a new
* one and attach it in the front.
*/
if (ii != NULL) {
/*
* ip_fanout_proto_again converts the ICMP errors
* that come back from AH/ESP to M_DATA so that
* if it is non-AH/ESP and we do a pullupmsg in
* this function, it would work. Convert it back
* to M_CTL before we send up as this is a ICMP
* error. This could have been generated locally or
* by some router. Validate the inner IPSEC
* headers.
*
* NOTE : ill_index is used by ip_fanout_proto_again
* to locate the ill.
*/
ASSERT(ill != NULL);
ii->ipsec_in_ill_index =
ill->ill_phyint->phyint_ifindex;
ii->ipsec_in_rill_index =
inill->ill_phyint->phyint_ifindex;
first_mp->b_cont->b_datap->db_type = M_CTL;
} else {
/*
* IPSEC_IN is not present. We attach a ipsec_in
* message and send up to IPSEC for validating
* and removing the IPSEC headers. Clear
* ipsec_in_secure so that when we return
* from IPSEC, we don't mistakenly think that this
* is a secure packet came from the network.
*
* NOTE : ill_index is used by ip_fanout_proto_again
* to locate the ill.
*/
ASSERT(first_mp == mp);
first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack);
ASSERT(ill != NULL);
if (first_mp == NULL) {
/* No M_CTL was attached, so mp is the whole message */
freemsg(mp);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
return;
}
ii = (ipsec_in_t *)first_mp->b_rptr;
/* This is not a secure packet */
ii->ipsec_in_secure = B_FALSE;
first_mp->b_cont = mp;
mp->b_datap->db_type = M_CTL;
ii->ipsec_in_ill_index =
ill->ill_phyint->phyint_ifindex;
ii->ipsec_in_rill_index =
inill->ill_phyint->phyint_ifindex;
}
if (!ipsec_loaded(ipss)) {
ip_proto_not_sup(q, first_mp, 0, zoneid, ipst);
return;
}
if (nexthdr == IPPROTO_ESP)
ipsec_rc = ipsecesp_icmp_error(first_mp);
else
ipsec_rc = ipsecah_icmp_error(first_mp);
/* NOTE(review): on failure the callee presumably disposed of first_mp */
if (ipsec_rc == IPSEC_STATUS_FAILED)
return;
ip_fanout_proto_again(first_mp, ill, inill, NULL);
return;
}
case IPPROTO_ENCAP:
case IPPROTO_IPV6:
/* The encapsulated IPv4 or IPv6 header must be fully present */
if ((uint8_t *)ip6h + hdr_length +
(nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) :
sizeof (ip6_t)) > mp->b_wptr) {
goto drop_pkt;
}
/*
* Tunnel case: IPv4-in-IPv6, or IPv6-in-IPv6 where the inner
* addresses differ from the outer ones. Otherwise (identical
* addresses) this is the self-encapsulated case handled below.
*/
if (nexthdr == IPPROTO_ENCAP ||
!IN6_ARE_ADDR_EQUAL(
&((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src,
&ip6h->ip6_src) ||
!IN6_ARE_ADDR_EQUAL(
&((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst,
&ip6h->ip6_dst)) {
/*
* For tunnels that have used IPsec protection,
* we need to adjust the MTU to take into account
* the IPsec overhead.
*/
if (ii != NULL)
icmp6->icmp6_mtu = htonl(
ntohl(icmp6->icmp6_mtu) -
ipsec_in_extra_length(first_mp));
} else {
/*
* Self-encapsulated case. As in the ipv4 case,
* we need to strip the 2nd IP header. Since mp
* is already pulled-up, we can simply bcopy
* the 3rd header + data over the 2nd header.
*/
uint16_t unused_len;
ip6_t *inner_ip6h = (ip6_t *)
((uchar_t *)ip6h + hdr_length);
/*
* Make sure we don't do recursion more than once.
*/
if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h,
&unused_len, &nexthdrp) ||
*nexthdrp == IPPROTO_IPV6) {
goto drop_pkt;
}
/*
* We are about to modify the packet. Make a copy if
* someone else has a reference to it.
*/
if (DB_REF(mp) > 1) {
mblk_t *mp1;
uint16_t icmp6_offset;
mp1 = copymsg(mp);
if (mp1 == NULL) {
goto drop_pkt;
}
/* Remember where icmp6 sat so it can be found in the copy */
icmp6_offset = (uint16_t)
((uchar_t *)icmp6 - mp->b_rptr);
freemsg(mp);
mp = mp1;
icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset);
ip6h = (ip6_t *)&icmp6[1];
inner_ip6h = (ip6_t *)
((uchar_t *)ip6h + hdr_length);
if (mctl_present)
first_mp->b_cont = mp;
else
first_mp = mp;
}
/*
* Need to set db_type back to M_DATA before
* refeeding mp into this function.
*/
DB_TYPE(mp) = M_DATA;
/*
* Copy the 3rd header + remaining data on top
* of the 2nd header.
*
* NOTE(review): source and destination overlap here; this
* relies on bcopy handling overlapping regions - confirm.
*/
bcopy(inner_ip6h, ip6h,
mp->b_wptr - (uchar_t *)inner_ip6h);
/*
* Subtract length of the 2nd header.
*/
mp->b_wptr -= hdr_length;
/*
* Now recurse, and see what I _really_ should be
* doing here.
*/
icmp_inbound_error_fanout_v6(q, first_mp,
(ip6_t *)mp->b_rptr, icmp6, ill, inill,
mctl_present, zoneid);
return;
}
/* FALLTHRU */
default:
/*
* The rip6h header is only used for the lookup and we
* only set the src and dst addresses and nexthdr.
*/
rip6h.ip6_src = ip6h->ip6_dst;
rip6h.ip6_dst = ip6h->ip6_src;
rip6h.ip6_nxt = nexthdr;
ip_fanout_proto_v6(q, first_mp, &rip6h, ill, inill, nexthdr, 0,
IP6_NO_IPPOLICY, mctl_present, zoneid);
return;
}
/*
* Reached by "goto drop_pkt", and also by "break" out of the UDP, TCP
* and SCTP cases when the embedded transport header is too short.
*/
drop_pkt:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
freemsg(first_mp);
}
/*
* Process received IPv6 ICMP Redirect messages.
*
* Arguments:
* q unused (see ARGSUSED).
* mp the packet, starting at the IPv6 header; the code reads it as
* one contiguous block. It is freed on every path out of this
* function.
* ill the ill used for MIB accounting, the ipif lookup and the
* neighbor cache update.
*
* Outline: validate the redirect, confirm that the sender is the current
* first hop for the destination (prev_ire), optionally record the target's
* link-layer address in the neighbor cache, then install a host route
* (via the new gateway, or on-link when gateway == dst) and retire the
* routes it supersedes.
*/
/* ARGSUSED */
static void
icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill)
{
ip6_t *ip6h;
uint16_t hdr_length;
nd_redirect_t *rd;
ire_t *ire;
ire_t *prev_ire;
ire_t *redir_ire;
in6_addr_t *src, *dst, *gateway;
nd_opt_hdr_t *opt;
nce_t *nce;
int nce_flags = 0;
int err = 0;
boolean_t redirect_to_router = B_FALSE;
int len;
int optlen;
iulp_t ulp_info = { 0 };
ill_t *prev_ire_ill;
ipif_t *ipif;
ip_stack_t *ipst = ill->ill_ipst;
ip6h = (ip6_t *)mp->b_rptr;
if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
hdr_length = ip_hdr_length_v6(mp, ip6h);
else
hdr_length = IPV6_HDR_LEN;
rd = (nd_redirect_t *)&mp->b_rptr[hdr_length];
/* Bytes available from the start of the redirect message */
len = mp->b_wptr - mp->b_rptr - hdr_length;
src = &ip6h->ip6_src;
dst = &rd->nd_rd_dst;
gateway = &rd->nd_rd_target;
/* Verify if it is a valid redirect */
if (!IN6_IS_ADDR_LINKLOCAL(src) ||
(ip6h->ip6_hops != IPV6_MAX_HOPS) ||
(rd->nd_rd_code != 0) ||
(len < sizeof (nd_redirect_t)) ||
(IN6_IS_ADDR_V4MAPPED(dst)) ||
(IN6_IS_ADDR_MULTICAST(dst))) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
freemsg(mp);
return;
}
/* The target must be a link-local address or the destination itself */
if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
IN6_ARE_ADDR_EQUAL(gateway, dst))) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
freemsg(mp);
return;
}
/* Validate the options, if any follow the fixed redirect header */
if (len > sizeof (nd_redirect_t)) {
if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1],
len - sizeof (nd_redirect_t))) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
freemsg(mp);
return;
}
}
/* A target different from the destination means "use this router" */
if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
redirect_to_router = B_TRUE;
nce_flags |= NCE_F_ISROUTER;
}
/* ipif will be refreleased afterwards */
ipif = ipif_get_next_ipif(NULL, ill);
if (ipif == NULL) {
freemsg(mp);
return;
}
/*
* Verify that the IP source address of the redirect is
* the same as the current first-hop router for the specified
* ICMP destination address.
* Also, Make sure we had a route for the dest in question and
* that route was pointing to the old gateway (the source of the
* redirect packet.)
*/
prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, ALL_ZONES,
NULL, MATCH_IRE_GW | MATCH_IRE_ILL | MATCH_IRE_DEFAULT, ipst);
/*
* Check that
* the redirect was not from ourselves
* old gateway is still directly reachable
*/
if (prev_ire == NULL ||
prev_ire->ire_type == IRE_LOCAL) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
ipif_refrele(ipif);
goto fail_redirect;
}
prev_ire_ill = ire_to_ill(prev_ire);
ASSERT(prev_ire_ill != NULL);
if (prev_ire_ill->ill_flags & ILLF_NONUD)
nce_flags |= NCE_F_NONUD;
/*
* Should we use the old ULP info to create the new gateway? From
* a user's perspective, we should inherit the info so that it
* is a "smooth" transition. If we do not do that, then new
* connections going thru the new gateway will have no route metrics,
* which is counter-intuitive to user. From a network point of
* view, this may or may not make sense even though the new gateway
* is still directly connected to us so the route metrics should not
* change much.
*
* But if the old ire_uinfo is not initialized, we do another
* recursive lookup on the dest using the new gateway. There may
* be a route to that. If so, use it to initialize the redirect
* route.
*/
if (prev_ire->ire_uinfo.iulp_set) {
bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t));
} else if (redirect_to_router) {
/*
* Only do the following if the redirection is really to
* a router.
*/
ire_t *tmp_ire;
ire_t *sire;
/*
* NOTE(review): sire is not initialized here; this relies on
* ire_ftable_lookup_v6() always storing through &sire - confirm.
*/
tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire,
ALL_ZONES, 0, NULL,
(MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT),
ipst);
if (sire != NULL) {
bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t));
ASSERT(tmp_ire != NULL);
ire_refrele(tmp_ire);
ire_refrele(sire);
} else if (tmp_ire != NULL) {
bcopy(&tmp_ire->ire_uinfo, &ulp_info,
sizeof (iulp_t));
ire_refrele(tmp_ire);
}
}
/* Look for a target link-layer address option after the fixed header */
optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t);
opt = (nd_opt_hdr_t *)&rd[1];
opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
if (opt != NULL) {
err = ndp_lookup_then_add_v6(ill,
B_FALSE, /* don't match across illgrp */
(uchar_t *)&opt[1], /* Link layer address */
gateway,
&ipv6_all_ones, /* prefix mask */
&ipv6_all_zeros, /* Mapping mask */
0,
nce_flags,
ND_STALE,
&nce);
switch (err) {
case 0:
NCE_REFRELE(nce);
break;
case EEXIST:
/*
* Check to see if link layer address has changed and
* process the nce_state accordingly.
*/
ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE);
NCE_REFRELE(nce);
break;
default:
ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
err));
ipif_refrele(ipif);
goto fail_redirect;
}
}
if (redirect_to_router) {
/*
* The gateway checks at the top of this function guarantee this:
* the target is either link-local or equal to dst, and here it
* is known not to equal dst.
*/
ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
/*
* Create a Route Association. This will allow us to remember
* a router told us to use the particular gateway.
*/
ire = ire_create_v6(
dst,
&ipv6_all_ones, /* mask */
&prev_ire->ire_src_addr_v6, /* source addr */
gateway, /* gateway addr */
&prev_ire->ire_max_frag, /* max frag */
NULL, /* no src nce */
NULL, /* no rfq */
NULL, /* no stq */
IRE_HOST,
prev_ire->ire_ipif,
NULL,
0,
0,
(RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
&ulp_info,
NULL,
NULL,
ipst);
} else {
queue_t *stq;
stq = (ipif->ipif_net_type == IRE_IF_RESOLVER)
? ipif->ipif_rq : ipif->ipif_wq;
/*
* Just create an on link entry, i.e. interface route.
*/
ire = ire_create_v6(
dst, /* gateway == dst */
&ipv6_all_ones, /* mask */
&prev_ire->ire_src_addr_v6, /* source addr */
&ipv6_all_zeros, /* gateway addr */
&prev_ire->ire_max_frag, /* max frag */
NULL, /* no src nce */
NULL, /* ire rfq */
stq, /* ire stq */
ipif->ipif_net_type, /* IF_[NO]RESOLVER */
prev_ire->ire_ipif,
&ipv6_all_ones,
0,
0,
(RTF_DYNAMIC | RTF_HOST),
&ulp_info,
NULL,
NULL,
ipst);
}
/* Release reference from earlier ipif_get_next_ipif() */
ipif_refrele(ipif);
if (ire == NULL)
goto fail_redirect;
if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) {
/* tell routing sockets that we received a redirect */
ip_rts_change_v6(RTM_REDIRECT,
&rd->nd_rd_dst,
&rd->nd_rd_target,
&ipv6_all_ones, 0, &ire->ire_src_addr_v6,
(RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
(RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
/*
* Delete any existing IRE_HOST type ires for this destination.
* This together with the added IRE has the effect of
* modifying an existing redirect.
*/
redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
ire->ire_ipif, NULL, ALL_ZONES, 0, NULL,
(MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), ipst);
ire_refrele(ire); /* Held in ire_add_v6 */
if (redir_ire != NULL) {
/* Only routes that were themselves learned dynamically go */
if (redir_ire->ire_flags & RTF_DYNAMIC)
ire_delete(redir_ire);
ire_refrele(redir_ire);
}
}
/* A cached route through the old first hop is deleted */
if (prev_ire->ire_type == IRE_CACHE)
ire_delete(prev_ire);
ire_refrele(prev_ire);
/* Cleared so the common exit below does not release it again */
prev_ire = NULL;
fail_redirect:
if (prev_ire != NULL)
ire_refrele(prev_ire);
freemsg(mp);
}
/*
 * Map a write-side queue to an ill and return it with a reference held;
 * callers in this file drop that reference with ill_refrele().
 *
 * A queue that has a q_next is treated as an ill queue and its q_ptr is
 * used as the ill, provided ILL_CAN_LOOKUP() allows it.  Any other queue
 * resolves to the ill named by ipif_loopback_name.
 *
 * Returns NULL, after logging via ip0dbg, when no ill is available.
 */
static ill_t *
ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst)
{
	ill_t	*found = NULL;

	ASSERT(WR(q) == q);

	if (q->q_next == NULL) {
		/* Not an ill queue: fall back to the loopback ill. */
		found = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE,
		    NULL, NULL, NULL, NULL, NULL, ipst);
	} else {
		ill_t	*qill = (ill_t *)q->q_ptr;

		if (ILL_CAN_LOOKUP(qill)) {
			ill_refhold(qill);
			found = qill;
		}
	}

	if (found == NULL)
		ip0dbg(("ip_queue_to_ill_v6: no ill\n"));
	return (found);
}
/*
 * Assigns an appropriate source address to the packet.
 *
 * Selection order:
 *   1. If origdst is one of our own addresses (an IRE_LOCAL or IRE_LOOPBACK
 *	route for it is found for this zone), then use origdst itself.
 *   2. Otherwise select an address with ipif_select_source_v6(): on the ill
 *	taken from wq->q_ptr when wq has a q_next (an ill queue), or on the
 *	ill of the route back to origsrc when it does not.
 *   3. If no address could be selected, fall back to the source address
 *	recorded in the route back to origsrc.
 *
 * wq must be a write-side queue (asserted).
 *
 * src is the return parameter.  Returns a pointer to src on success, or
 * NULL when there is no route back to origsrc; ipIfStatsOutNoRoutes is
 * bumped in that case.
 */
static in6_addr_t *
icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst,
    in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst)
{
	ill_t	*ill;
	ire_t	*ire;
	ipif_t	*ipif;

	ASSERT(!(wq->q_flag & QREADR));
	if (wq->q_next != NULL) {
		ill = (ill_t *)wq->q_ptr;
	} else {
		ill = NULL;
	}

	ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK),
	    NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY),
	    ipst);
	if (ire != NULL) {
		/* Destined to one of our addresses */
		*src = *origdst;
		ire_refrele(ire);
		return (src);
	}

	if (ill == NULL) {
		/* What is the route back to the original source? */
		ire = ire_route_lookup_v6(origsrc, 0, 0, 0,
		    NULL, NULL, zoneid, NULL,
		    (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst);
		if (ire == NULL) {
			BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes);
			return (NULL);
		}
		ASSERT(ire->ire_ipif != NULL);
		/* Only the ill is needed from this route; drop the IRE. */
		ill = ire->ire_ipif->ipif_ill;
		ire_refrele(ire);
	}

	ipif = ipif_select_source_v6(ill, origsrc, B_FALSE,
	    IPV6_PREFER_SRC_DEFAULT, zoneid);
	if (ipif != NULL) {
		*src = ipif->ipif_v6src_addr;
		ipif_refrele(ipif);
		return (src);
	}

	/*
	 * Unusual case - can't find a usable source address to reach the
	 * original source.  Use what is in the route to the source.
	 */
	ire = ire_route_lookup_v6(origsrc, 0, 0, 0,
	    NULL, NULL, zoneid, NULL,
	    (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst);
	if (ire == NULL) {
		BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes);
		return (NULL);
	}
	*src = ire->ire_src_addr_v6;
	ire_refrele(ire);
	return (src);
}
/*
 * Build and ship an IPv6 ICMP message using the packet data in mp,
 * and the ICMP header pointed to by "stuff". (May be called as
 * writer.)
 * Note: assumes that icmp_pkt_err_ok_v6 has been called to
 * verify that an icmp error packet can be sent.
 *
 * If q is an ill write side queue (which is the case when packets
 * arrive from ip_rput) then ip_wput code will ensure that packets to
 * link-local destinations are sent out that ill.
 *
 * If v6src_ptr is set use it as a source. Otherwise select a reasonable
 * source address (see icmp_pick_source_v6()).
 *
 * Parameters:
 *   q            - write-side queue the finished message is put() on.
 *   mp           - the offending packet; when mctl_present is set it is
 *                  the leading IPSEC_IN/IPSEC_OUT block with the packet
 *                  in b_cont.
 *   stuff, len   - ICMPv6 header (plus any options) to copy in after the
 *                  new IPv6 header, and its length in bytes.
 *   v6src_ptr    - source address to use, or NULL to pick one.
 *   mctl_present - B_TRUE when mp starts with an ipsec info block.
 *   zoneid       - zone used for source selection and, in the clear
 *                  case, stored in the IPSEC_IN that is created.
 *
 * Message ownership as visible here: mp is freed when no ill can be found
 * or when a later step fails, and is otherwise handed to put().  The
 * returns taken when ipsec_in_to_out() fails free nothing in this
 * function; presumably ipsec_in_to_out() consumes ipsec_mp on failure --
 * TODO confirm.
 */
static void
icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len,
const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid,
ip_stack_t *ipst)
{
ip6_t *ip6h;
in6_addr_t v6dst;
size_t len_needed;
size_t msg_len;
mblk_t *mp1;
icmp6_t *icmp6;
ill_t *ill;
in6_addr_t v6src;
mblk_t *ipsec_mp;
ipsec_out_t *io;
/*
 * The ill is returned held; every return below releases it.  It is used
 * in this function only for MIB accounting.
 */
ill = ip_queue_to_ill_v6(q, ipst);
if (ill == NULL) {
freemsg(mp);
return;
}
if (mctl_present) {
/*
* If it is :
*
* 1) a IPSEC_OUT, then this is caused by outbound
* datagram originating on this host. IPSEC processing
* may or may not have been done. Refer to comments above
* icmp_inbound_error_fanout for details.
*
* 2) a IPSEC_IN if we are generating a icmp_message
* for an incoming datagram destined for us i.e called
* from ip_fanout_send_icmp.
*/
ipsec_info_t *in;
/* Split the ipsec info block from the packet that follows it. */
ipsec_mp = mp;
mp = ipsec_mp->b_cont;
in = (ipsec_info_t *)ipsec_mp->b_rptr;
ip6h = (ip6_t *)mp->b_rptr;
ASSERT(in->ipsec_info_type == IPSEC_OUT ||
in->ipsec_info_type == IPSEC_IN);
if (in->ipsec_info_type == IPSEC_IN) {
/*
* Convert the IPSEC_IN to IPSEC_OUT.
*/
if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ill_refrele(ill);
return;
}
} else {
ASSERT(in->ipsec_info_type == IPSEC_OUT);
io = (ipsec_out_t *)in;
/*
* Clear out ipsec_out_proc_begin, so we do a fresh
* ire lookup.
*/
io->ipsec_out_proc_begin = B_FALSE;
}
} else {
/*
* This is in clear. The icmp message we are building
* here should go out in clear.
*/
ipsec_in_t *ii;
ASSERT(mp->b_datap->db_type == M_DATA);
/*
 * Fabricate a non-secure IPSEC_IN in front of the packet so that it
 * can go through the same IPSEC_IN -> IPSEC_OUT conversion as above.
 */
ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack);
if (ipsec_mp == NULL) {
freemsg(mp);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ill_refrele(ill);
return;
}
ii = (ipsec_in_t *)ipsec_mp->b_rptr;
/* This is not a secure packet */
ii->ipsec_in_secure = B_FALSE;
/*
* For trusted extensions using a shared IP address we can
* send using any zoneid.
*/
if (zoneid == ALL_ZONES)
ii->ipsec_in_zoneid = GLOBAL_ZONEID;
else
ii->ipsec_in_zoneid = zoneid;
ipsec_mp->b_cont = mp;
ip6h = (ip6_t *)mp->b_rptr;
/*
* Convert the IPSEC_IN to IPSEC_OUT.
*/
if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ill_refrele(ill);
return;
}
}
/* From here on ipsec_mp is expected to be an IPSEC_OUT (asserted below). */
io = (ipsec_out_t *)ipsec_mp->b_rptr;
if (v6src_ptr != NULL) {
v6src = *v6src_ptr;
} else {
if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst,
&v6src, zoneid, ipst) == NULL) {
freemsg(ipsec_mp);
ill_refrele(ill);
return;
}
}
/* The ICMP message goes back to the sender of the offending packet. */
v6dst = ip6h->ip6_src;
/*
 * Number of bytes of the offending packet that may be quoted once the
 * new IPv6 header and the len bytes of ICMP header are accounted for.
 */
len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
msg_len = msgdsize(mp);
if (msg_len > len_needed) {
/*
 * len_needed - msg_len is negative here; NOTE(review): adjmsg() with a
 * negative length is expected to trim from the tail -- see adjmsg(9F).
 */
if (!adjmsg(mp, len_needed - msg_len)) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
freemsg(ipsec_mp);
ill_refrele(ill);
return;
}
msg_len = len_needed;
}
/* New leading mblk for the IPv6 header plus the ICMP header from stuff. */
mp1 = allocb_tmpl(IPV6_HDR_LEN + len, mp);
if (mp1 == NULL) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
freemsg(ipsec_mp);
ill_refrele(ill);
return;
}
/* No more MIB updates below, so the ill hold can be dropped now. */
ill_refrele(ill);
/* Chain: ipsec_mp -> mp1 (new headers) -> quoted offending packet. */
mp1->b_cont = mp;
mp = mp1;
ASSERT(ipsec_mp->b_datap->db_type == M_CTL &&
io->ipsec_out_type == IPSEC_OUT);
ipsec_mp->b_cont = mp;
/*
* Set ipsec_out_icmp_loopback so we can let the ICMP messages this
* node generates be accepted in peace by all on-host destinations.
* If we do NOT assume that all on-host destinations trust
* self-generated ICMP messages, then rework here, ip.c, and spd.c.
* (Look for ipsec_out_icmp_loopback).
*/
io->ipsec_out_icmp_loopback = B_TRUE;
/* ip6h now points at the new (outgoing) IPv6 header in mp1. */
ip6h = (ip6_t *)mp->b_rptr;
mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
ip6h->ip6_nxt = IPPROTO_ICMPV6;
ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
ip6h->ip6_dst = v6dst;
ip6h->ip6_src = v6src;
msg_len += IPV6_HDR_LEN + len;
/* Cap the whole message at IP_MAXPACKET + IPV6_HDR_LEN bytes. */
if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
(void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
}
/* Payload length is recomputed from the chain, not taken from msg_len. */
ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
icmp6 = (icmp6_t *)&ip6h[1];
bcopy(stuff, (char *)icmp6, len);
/*
* Prepare for checksum by putting icmp length in the icmp
* checksum field. The checksum is calculated in ip_wput_v6.
*/
icmp6->icmp6_cksum = ip6h->ip6_plen;
/* Redirects go out with IPV6_MAX_HOPS instead of the stack default. */
if (icmp6->icmp6_type == ND_REDIRECT) {
ip6h->ip6_hops = IPV6_MAX_HOPS;
}
/* Send to V6 writeside put routine */
put(q, ipsec_mp);
}
/*
 * Account for an outgoing ICMPv6 message in the per-ill ICMPv6 MIB.
 *
 * ipv6IfIcmpOutMsgs is always bumped; in addition the counter matching
 * the message type is bumped.  Types with no dedicated counter only
 * contribute to ipv6IfIcmpOutMsgs.
 */
static void
icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
{
	const uint8_t	msg_type = icmp6->icmp6_type;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);

	switch (msg_type) {
	/* Error messages */
	case ICMP6_DST_UNREACH:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
		/* Administrative prohibitions are counted separately too. */
		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) {
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpOutAdminProhibs);
		}
		break;
	case ICMP6_PACKET_TOO_BIG:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
		break;
	case ICMP6_TIME_EXCEEDED:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
		break;
	case ICMP6_PARAM_PROB:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
		break;

	/* Echo */
	case ICMP6_ECHO_REQUEST:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
		break;
	case ICMP6_ECHO_REPLY:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
		break;

	/* Multicast listener discovery */
	case MLD_LISTENER_QUERY:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
		break;
	case MLD_LISTENER_REPORT:
	case MLD_V2_LISTENER_REPORT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
		break;
	case MLD_LISTENER_REDUCTION:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
		break;

	/* Neighbor discovery */
	case ND_ROUTER_SOLICIT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
		break;
	case ND_ROUTER_ADVERT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
		break;
	case ND_NEIGHBOR_SOLICIT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
		break;
	case ND_NEIGHBOR_ADVERT:
		BUMP_MIB(ill->ill_icmp6_mib,
		    ipv6IfIcmpOutNeighborAdvertisements);
		break;
	case ND_REDIRECT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
		break;

	default:
		break;
	}
}
/*
 * Decide whether an ICMPv6 error may be generated in response to the
 * IPv6 packet in mp.
 *
 * Returns mp when an error may be sent.  Otherwise frees mp and returns
 * NULL (a NULL mp is simply passed back as NULL).  No error is sent when:
 *   - the packet's source is multicast, V4-mapped or unspecified;
 *   - the packet is itself an ICMPv6 error or a redirect;
 *   - mcast_ok is not set and the packet arrived as link-layer
 *     broadcast/multicast (llbcast) or has a multicast destination;
 *   - icmp_err_rate_limit() says we are sending errors too fast.
 *
 * q and ipst are used only to pick the MIB for ipv6IfIcmpInErrors when
 * the ICMPv6 header cannot be pulled up.
 */
static mblk_t *
icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp,
    boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst)
{
	ip6_t	*ip6h;

	if (mp == NULL)
		return (NULL);

	ip6h = (ip6_t *)mp->b_rptr;

	/* The source address must uniquely identify the sending host. */
	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
	    IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
	    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src))
		goto drop;

	if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
		size_t	min_len = IPV6_HDR_LEN + ICMP6_MINLEN;
		icmp6_t	*inner;

		/* Need the ICMPv6 header contiguous to look at its type. */
		if (mp->b_wptr - mp->b_rptr < min_len) {
			if (!pullupmsg(mp, min_len)) {
				ill_t	*err_ill;

				err_ill = ip_queue_to_ill_v6(q, ipst);
				if (err_ill != NULL) {
					BUMP_MIB(err_ill->ill_icmp6_mib,
					    ipv6IfIcmpInErrors);
					ill_refrele(err_ill);
				} else {
					BUMP_MIB(&ipst->ips_icmp6_mib,
					    ipv6IfIcmpInErrors);
				}
				goto drop;
			}
			/* pullupmsg() may have moved the data. */
			ip6h = (ip6_t *)mp->b_rptr;
		}

		inner = (icmp6_t *)&ip6h[1];
		/* Explicitly do not generate errors in response to redirects */
		if (ICMP6_IS_ERROR(inner->icmp6_type) ||
		    inner->icmp6_type == ND_REDIRECT)
			goto drop;
	}

	/*
	 * Check that the destination is not multicast and that the packet
	 * was not sent on link layer broadcast or multicast.  (Exception
	 * is Packet too big message as per the draft - when mcast_ok is set.)
	 */
	if (!mcast_ok &&
	    (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)))
		goto drop;

	/*
	 * Only send ICMP error packets every so often.
	 * This should be done on a per port/source basis,
	 * but for now this will suffice.
	 */
	if (icmp_err_rate_limit(ipst))
		goto drop;

	return (mp);

drop:
	freemsg(mp);
	return (NULL);
}
/*
 * Generate an ICMPv6 redirect message.
 * Include target link layer address option if it exists.
 * Always include redirect header.
 *
 * Parameters:
 *   q       - queue passed through to icmp_pkt_err_ok_v6()/icmp_pkt_v6().
 *   mp      - the packet being redirected (M_DATA, asserted); consumed.
 *   targetp - redirect target address (also used for the NCE lookup).
 *   dest    - destination address being redirected.
 *   ill     - ill used for the NCE lookup, the link-layer address length
 *             and the source address of the redirect.
 *   llbcast - passed to icmp_pkt_err_ok_v6(), which suppresses the
 *             redirect when it is set.
 *
 * The ICMPv6 portion (redirect header, optional target link-layer address
 * option, redirected-header option) is assembled in a temporary buffer
 * which icmp_pkt_v6() copies in front of the (truncated) packet.
 */
static void
icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp,
    in6_addr_t *dest, ill_t *ill, boolean_t llbcast)
{
	nd_redirect_t	*rd;
	nd_opt_rd_hdr_t	*rdh;
	uchar_t		*buf;
	nce_t		*nce = NULL;
	nd_opt_hdr_t	*opt;
	int		len;
	int		ll_opt_len = 0;
	int		max_redir_hdr_data_len;
	int		pkt_len;
	in6_addr_t	*srcp;
	ip_stack_t	*ipst = ill->ill_ipst;

	/*
	 * We are called from ip_rput where we could
	 * not have attached an IPSEC_IN.
	 */
	ASSERT(mp->b_datap->db_type == M_DATA);

	mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst);
	if (mp == NULL)
		return;

	/*
	 * Only include the target link-layer address option when we have
	 * a neighbor cache entry for the target that is past INCOMPLETE.
	 * The option length is rounded up to a multiple of 8 bytes.
	 */
	nce = ndp_lookup_v6(ill, B_TRUE, targetp, B_FALSE);
	if (nce != NULL && nce->nce_state != ND_INCOMPLETE) {
		ll_opt_len = (sizeof (nd_opt_hdr_t) +
		    ill->ill_phys_addr_length + 7)/8 * 8;
	}
	len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
	ASSERT(len % 4 == 0);

	/*
	 * Zero-fill the buffer.  When the link-layer option is rounded up,
	 * the bytes after the address are padding that nothing below
	 * writes; with kmem_alloc() they would carry stale kernel memory
	 * onto the wire.
	 */
	buf = kmem_zalloc(len, KM_NOSLEEP);
	if (buf == NULL) {
		if (nce != NULL)
			NCE_REFRELE(nce);
		freemsg(mp);
		return;
	}

	rd = (nd_redirect_t *)buf;
	rd->nd_rd_type = (uint8_t)ND_REDIRECT;
	rd->nd_rd_code = 0;
	rd->nd_rd_reserved = 0;
	rd->nd_rd_target = *targetp;
	rd->nd_rd_dst = *dest;

	opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
	if (nce != NULL && ll_opt_len != 0) {
		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
		opt->nd_opt_len = ll_opt_len/8;
		bcopy((char *)nce->nce_res_mp->b_rptr +
		    NCE_LL_ADDR_OFFSET(ill), &opt[1],
		    ill->ill_phys_addr_length);
	}
	if (nce != NULL)
		NCE_REFRELE(nce);

	/* The redirected-header option follows the (optional) ll option. */
	rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
	rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
	/* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
	max_redir_hdr_data_len =
	    (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
	pkt_len = msgdsize(mp);
	/* Make sure mp is 8 byte aligned */
	if (pkt_len > max_redir_hdr_data_len) {
		rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
		    sizeof (nd_opt_rd_hdr_t))/8;
		(void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
	} else {
		rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
		(void) adjmsg(mp, -(pkt_len % 8));
	}
	rdh->nd_opt_rh_reserved1 = 0;
	rdh->nd_opt_rh_reserved2 = 0;

	/* ipif_v6src_addr contains the link-local source address */
	srcp = &ill->ill_ipif->ipif_v6src_addr;

	/* Redirects sent by router, and router is global zone */
	icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst);
	/* icmp_pkt_v6() copies from buf, so it can be released here. */
	kmem_free(buf, len);
}
/*
 * Generate an ICMPv6 time exceeded message with the given code in
 * response to the packet in mp.  (May be called as writer.)
 *
 * mp may carry a leading control block, which EXTRACT_PKT_MP() separates
 * from the packet.  If icmp_pkt_err_ok_v6() rejects the packet it has
 * already freed it, so only that leading block remains to be freed here.
 * Otherwise the whole message is handed to icmp_pkt_v6().
 */
void
icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code,
    boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid,
    ip_stack_t *ipst)
{
	boolean_t	has_mctl;
	mblk_t		*head_mp;
	icmp6_t		hdr;

	EXTRACT_PKT_MP(mp, head_mp, has_mctl);

	mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst);
	if (mp != NULL) {
		bzero(&hdr, sizeof (icmp6_t));
		hdr.icmp6_type = ICMP6_TIME_EXCEEDED;
		hdr.icmp6_code = code;
		icmp_pkt_v6(q, head_mp, &hdr, sizeof (icmp6_t), NULL,
		    has_mctl, zoneid, ipst);
		return;
	}

	/* Packet already freed; drop the orphaned leading block. */
	if (has_mctl)
		freeb(head_mp);
}
/*
 * Generate an ICMPv6 destination unreachable message with the given code
 * in response to the packet in mp.
 *
 * mp may carry a leading control block, which EXTRACT_PKT_MP() separates
 * from the packet.  If icmp_pkt_err_ok_v6() rejects the packet it has
 * already freed it, so only that leading block remains to be freed here.
 * Otherwise the whole message is handed to icmp_pkt_v6().
 */
void
icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code,
    boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid,
    ip_stack_t *ipst)
{
	boolean_t	has_mctl;
	mblk_t		*head_mp;
	icmp6_t		hdr;

	EXTRACT_PKT_MP(mp, head_mp, has_mctl);

	mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst);
	if (mp != NULL) {
		bzero(&hdr, sizeof (icmp6_t));
		hdr.icmp6_type = ICMP6_DST_UNREACH;
		hdr.icmp6_code = code;
		icmp_pkt_v6(q, head_mp, &hdr, sizeof (icmp6_t), NULL,
		    has_mctl, zoneid, ipst);
		return;
	}

	/* Packet already freed; drop the orphaned leading block. */
	if (has_mctl)
		freeb(head_mp);
}
/*
 * Generate an ICMPv6 packet too big message advertising mtu (given in
 * host byte order; stored with htonl()) in response to the packet in mp.
 *
 * mp may carry a leading control block, which EXTRACT_PKT_MP() separates
 * from the packet.  If icmp_pkt_err_ok_v6() rejects the packet it has
 * already freed it, so only that leading block remains to be freed here.
 * Otherwise the whole message is handed to icmp_pkt_v6().
 */
static void
icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu,
    boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst)
{
	boolean_t	has_mctl;
	mblk_t		*head_mp;
	icmp6_t		hdr;

	EXTRACT_PKT_MP(mp, head_mp, has_mctl);

	mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst);
	if (mp != NULL) {
		bzero(&hdr, sizeof (icmp6_t));
		hdr.icmp6_type = ICMP6_PACKET_TOO_BIG;
		hdr.icmp6_code = 0;
		hdr.icmp6_mtu = htonl(mtu);
		icmp_pkt_v6(q, head_mp, &hdr, sizeof (icmp6_t), NULL,
		    has_mctl, zoneid, ipst);
		return;
	}

	/* Packet already freed; drop the orphaned leading block. */
	if (has_mctl)
		freeb(head_mp);
}
/*
 * Generate an ICMPv6 parameter problem message.  (May be called as writer.)
 * 'offset' is the offset from the beginning of the packet in error; it is
 * given in host byte order and stored with htonl().
 *
 * mp may carry a leading control block, which EXTRACT_PKT_MP() separates
 * from the packet.  If icmp_pkt_err_ok_v6() rejects the packet it has
 * already freed it, so only that leading block remains to be freed here.
 * Otherwise the whole message is handed to icmp_pkt_v6().
 */
static void
icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code,
    uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid,
    ip_stack_t *ipst)
{
	boolean_t	has_mctl;
	mblk_t		*head_mp;
	icmp6_t		hdr;

	EXTRACT_PKT_MP(mp, head_mp, has_mctl);

	mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst);
	if (mp != NULL) {
		bzero((char *)&hdr, sizeof (icmp6_t));
		hdr.icmp6_type = ICMP6_PARAM_PROB;
		hdr.icmp6_code = code;
		hdr.icmp6_pptr = htonl(offset);
		icmp_pkt_v6(q, head_mp, &hdr, sizeof (icmp6_t), NULL,
		    has_mctl, zoneid, ipst);
		return;
	}

	/* Packet already freed; drop the orphaned leading block. */
	if (has_mctl)
		freeb(head_mp);
}
/*
* This code will need to take into account the possibility of binding
* to a link local address on a multi-homed host, in which case the
* outgoing interface (from the conn) will need to be used when getting
* an ire for the dst. Going through proper outgoing interface and
* choosing the source address corresponding to the outgoing interface
* is necessary when the destination address is a link-local address and
* IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set.
* This can happen when active connection is setup; thus ipp pointer
* is passed here from tcp_connect_*() routines, in non-TCP cases NULL
* pointer is passed as ipp pointer.
*
* Handles a T_bind_req in mp for the IPv6 conn connp.  The last byte of
* the message is the protocol number.  The address part selects the kind
* of bind by its length:
*   0                       - register for all packets of the protocol
*   IPV6_ADDR_LEN           - verify a local address only
*   sizeof (sin6_t)         - local bind (address + port)
*   sizeof (ipa6_conn_t)    - connected bind, destination always verified
*   sizeof (ipa6_conn_x_t)  - connected bind, destination verified only
*                             when ACX_VERIFY_DST is set
*
* Returns the message to send back: mp rewritten as a T_BIND_ACK on
* success, otherwise the result of mi_tpi_err_ack_alloc() (TSYSERR with
* the errno when error > 0, TBADADDR otherwise).
*/
mblk_t *
ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp)
{
ssize_t len;
int protocol;
struct T_bind_req *tbr;
sin6_t *sin6;
ipa6_conn_t *ac6;
in6_addr_t *v6srcp;
in6_addr_t *v6dstp;
uint16_t lport;
uint16_t fport;
uchar_t *ucp;
int error = 0;
boolean_t local_bind;
ipa6_conn_x_t *acx6;
boolean_t verify_dst;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
cred_t *cr;
/*
* All Solaris components should pass a db_credp
* for this TPI message, hence we ASSERT.
* But in case there is some other M_PROTO that looks
* like a TPI message sent by some other kernel
* component, we check and return an error.
*/
cr = msg_getcred(mp, NULL);
ASSERT(cr != NULL);
if (cr == NULL) {
error = EINVAL;
goto bad_addr;
}
ASSERT(connp->conn_af_isv6);
/* Need at least the T_bind_req plus the trailing protocol byte. */
len = mp->b_wptr - mp->b_rptr;
if (len < (sizeof (*tbr) + 1)) {
(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
"ip_bind_v6: bogus msg, len %ld", len);
/* error is still 0 here, which bad_addr reports as TBADADDR. */
goto bad_addr;
}
/* Back up and extract the protocol identifier. */
mp->b_wptr--;
tbr = (struct T_bind_req *)mp->b_rptr;
/* Reset the message type in preparation for shipping it back. */
mp->b_datap->db_type = M_PCPROTO;
protocol = *mp->b_wptr & 0xFF;
connp->conn_ulp = (uint8_t)protocol;
/*
* Check for a zero length address. This is from a protocol that
* wants to register to receive all packets of its type.
*/
if (tbr->ADDR_length == 0) {
if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP ||
protocol == IPPROTO_ESP || protocol == IPPROTO_AH) &&
ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head !=
NULL) {
/*
* TCP, SCTP, AH, and ESP have single protocol fanouts.
* Do not allow others to bind to these.
*/
goto bad_addr;
}
/*
* The udp module never sends down a zero-length address,
* and allowing this on a labeled system will break MLP
* functionality.
*/
if (is_system_labeled() && protocol == IPPROTO_UDP)
goto bad_addr;
/* Allow ipsec plumbing */
if (connp->conn_mac_exempt && protocol != IPPROTO_AH &&
protocol != IPPROTO_ESP)
goto bad_addr;
connp->conn_srcv6 = ipv6_all_zeros;
ipcl_proto_insert_v6(connp, protocol);
tbr->PRIM_type = T_BIND_ACK;
return (mp);
}
/* Extract the address pointer from the message. */
ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset,
tbr->ADDR_length);
if (ucp == NULL) {
ip1dbg(("ip_bind_v6: no address\n"));
goto bad_addr;
}
if (!OK_32PTR(ucp)) {
ip1dbg(("ip_bind_v6: unaligned address\n"));
goto bad_addr;
}
/*
* Decode the address by its length.  v6dstp, fport and verify_dst are
* assigned only in the two connected cases and are read below only when
* local_bind is B_FALSE.
*/
switch (tbr->ADDR_length) {
default:
ip1dbg(("ip_bind_v6: bad address length %d\n",
(int)tbr->ADDR_length));
goto bad_addr;
case IPV6_ADDR_LEN:
/* Verification of local address only */
v6srcp = (in6_addr_t *)ucp;
lport = 0;
local_bind = B_TRUE;
break;
case sizeof (sin6_t):
sin6 = (sin6_t *)ucp;
v6srcp = &sin6->sin6_addr;
lport = sin6->sin6_port;
local_bind = B_TRUE;
break;
case sizeof (ipa6_conn_t):
/*
* Verify that both the source and destination addresses
* are valid.
*/
ac6 = (ipa6_conn_t *)ucp;
v6srcp = &ac6->ac6_laddr;
v6dstp = &ac6->ac6_faddr;
fport = ac6->ac6_fport;
/* For raw socket, the local port is not set. */
lport = ac6->ac6_lport != 0 ? ac6->ac6_lport :
connp->conn_lport;
local_bind = B_FALSE;
/* Always verify destination reachability. */
verify_dst = B_TRUE;
break;
case sizeof (ipa6_conn_x_t):
/*
* Verify that the source address is valid.
*/
acx6 = (ipa6_conn_x_t *)ucp;
ac6 = &acx6->ac6x_conn;
v6srcp = &ac6->ac6_laddr;
v6dstp = &ac6->ac6_faddr;
fport = ac6->ac6_fport;
lport = ac6->ac6_lport;
local_bind = B_FALSE;
/*
* Client that passed ipa6_conn_x_t to us specifies whether to
* verify destination reachability.
*/
verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0;
break;
}
if (local_bind) {
/*
* Fanout insertion is requested for every local bind except the
* bare-address (IPV6_ADDR_LEN) verification.
*/
error = ip_proto_bind_laddr_v6(connp, &mp->b_cont, protocol,
v6srcp, lport, tbr->ADDR_length != IPV6_ADDR_LEN);
} else {
error = ip_proto_bind_connected_v6(connp, &mp->b_cont, protocol,
v6srcp, lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr);
}
if (error == 0) {
/* Send it home. */
mp->b_datap->db_type = M_PCPROTO;
tbr->PRIM_type = T_BIND_ACK;
return (mp);
}
bad_addr:
/*
* Positive values are reported as TSYSERR with that errno; zero
* (malformed request) and negative values are reported as TBADADDR.
*/
ASSERT(error != EINPROGRESS);
if (error > 0)
mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error);
else
mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0);
return (mp);
}
/*
 * Common tail of a successful bind.
 *
 * When the packet version (v4/v6) of the conn changed, refresh conn_send
 * and the packet version via ip_setpktversion().  When the caller asked
 * for an IRE (mp is an IRE_DB_REQ_TYPE block) and outbound policy is
 * enforced on the conn, record the IPsec header overhead in that IRE.
 */
static void
ip_bind_post_handling_v6(conn_t *connp, mblk_t *mp,
    boolean_t version_changed, boolean_t ire_requested, ip_stack_t *ipst)
{
	ire_t	*ire;

	/* Update conn_send and pktversion if v4/v6 changed */
	if (version_changed)
		ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst);

	/*
	 * Pass the IPSEC headers size in ire_ipsec_overhead.
	 * We can't do this in ip_bind_insert_ire because the policy
	 * may not have been inherited at that point in time and hence
	 * conn_out_enforce_policy may not be set.
	 */
	if (!ire_requested || !connp->conn_out_enforce_policy)
		return;
	if (mp == NULL || DB_TYPE(mp) != IRE_DB_REQ_TYPE)
		return;

	ire = (ire_t *)mp->b_rptr;
	ASSERT(MBLKL(mp) >= sizeof (ire_t));
	ire->ire_ipsec_overhead = (conn_ipsec_length(connp));
}
/*
* Here address is verified to be a valid local address.
* If the IRE_DB_REQ_TYPE mp is present, a multicast
* address is also considered a valid local address.
* In the case of a multicast address, however, the
* upper protocol is expected to reset the src address
* to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that
* no packets are emitted with multicast address as
* source address.
* The addresses valid for bind are:
* (1) - in6addr_any
* (2) - IP address of an UP interface
* (3) - IP address of a DOWN interface
* (4) - a multicast address. In this case
* the conn will only receive packets destined to
* the specified multicast address. Note: the
* application still has to issue an
* IPV6_JOIN_GROUP socket option.
*
* In all the above cases, the bound address must be valid in the current zone.
* When the address is loopback or multicast, there might be many matching IREs
* so bind has to look up based on the zone.
*/
/*
* Verify the local IP address. Does not change the conn_t except
* conn_fully_bound and conn_policy_cached.
* NOTE(review): when fanout_insert is set the code below also writes
* conn_srcv6, conn_remv6, conn_lport and conn_fport, and on failure it
* resets conn_mlp_type.
*
* *mpp, when present, is either an IRE_DB_REQ_TYPE block (the caller wants
* an IRE back) or an IPSEC_POLICY_SET block; the latter is always freed
* here and *mpp set to NULL.
*
* Returns 0 on success, a positive errno (e.g. EADDRNOTAVAIL), or -1 when
* the IRE or IPsec policy step fails.
*/
static int
ip_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol,
const in6_addr_t *v6src, uint16_t lport, boolean_t fanout_insert)
{
int error = 0;
ire_t *src_ire = NULL;
zoneid_t zoneid;
mblk_t *mp = NULL;
boolean_t ire_requested;
boolean_t ipsec_policy_set;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
if (mpp)
mp = *mpp;
ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE);
ipsec_policy_set = (mp != NULL && DB_TYPE(mp) == IPSEC_POLICY_SET);
/*
* If it was previously connected, conn_fully_bound would have
* been set.
*/
connp->conn_fully_bound = B_FALSE;
zoneid = connp->conn_zoneid;
if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
src_ire = ire_route_lookup_v6(v6src, 0, 0,
0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst);
/*
* If an address other than in6addr_any is requested,
* we verify that it is a valid address for bind
* Note: Following code is in if-else-if form for
* readability compared to a condition check.
*/
ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST));
/* LINTED - statement has no consequent */
if (IRE_IS_LOCAL(src_ire)) {
/*
* (2) Bind to address of local UP interface
*/
} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
ipif_t *multi_ipif = NULL;
ire_t *save_ire;
/*
* (4) bind to multicast address.
* Fake out the IRE returned to upper
* layer to be a broadcast IRE in
* ip_bind_insert_ire_v6().
* Pass other information that matches
* the ipif (e.g. the source address).
* conn_multicast_ill is only used for
* IPv6 packets
*/
mutex_enter(&connp->conn_lock);
if (connp->conn_multicast_ill != NULL) {
(void) ipif_lookup_zoneid(
connp->conn_multicast_ill, zoneid, 0,
&multi_ipif);
} else {
/*
* Look for default like
* ip_wput_v6
*/
multi_ipif = ipif_lookup_group_v6(
&ipv6_unspecified_group, zoneid, ipst);
}
mutex_exit(&connp->conn_lock);
/*
* Park the route-lookup result in save_ire so that src_ire can
* take the ipif's IRE.  On failure src_ire is restored, so the
* common exit releases whichever IRE is still held.
*/
save_ire = src_ire;
src_ire = NULL;
if (multi_ipif == NULL || !ire_requested ||
(src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) {
src_ire = save_ire;
error = EADDRNOTAVAIL;
} else {
ASSERT(src_ire != NULL);
if (save_ire != NULL)
ire_refrele(save_ire);
}
if (multi_ipif != NULL)
ipif_refrele(multi_ipif);
} else {
/* (3) presumably the DOWN-interface case -- TODO confirm */
if (!ip_addr_exists_v6(v6src, zoneid, ipst)) {
/*
* Not a valid address for bind
*/
error = EADDRNOTAVAIL;
}
}
if (error != 0) {
/* Red Alert! Attempting to be a bogon! */
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_bind_laddr_v6: bad src"
" address %s\n", AF_INET6, v6src);
}
goto bad_addr;
}
}
/*
* Allow setting new policies. For example, disconnects come
* down as ipa_t bind. As we would have set conn_policy_cached
* to B_TRUE before, we should set it to B_FALSE, so that policy
* can change after the disconnect.
*/
connp->conn_policy_cached = B_FALSE;
/* If not fanout_insert this was just an address verification */
if (fanout_insert) {
/*
* The addresses have been verified. Time to insert in
* the correct fanout list.
*/
connp->conn_srcv6 = *v6src;
connp->conn_remv6 = ipv6_all_zeros;
connp->conn_lport = lport;
connp->conn_fport = 0;
error = ipcl_bind_insert_v6(connp, protocol, v6src, lport);
}
if (error == 0) {
if (ire_requested) {
if (!ip_bind_get_ire_v6(mpp, src_ire, v6src, NULL,
ipst)) {
error = -1;
goto bad_addr;
}
/* Re-read *mpp after the call; it is passed by reference. */
mp = *mpp;
} else if (ipsec_policy_set) {
if (!ip_bind_ipsec_policy_set(connp, mp)) {
error = -1;
goto bad_addr;
}
}
}
/* Common exit: reached on success (fall-through) as well as on failure. */
bad_addr:
if (error != 0) {
/* Undo the anonymous MLP port registration, if there was one. */
if (connp->conn_anon_port) {
(void) tsol_mlp_anon(crgetzone(connp->conn_cred),
connp->conn_mlp_type, connp->conn_ulp, ntohs(lport),
B_FALSE);
}
connp->conn_mlp_type = mlptSingle;
}
if (src_ire != NULL)
ire_refrele(src_ire);
if (ipsec_policy_set) {
ASSERT(mp != NULL);
freeb(mp);
/*
* As of now assume that nothing else accompanies
* IPSEC_POLICY_SET.
*/
*mpp = NULL;
}
return (error);
}
/*
 * Bind an AF_INET6 conn to a local address, dispatching to the IPv4 or
 * IPv6 verifier depending on the address.
 *
 * A V4-mapped v6srcp is bound through ip_bind_laddr_v4() unless the conn
 * has conn_ipv6_v6only set, in which case the bind is refused with
 * -TBADADDR.  Any other address goes through ip_bind_laddr_v6().  On
 * success conn_pkt_isv6 is updated to match and
 * ip_bind_post_handling_v6() is run.
 *
 * Parameters:
 *   connp         - the conn being bound (must have conn_af_isv6 set).
 *   mpp           - optional pointer to an IRE_DB_REQ_TYPE or
 *                   IPSEC_POLICY_SET block; passed to the verifier.
 *   protocol      - upper-layer protocol; stored in conn_ulp.
 *   v6srcp, lport - local address and port to bind.
 *   fanout_insert - B_FALSE for address verification only.
 *
 * Returns 0 on success, a positive errno, or a negated TLI error
 * (-TBADADDR) when the verifier reports a negative error or the address
 * is not acceptable for this conn.
 */
int
ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol,
    const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert)
{
	int		error;
	boolean_t	ire_requested;
	mblk_t		*mp = NULL;
	boolean_t	orig_pkt_isv6 = connp->conn_pkt_isv6;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	/*
	 * Note that we allow connect to broadcast and multicast
	 * address when ire_requested is set. Thus the ULP
	 * has to check for IRE_BROADCAST and multicast.
	 */
	if (mpp)
		mp = *mpp;
	ire_requested = (mp && DB_TYPE(mp) == IRE_DB_REQ_TYPE);

	ASSERT(connp->conn_af_isv6);
	connp->conn_ulp = protocol;

	if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) {
		/* Bind to IPv4 address */
		ipaddr_t v4src;

		IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);

		error = ip_bind_laddr_v4(connp, mpp, protocol, v4src, lport,
		    fanout_insert);
		if (error != 0)
			goto bad_addr;
		connp->conn_pkt_isv6 = B_FALSE;
	} else {
		if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
			/*
			 * V4-mapped address on a v6-only endpoint.  This
			 * must be a negative value so that it is reported
			 * as -TBADADDR below; returning 0 here would tell
			 * the caller the bind succeeded although nothing
			 * was bound.
			 */
			error = -1;
			goto bad_addr;
		}
		error = ip_bind_laddr_v6(connp, mpp, protocol, v6srcp,
		    lport, fanout_insert);
		if (error != 0)
			goto bad_addr;
		connp->conn_pkt_isv6 = B_TRUE;
	}

	ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL,
	    orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst);
	return (0);

bad_addr:
	/* Negative values are internal; hand back a negated TLI error. */
	if (error < 0)
		error = -TBADADDR;
	return (error);
}
/*
* Verify that both the source and destination addresses
* are valid. If verify_dst, then destination address must also be reachable,
* i.e. have a route. Protocols like TCP want this. Tunnels do not.
* It takes ip6_pkt_t * as one of the arguments to determine correct
* source address when IPV6_PKTINFO or scope_id is set along with a link-local
* destination address. Note that parameter ipp is only useful for TCP connect
* when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all
* non-TCP cases, it is NULL and for all other tcp cases it is not useful.
*
*/
int
ip_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol,
in6_addr_t *v6src, uint16_t lport, const in6_addr_t *v6dst,
ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert,
boolean_t verify_dst, cred_t *cr)
{
ire_t *src_ire;
ire_t *dst_ire;
int error = 0;
ire_t *sire = NULL;
ire_t *md_dst_ire = NULL;
ill_t *md_ill = NULL;
ill_t *dst_ill = NULL;
ipif_t *src_ipif = NULL;
zoneid_t zoneid;
boolean_t ill_held = B_FALSE;
mblk_t *mp = NULL;
boolean_t ire_requested = B_FALSE;
boolean_t ipsec_policy_set = B_FALSE;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
ts_label_t *tsl = NULL;
if (mpp)
mp = *mpp;
if (mp != NULL) {
ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE);
ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET);
}
if (cr != NULL)
tsl = crgetlabel(cr);
src_ire = dst_ire = NULL;
/*
* If we never got a disconnect before, clear it now.
*/
connp->conn_fully_bound = B_FALSE;
zoneid = connp->conn_zoneid;
if (IN6_IS_ADDR_MULTICAST(v6dst)) {
ipif_t *ipif;
/*
* Use an "emulated" IRE_BROADCAST to tell the transport it
* is a multicast.
* Pass other information that matches
* the ipif (e.g. the source address).
*
* conn_multicast_ill is only used for IPv6 packets
*/
mutex_enter(&connp->conn_lock);
if (connp->conn_multicast_ill != NULL) {
(void) ipif_lookup_zoneid(connp->conn_multicast_ill,
zoneid, 0, &ipif);
} else {
/* Look for default like ip_wput_v6 */
ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst);
}
mutex_exit(&connp->conn_lock);
if (ipif == NULL || ire_requested ||
(dst_ire = ipif_to_ire_v6(ipif)) == NULL) {
if (ipif != NULL)
ipif_refrele(ipif);
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_bind_connected_v6: bad "
"connected multicast %s\n", AF_INET6,
v6dst);
}
error = ENETUNREACH;
goto bad_addr;
}
if (ipif != NULL)
ipif_refrele(ipif);
} else {
dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0,
NULL, &sire, zoneid, tsl,
MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT |
MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR,
ipst);
/*
* We also prevent ire's with src address INADDR_ANY to
* be used, which are created temporarily for
* sending out packets from endpoints that have
* conn_unspec_src set.
*/
if (dst_ire == NULL ||
(dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) {
/*
* When verifying destination reachability, we always
* complain.
*
* When not verifying destination reachability but we
* found an IRE, i.e. the destination is reachable,
* then the other tests still apply and we complain.
*/
if (verify_dst || (dst_ire != NULL)) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_bind_connected_v6: bad"
" connected dst %s\n", AF_INET6,
v6dst);
}
if (dst_ire == NULL ||
!(dst_ire->ire_type & IRE_HOST)) {
error = ENETUNREACH;
} else {
error = EHOSTUNREACH;
}
goto bad_addr;
}
}
}
/*
* We now know that routing will allow us to reach the destination.
* Check whether Trusted Solaris policy allows communication with this
* host, and pretend that the destination is unreachable if not.
*
* This is never a problem for TCP, since that transport is known to
* compute the label properly as part of the tcp_rput_other T_BIND_ACK
* handling. If the remote is unreachable, it will be detected at that
* point, so there's no reason to check it here.
*
* Note that for sendto (and other datagram-oriented friends), this
* check is done as part of the data path label computation instead.
* The check here is just to make non-TCP connect() report the right
* error.
*/
if (dst_ire != NULL && is_system_labeled() &&
!IPCL_IS_TCP(connp) &&
tsol_compute_label_v6(cr, v6dst, NULL,
connp->conn_mac_exempt, ipst) != 0) {
error = EHOSTUNREACH;
if (ip_debug > 2) {
pr_addr_dbg("ip_bind_connected: no label for dst %s\n",
AF_INET6, v6dst);
}
goto bad_addr;
}
/*
* If the app does a connect(), it means that it will most likely
* send more than 1 packet to the destination. It makes sense
* to clear the temporary flag.
*/
if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE &&
(dst_ire->ire_marks & IRE_MARK_TEMPORARY)) {
irb_t *irb = dst_ire->ire_bucket;
rw_enter(&irb->irb_lock, RW_WRITER);
/*
* We need to recheck for IRE_MARK_TEMPORARY after acquiring
* the lock in order to guarantee irb_tmp_ire_cnt.
*/
if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) {
dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY;
irb->irb_tmp_ire_cnt--;
}
rw_exit(&irb->irb_lock);
}
ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION);
/*
* See if we should notify ULP about MDT; we do this whether or not
* ire_requested is TRUE, in order to handle active connects; MDT
* eligibility tests for passive connects are handled separately
* through tcp_adapt_ire(). We do this before the source address
* selection, because dst_ire may change after a call to
* ipif_select_source_v6(). This is a best-effort check, as the
* packet for this connection may not actually go through
* dst_ire->ire_stq, and the exact IRE can only be known after
* calling ip_newroute_v6(). This is why we further check on the
* IRE during Multidata packet transmission in tcp_multisend().
*/
if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set &&
dst_ire != NULL &&
!(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) &&
(md_ill = ire_to_ill(dst_ire), md_ill != NULL) &&
ILL_MDT_CAPABLE(md_ill)) {
md_dst_ire = dst_ire;
IRE_REFHOLD(md_dst_ire);
}
if (dst_ire != NULL &&
dst_ire->ire_type == IRE_LOCAL &&
dst_ire->ire_zoneid != zoneid &&
dst_ire->ire_zoneid != ALL_ZONES) {
src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL,
zoneid, 0, NULL,
MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT |
MATCH_IRE_RJ_BHOLE, ipst);
if (src_ire == NULL) {
error = EHOSTUNREACH;
goto bad_addr;
} else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
if (!(src_ire->ire_type & IRE_HOST))
error = ENETUNREACH;
else
error = EHOSTUNREACH;
goto bad_addr;
}
if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
src_ipif = src_ire->ire_ipif;
ipif_refhold(src_ipif);
*v6src = src_ipif->ipif_v6lcl_addr;
}
ire_refrele(src_ire);
src_ire = NULL;
} else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) {
if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) {
*v6src = sire->ire_src_addr_v6;
ire_refrele(dst_ire);
dst_ire = sire;
sire = NULL;
} else if (dst_ire->ire_type == IRE_CACHE &&
(dst_ire->ire_flags & RTF_SETSRC)) {
ASSERT(dst_ire->ire_zoneid == zoneid ||
dst_ire->ire_zoneid == ALL_ZONES);
*v6src = dst_ire->ire_src_addr_v6;
} else {
/*
* Pick a source address so that a proper inbound load
* spreading would happen. Use dst_ill specified by the
* app. when socket option or scopeid is set.
*/
int err;
if (ipp != NULL && ipp->ipp_ifindex != 0) {
uint_t if_index;
/*
* Scope id or IPV6_PKTINFO
*/
if_index = ipp->ipp_ifindex;
dst_ill = ill_lookup_on_ifindex(
if_index, B_TRUE, NULL, NULL, NULL, NULL,
ipst);
if (dst_ill == NULL) {
ip1dbg(("ip_bind_connected_v6:"
" bad ifindex %d\n", if_index));
error = EADDRNOTAVAIL;
goto bad_addr;
}
ill_held = B_TRUE;
} else if (connp->conn_outgoing_ill != NULL) {
/*
* For IPV6_BOUND_IF socket option,
* conn_outgoing_ill should be set
* already in TCP or UDP/ICMP.
*/
dst_ill = conn_get_held_ill(connp,
&connp->conn_outgoing_ill, &err);
if (err == ILL_LOOKUP_FAILED) {
ip1dbg(("ip_bind_connected_v6:"
"no ill for bound_if\n"));
error = EADDRNOTAVAIL;
goto bad_addr;
}
ill_held = B_TRUE;
} else if (dst_ire->ire_stq != NULL) {
/* No need to hold ill here */
dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr;
} else {
/* No need to hold ill here */
dst_ill = dst_ire->ire_ipif->ipif_ill;
}
if (ip6_asp_can_lookup(ipst)) {
src_ipif = ipif_select_source_v6(dst_ill,
v6dst, B_FALSE, connp->conn_src_preferences,
zoneid);
ip6_asp_table_refrele(ipst);
if (src_ipif == NULL) {
pr_addr_dbg("ip_bind_connected_v6: "
"no usable source address for "
"connection to %s\n",
AF_INET6, v6dst);
error = EADDRNOTAVAIL;
goto bad_addr;
}
*v6src = src_ipif->ipif_v6lcl_addr;
} else {
error = EADDRNOTAVAIL;
goto bad_addr;
}
}
}
/*
* We do ire_route_lookup_v6() here (and not an interface lookup)
* as we assert that v6src should only come from an interface
* that is UP for hard binding.
*/
src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL,
NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst);
/* src_ire must be a local|loopback */
if (!IRE_IS_LOCAL(src_ire)) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_bind_connected_v6: bad "
"connected src %s\n", AF_INET6, v6src);
}
error = EADDRNOTAVAIL;
goto bad_addr;
}
/*
* If the source address is a loopback address, the
* destination had best be local or multicast.
* The transports that can't handle multicast will reject
* those addresses.
*/
if (src_ire->ire_type == IRE_LOOPBACK &&
!(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) ||
IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) {
ip1dbg(("ip_bind_connected_v6: bad connected loopback\n"));
error = -1;
goto bad_addr;
}
/*
* Allow setting new policies. For example, disconnects come
* down as ipa_t bind. As we would have set conn_policy_cached
* to B_TRUE before, we should set it to B_FALSE, so that policy
* can change after the disconnect.
*/
connp->conn_policy_cached = B_FALSE;
/*
* The addresses have been verified. Initialize the conn
* before calling the policy code, as it expects the conn
* to be initialized.
*/
connp->conn_srcv6 = *v6src;
connp->conn_remv6 = *v6dst;
connp->conn_lport = lport;
connp->conn_fport = fport;
ASSERT(!(ipsec_policy_set && ire_requested));
if (ire_requested) {
iulp_t *ulp_info = NULL;
/*
* Note that sire will not be NULL if this is an off-link
* connection and there is no cache entry for that dest yet.
*
* XXX Because of an existing bug, if there are multiple
* default routes, the IRE returned now may not be the actual
* default route used (default routes are chosen in a
* round robin fashion). So if the metrics for different
* default routes are different, we may return the wrong