blob: dc5af5e7c1555251364248e04c39ef9309a615e4 [file] [log] [blame]
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Tunnel driver
* This module acts like a driver/DLPI provider as viewed from the top
* and a stream head/TPI user from the bottom
* Implements the logic for IP (IPv4 or IPv6) encapsulation
* within IP (IPv4 or IPv6)
*/
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/tihdr.h>
#include <sys/tiuser.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ethernet.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/netstack.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/vtrace.h>
#include <sys/isa_defs.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/route.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/arp.h>
#include <inet/snmpcom.h>
#include <netinet/igmp_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <net/if_dl.h>
#include <inet/ip_if.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <inet/ipsec_impl.h>
#include <inet/ipdrop.h>
#include <inet/tun.h>
#include <inet/ipsec_impl.h>
#include <sys/conf.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <inet/ip_ire.h> /* for ire_route_lookup_v6 */
static void tun_cancel_rec_evs(queue_t *, eventid_t *);
static void tun_bufcall_handler(void *);
static boolean_t tun_icmp_message_v4(queue_t *, ipha_t *, icmph_t *, mblk_t *);
static boolean_t tun_icmp_too_big_v4(queue_t *, ipha_t *, uint16_t, mblk_t *);
static boolean_t tun_icmp_message_v6(queue_t *, ip6_t *, icmp6_t *, uint8_t,
mblk_t *);
static boolean_t tun_icmp_too_big_v6(queue_t *, ip6_t *, uint32_t, uint8_t,
mblk_t *);
static void tun_sendokack(queue_t *, mblk_t *, t_uscalar_t);
static void tun_sendsdusize(queue_t *);
static void tun_senderrack(queue_t *, mblk_t *, t_uscalar_t, t_uscalar_t,
t_uscalar_t);
static int tun_fastpath(queue_t *, mblk_t *);
static int tun_ioctl(queue_t *, mblk_t *);
static void tun_timeout_handler(void *);
static int tun_rproc(queue_t *, mblk_t *);
static int tun_wproc_mdata(queue_t *, mblk_t *);
static int tun_wproc(queue_t *, mblk_t *);
static int tun_rdata(queue_t *, mblk_t *, mblk_t *, tun_t *, uint_t);
static int tun_rdata_v4(queue_t *, mblk_t *, mblk_t *, tun_t *);
static int tun_rdata_v6(queue_t *, mblk_t *, mblk_t *, tun_t *);
static int tun_set_sec_simple(tun_t *, ipsec_req_t *);
static void tun_send_ire_req(queue_t *);
static uint32_t tun_update_link_mtu(queue_t *, uint32_t, boolean_t);
static mblk_t *tun_realloc_mblk(queue_t *, mblk_t *, size_t, mblk_t *,
boolean_t);
static void tun_recover(queue_t *, mblk_t *, size_t);
static void tun_rem_ppa_list(tun_t *);
static void tun_rem_tun_byaddr_list(tun_t *);
static void tun_rput_icmp_err_v4(queue_t *, mblk_t *, mblk_t *);
static void icmp_ricmp_err_v4_v4(queue_t *, mblk_t *, mblk_t *);
static void icmp_ricmp_err_v6_v4(queue_t *, mblk_t *, mblk_t *);
static void icmp_ricmp_err_v4_v6(queue_t *, mblk_t *, mblk_t *, icmp6_t *);
static void icmp_ricmp_err_v6_v6(queue_t *, mblk_t *, mblk_t *, icmp6_t *);
static void tun_rput_icmp_err_v6(queue_t *, mblk_t *, mblk_t *);
static int tun_rput_tpi(queue_t *, mblk_t *);
static int tun_send_bind_req(queue_t *);
static void tun_statinit(tun_stats_t *, char *, netstackid_t);
static int tun_stat_kstat_update(kstat_t *, int);
static void tun_wdata_v4(queue_t *, mblk_t *);
static void tun_wdata_v6(queue_t *, mblk_t *);
static char *tun_who(queue_t *, char *);
static int tun_wput_dlpi(queue_t *, mblk_t *);
static int tun_wputnext_v6(queue_t *, mblk_t *);
static int tun_wputnext_v4(queue_t *, mblk_t *);
static boolean_t tun_limit_value_v6(queue_t *, mblk_t *, ip6_t *, int *);
static void tun_freemsg_chain(mblk_t *, uint64_t *);
static void *tun_stack_init(netstackid_t, netstack_t *);
static void tun_stack_fini(netstackid_t, void *);
/* module's defined constants, globals and data structures */
#define IP "ip"
#define IP6 "ip6"
static major_t IP_MAJ;
static major_t IP6_MAJ;
#define TUN_DEBUG
#define TUN_LINK_EXTRA_OFF 32
#define IPV6V4_DEF_TTL 60
#define IPV6V4_DEF_ENCAP 60
#define TUN_WHO_BUF 60
#ifdef TUN_DEBUG
/* levels of debugging verbosity */
#define TUN0DBG 0x00 /* crucial */
#define TUN1DBG 0x01 /* informational */
#define TUN2DBG 0x02 /* verbose */
#define TUN3DBG 0x04 /* very verbose */
/*
* Global variable storing debugging level for all tunnels. By default
* all crucial messages will be printed. Value can be masked to exclusively
* print certain debug levels and not others.
*/
int8_t tun_debug = TUN0DBG;
#define TUN_LEVEL(dbg, lvl) ((dbg & lvl) == lvl)
#define tun0dbg(a) printf a
#define tun1dbg(a) if (TUN_LEVEL(tun_debug, TUN1DBG)) printf a
#define tun2dbg(a) if (TUN_LEVEL(tun_debug, TUN2DBG)) printf a
#define tun3dbg(a) if (TUN_LEVEL(tun_debug, TUN3DBG)) printf a
#else
#define tun0dbg(a) /* */
#define tun1dbg(a) /* */
#define tun2dbg(a) /* */
#define tun3dbg(a) /* */
#endif /* TUN_DEBUG */
#define TUN_RECOVER_WAIT (1*hz)
/* canned DL_INFO_ACK - adjusted based on tunnel type */
dl_info_ack_t infoack = {
	DL_INFO_ACK,	/* dl_primitive */
	4196,		/* dl_max_sdu (overwritten with atp->tun_mtu when answering DL_INFO_REQ) */
	0,		/* dl_min_sdu */
	0,		/* dl_addr_length (set to IPV6_ADDR_LEN for automatic/6to4 tunnels) */
	DL_IPV4,	/* dl_mac_type (changed to DL_IPV6 when the lower stream is v6) */
	0,		/* dl_reserved */
	DL_UNATTACHED,	/* dl_current_state (overwritten with atp->tun_state) */
	0,		/* dl_sap_length */
	DL_CLDLS,	/* dl_service_mode */
	0,		/* dl_qos_length */
	0,		/* dl_qos_offset */
	0,		/* dl_qos_range_length */
	0,		/* dl_qos_range_offset */
	DL_STYLE2,	/* dl_provider_style */
	0,		/* dl_addr_offset */
	DL_VERSION_2,	/* dl_version */
	0,		/* dl_brdcast_addr_length */
	0,		/* dl_brdcst_addr_offset */
	0		/* dl_grow */
};
/*
 * Canned DL_BIND_ACK - IP doesn't use any of this info, so zeros are
 * fine everywhere; only dl_primitive matters to the receiver.
 */
dl_bind_ack_t bindack = {
	DL_BIND_ACK,	/* dl_primitive */
	0,		/* dl_sap */
	0,		/* dl_addr_length */
	0,		/* dl_addr_offset */
	0,		/* dl_max_conind */
	0		/* dl_xidtest_flg */
};
/*
* Canned IPv6 destination options header containing Tunnel
* Encapsulation Limit option.
*/
/*
 * NOTE(review): field roles below are inferred from initializer order and
 * the IP6OPT_* names; confirm against struct tun_encap_limit in <inet/tun.h>.
 */
static struct tun_encap_limit tun_limit_init_upper_v4 = {
	{ IPPROTO_ENCAP, 0 },		/* dest-opts hdr: next header IPv4-in-IP, ext len 0 */
	IP6OPT_TUNNEL_LIMIT,		/* tunnel encapsulation limit option type */
	1,				/* option data length */
	IPV6_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */
	IP6OPT_PADN,			/* PadN option to round out the header */
	1,				/* PadN data length */
	0				/* PadN data byte */
};
/*
 * Same layout as tun_limit_init_upper_v4, but the inner protocol is IPv6.
 * NOTE(review): field roles inferred from initializer order; confirm
 * against struct tun_encap_limit in <inet/tun.h>.
 */
static struct tun_encap_limit tun_limit_init_upper_v6 = {
	{ IPPROTO_IPV6, 0 },		/* dest-opts hdr: next header IPv6, ext len 0 */
	IP6OPT_TUNNEL_LIMIT,		/* tunnel encapsulation limit option type */
	1,				/* option data length */
	IPV6_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */
	IP6OPT_PADN,			/* PadN option to round out the header */
	1,				/* PadN data length */
	0				/* PadN data byte */
};
static tun_stats_t *tun_add_stat(queue_t *);
static void tun_add_byaddr(tun_t *);
static ipsec_tun_pol_t *itp_get_byaddr_fn(uint32_t *, uint32_t *, int,
netstack_t *);
/* Setable in /etc/system */
static boolean_t tun_do_fastpath = B_TRUE;
/* streams linkages */
/* Shared by the read- and write-side qinit structures below. */
static struct module_info info = {
	TUN_MODID,	/* module id number */
	TUN_NAME,	/* module name */
	1,		/* min packet size accepted */
	INFPSZ,		/* max packet size accepted */
	65536,		/* hi-water mark */
	1024		/* lo-water mark */
};
/* Read-side (messages coming up from lower IP) queue initialization. */
static struct qinit tunrinit = {
	(pfi_t)tun_rput,	/* read side put procedure */
	(pfi_t)tun_rsrv,	/* read side service procedure */
	tun_open,		/* open procedure */
	tun_close,		/* close procedure */
	NULL,			/* for future use */
	&info,			/* module information structure */
	NULL			/* module statistics structure */
};
/* Write-side (messages coming down from upper IP) queue initialization. */
static struct qinit tunwinit = {
	(pfi_t)tun_wput,	/* write side put procedure */
	(pfi_t)tun_wsrv,	/* write side service procedure */
	NULL,			/* open: handled on the read side only */
	NULL,			/* close: handled on the read side only */
	NULL,			/* for future use */
	&info,			/* module information structure */
	NULL			/* module statistics structure */
};
/* streamtab tying the read/write qinit structures together for this module. */
struct streamtab tuninfo = {
	&tunrinit,	/* read side queue init */
	&tunwinit,	/* write side queue init */
	NULL,		/* mux read side init */
	NULL		/* mux write side init */
};
/* STREAMS module switch entry: MT-safe, per-queue-pair inner perimeter. */
static struct fmodsw tun_fmodsw = {
	TUN_NAME,
	&tuninfo,
	(D_MP | D_MTQPAIR | D_MTPUTSHARED)
};
/* Linkage for loading this STREAMS module via mod_install(). */
static struct modlstrmod modlstrmod = {
	&mod_strmodops,
	"configured tunneling module",
	&tun_fmodsw
};
/* Top-level module linkage handed to mod_install()/mod_remove()/mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlstrmod,
	NULL
};
/*
 * Module load entry point.  Caches the majors of the lower "ip"/"ip6"
 * drivers, registers for netstack lifecycle callbacks, and installs the
 * module; the netstack registration is undone if installation fails.
 */
int
_init(void)
{
	int ret;

	IP_MAJ = ddi_name_to_major(IP);
	IP6_MAJ = ddi_name_to_major(IP6);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of tun_stack_t's.
	 */
	netstack_register(NS_TUN, tun_stack_init, NULL, tun_stack_fini);

	ret = mod_install(&modlinkage);
	if (ret != 0) {
		/* Installation failed: roll back the registration. */
		netstack_unregister(NS_TUN);
	}
	return (ret);
}
/*
 * Module unload entry point.  Only unregister the netstack callbacks once
 * mod_remove() has succeeded, so a busy module stays fully functional.
 */
int
_fini(void)
{
	int ret = mod_remove(&modlinkage);

	if (ret == 0)
		netstack_unregister(NS_TUN);
	return (ret);
}
/* Report module information to modinfo(1M) and friends. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
/*
* this module is meant to be pushed on an instance of IP and
* have an instance of IP pushed on top of it.
*/
/*
 * Open routine for one tunnel instance (called on MODOPEN push).
 *
 * Allocates the TUN_HELLO mblk and the per-instance tun_t, initializes
 * state according to the lower IP version (v4 vs. v6 major) and the
 * module identity (atun / 6to4tun / configured tun), links the instance
 * into the per-netstack by-address list, and sends TUN_HELLO downstream.
 *
 * Returns 0 on success; EINVAL for a non-module open, ENOMEM on
 * allocation failure, ENXIO for an unsupported lower stream.
 *
 * Fix: the original code leaked the pre-allocated "hello" mblk on every
 * ENXIO path; the unwinding is now centralized at "bad:" which frees it.
 */
/* ARGSUSED */
int
tun_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	tun_t *atp;
	mblk_t *hello;
	ipsec_info_t *ii;
	netstack_t *ns;
	zoneid_t zoneid;

	if (q->q_ptr != NULL) {
		/* re-open of an already open instance */
		return (0);
	}

	if (sflag != MODOPEN) {
		return (EINVAL);
	}

	tun1dbg(("tun_open\n"));

	ns = netstack_find_by_cred(credp);
	ASSERT(ns != NULL);

	/*
	 * For exclusive stacks we set the zoneid to zero
	 * to make IP operate as if in the global zone.
	 */
	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = crgetzoneid(credp);

	/* Allocate the TUN_HELLO message up front so we can fail cleanly. */
	hello = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (hello == NULL) {
		netstack_rele(ns);
		return (ENOMEM);
	}

	/* allocate per-instance structure */
	atp = kmem_zalloc(sizeof (tun_t), KM_SLEEP);

	atp->tun_state = DL_UNATTACHED;
	atp->tun_dev = *devp;
	atp->tun_zoneid = zoneid;
	atp->tun_netstack = ns;
	atp->tun_cred = credp;
	crhold(credp);

	/*
	 * Based on the lower version of IP, initialize stuff that
	 * won't change
	 */
	if (getmajor(*devp) == IP_MAJ) {
		ipha_t *ipha;

		atp->tun_flags = TUN_L_V4 | TUN_HOP_LIM;
		atp->tun_hop_limit = IPV6V4_DEF_TTL;

		/*
		 * The tunnel MTU is recalculated when we know more
		 * about the tunnel destination.
		 */
		atp->tun_mtu = IP_MAXPACKET - sizeof (ipha_t);
		ipha = &atp->tun_ipha;
		ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
		ipha->ipha_type_of_service = 0;
		ipha->ipha_ident = 0;		/* to be filled in by IP */
		ipha->ipha_fragment_offset_and_flags = htons(IPH_DF);
		ipha->ipha_ttl = atp->tun_hop_limit;
		ipha->ipha_hdr_checksum = 0;	/* to be filled in by IP */
	} else if (getmajor(*devp) == IP6_MAJ) {
		atp->tun_flags = TUN_L_V6 | TUN_HOP_LIM | TUN_ENCAP_LIM;
		atp->tun_hop_limit = IPV6_DEFAULT_HOPS;
		atp->tun_encap_lim = IPV6_DEFAULT_ENCAPLIMIT;
		atp->tun_mtu = IP_MAXPACKET - sizeof (ip6_t) -
		    IPV6_TUN_ENCAP_OPT_LEN;
		atp->tun_ip6h.ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
		atp->tun_ip6h.ip6_hops = IPV6_DEFAULT_HOPS;
	} else {
		/* Lower stream is neither ip nor ip6. */
		goto bad;
	}

	/*
	 * If this is the automatic tunneling module, atun, verify that the
	 * lower protocol is IPv4 and set TUN_AUTOMATIC.  Since we don't do
	 * automatic tunneling over IPv6, trying to run over IPv6 is an
	 * error, so free memory and return an error.  6to4 tunnels get the
	 * same treatment, with TUN_6TO4 set instead.
	 */
	if (q->q_qinfo->qi_minfo->mi_idnum == ATUN_MODID) {
		if (!(atp->tun_flags & TUN_L_V4))
			goto bad;
		atp->tun_flags |= TUN_AUTOMATIC;
		atp->tun_mtu = ATUN_MTU;
	} else if (q->q_qinfo->qi_minfo->mi_idnum == TUN6TO4_MODID) {
		if (!(atp->tun_flags & TUN_L_V4))
			goto bad;
		atp->tun_flags |= TUN_6TO4;
		atp->tun_mtu = ATUN_MTU;
	}

	/*
	 * All validation passed; now initialize the pieces that would
	 * otherwise need explicit unwinding (mutex, queue linkage).
	 */
	atp->tun_extra_offset = TUN_LINK_EXTRA_OFF;
	mutex_init(&atp->tun_lock, NULL, MUTEX_DEFAULT, NULL);

	q->q_ptr = WR(q)->q_ptr = atp;
	atp->tun_wq = WR(q);
	mutex_enter(&ns->netstack_tun->tuns_global_lock);
	tun_add_byaddr(atp);
	mutex_exit(&ns->netstack_tun->tuns_global_lock);
	ii = (ipsec_info_t *)hello->b_rptr;
	hello->b_wptr = hello->b_rptr + sizeof (*ii);
	hello->b_datap->db_type = M_CTL;
	ii->ipsec_info_type = TUN_HELLO;
	ii->ipsec_info_len = sizeof (*ii);
	qprocson(q);
	putnext(WR(q), hello);
	return (0);

bad:
	/*
	 * Common error unwind.  The original code leaked "hello" on these
	 * paths; free it here along with everything else we acquired.
	 */
	netstack_rele(ns);
	crfree(credp);
	freemsg(hello);
	kmem_free(atp, sizeof (tun_t));
	return (ENXIO);
}
/*
 * Close routine: tear down one tunnel instance.  Mirrors tun_open():
 * cancel pending events, turn off queue processing, drop the cred hold,
 * detach from shared kstats and IPsec policy, unlink from the global
 * by-address list, and free the tun_t.
 */
/* ARGSUSED */
int
tun_close(queue_t *q, int flag, cred_t *cred_p)
{
	tun_t *atp = (tun_t *)q->q_ptr;
	netstack_t *ns;
	tun_stack_t *tuns;

	ASSERT(atp != NULL);
	ns = atp->tun_netstack;
	tuns = ns->netstack_tun;

	/* Cancel outstanding qtimeouts() or qbufcalls() */
	tun_cancel_rec_evs(q, &atp->tun_events);

	/* No more put/service calls after this point. */
	qprocsoff(q);

	/* Release the credential held in tun_open(). */
	crfree(atp->tun_cred);
	atp->tun_cred = NULL;

	/* NOTE: tun_rem_ppa_list() may unlink tun_itp from its AVL tree. */
	if (atp->tun_stats != NULL)
		tun_rem_ppa_list(atp);

	if (atp->tun_itp != NULL) {
		/* In brackets because of ITP_REFRELE's brackets. */
		ITP_REFRELE(atp->tun_itp, ns);
	}

	netstack_rele(ns);
	mutex_destroy(&atp->tun_lock);

	/* remove tun_t from global list */
	mutex_enter(&tuns->tuns_global_lock);
	tun_rem_tun_byaddr_list(atp);
	mutex_exit(&tuns->tuns_global_lock);

	/* free per-instance struct */
	kmem_free(atp, sizeof (tun_t));

	q->q_ptr = WR(q)->q_ptr = NULL;
	return (0);
}
/*
* Cancel bufcall and timer requests
* Don't need to hold lock. protected by perimeter
*/
/*
 * Cancel any outstanding bufcall or timeout requests recorded in *evs,
 * on both sides of the queue pair.  Each id is zeroed once cancelled so
 * a second call is harmless.  No lock needed: protected by the perimeter.
 */
static void
tun_cancel_rec_evs(queue_t *q, eventid_t *evs)
{
	queue_t *rq = RD(q);
	queue_t *wq = WR(q);

	/* Outstanding bufcall requests, read side then write side. */
	if (evs->ev_rbufcid != 0) {
		qunbufcall(rq, evs->ev_rbufcid);
		evs->ev_rbufcid = 0;
	}
	if (evs->ev_wbufcid != 0) {
		qunbufcall(wq, evs->ev_wbufcid);
		evs->ev_wbufcid = 0;
	}

	/* Outstanding timeout requests, read side then write side. */
	if (evs->ev_rtimoutid != 0) {
		(void) quntimeout(rq, evs->ev_rtimoutid);
		evs->ev_rtimoutid = 0;
	}
	if (evs->ev_wtimoutid != 0) {
		(void) quntimeout(wq, evs->ev_wtimoutid);
		evs->ev_wtimoutid = 0;
	}
}
/*
* Called by bufcall() when memory becomes available
* Don't need to hold lock. protected by perimeter
*/
static void
tun_bufcall_handler(void *arg)
{
queue_t *q = arg;
tun_t *atp = (tun_t *)q->q_ptr;
eventid_t *evs;
ASSERT(atp);
evs = &atp->tun_events;
if ((q->q_flag & QREADR) != 0) {
ASSERT(evs->ev_rbufcid);
evs->ev_rbufcid = 0;
} else {
ASSERT(evs->ev_wbufcid);
evs->ev_wbufcid = 0;
}
enableok(q);
qenable(q);
}
/*
* Called by timeout (if we couldn't do a bufcall)
* Don't need to hold lock. protected by perimeter
*/
static void
tun_timeout_handler(void *arg)
{
queue_t *q = arg;
tun_t *atp = (tun_t *)q->q_ptr;
eventid_t *evs;
ASSERT(atp);
evs = &atp->tun_events;
if (q->q_flag & QREADR) {
ASSERT(evs->ev_rtimoutid);
evs->ev_rtimoutid = 0;
} else {
ASSERT(evs->ev_wtimoutid);
evs->ev_wtimoutid = 0;
}
enableok(q);
qenable(q);
}
/*
* This routine is called when a message buffer can not
* be allocated. M_PCPROT message are converted to M_PROTO, but
* other than that, the mblk passed in must not be a high
* priority message (putting a hight priority message back on
* the queue is a bad idea)
* Side effect: the queue is disabled
* (timeout or bufcall handler will re-enable the queue)
* tun_cancel_rec_evs() must be called in close to cancel all
* outstanding requests.
*/
static void
tun_recover(queue_t *q, mblk_t *mp, size_t size)
{
	tun_t *atp = (tun_t *)q->q_ptr;
	timeout_id_t tid;
	bufcall_id_t bid;
	eventid_t *evs = &atp->tun_events;

	ASSERT(mp != NULL);

	/*
	 * To avoid re-enabling the queue, change the high priority
	 * M_PCPROTO message to a M_PROTO before putting it on the queue
	 */
	if (mp->b_datap->db_type == M_PCPROTO)
		mp->b_datap->db_type = M_PROTO;

	/* By now this must be an ordinary (non-priority) message. */
	ASSERT(mp->b_datap->db_type < QPCTL);

	/* Put the message back at the head of the queue for later retry. */
	(void) putbq(q, mp);

	/*
	 * Make sure there is at most one outstanding request per queue.
	 */
	if (q->q_flag & QREADR) {
		if (evs->ev_rtimoutid || evs->ev_rbufcid)
			return;
	} else {
		if (evs->ev_wtimoutid || evs->ev_wbufcid)
			return;
	}

	/* Stall the queue until a handler re-enables it (see tun_*_handler). */
	noenable(q);

	/*
	 * locking is needed here because this routine may be called
	 * with two puts() running
	 */
	mutex_enter(&atp->tun_lock);
	if (!(bid = qbufcall(q, size, BPRI_MED, tun_bufcall_handler, q))) {
		/* qbufcall() failed too; fall back to a fixed-delay timeout. */
		tid = qtimeout(q, tun_timeout_handler, q, TUN_RECOVER_WAIT);
		if (q->q_flag & QREADR)
			evs->ev_rtimoutid = tid;
		else
			evs->ev_wtimoutid = tid;
	} else {
		if (q->q_flag & QREADR)
			evs->ev_rbufcid = bid;
		else
			evs->ev_wbufcid = bid;
	}
	mutex_exit(&atp->tun_lock);
}
/*
* tun_realloc_mblk(q, mp, size, orig_mp, copy)
*
* q - pointer to a queue_t, must not be NULL
* mp - pointer to an mblk to copy, can be NULL
* size - Number of bytes being (re)allocated
* orig_mp - pointer to the original mblk_t which will be passed to
* tun_recover if the memory (re)allocation fails. This is done
* so that the message can be rescheduled on the queue.
* orig_mp must be NULL if the original mblk_t is a high priority
* message of type other then M_PCPROTO.
* copy - a boolean to specify wheater the contents of mp should be copied
* into the new mblk_t returned by this function.
*
* note: this routine will adjust the b_rptr and b_wptr of the
* mblk. Returns an mblk able to hold the requested size or
* NULL if allocation failed. If copy is true, original
* contents, if any, will be copied to new mblk
*/
static mblk_t *
tun_realloc_mblk(queue_t *q, mblk_t *mp, size_t size, mblk_t *orig_mp,
    boolean_t copy)
{
	/*
	 * If we are passed in an mblk.. check to make sure that
	 * it is big enough and we are the only users of the mblk
	 * If not, then try and allocate one
	 */
	if (mp == NULL || mp->b_datap->db_lim - mp->b_datap->db_base < size ||
	    mp->b_datap->db_ref > 1) {
		size_t asize;
		mblk_t *newmp;

		/* allocate at least as much as we had -- don't shrink */
		if (mp != NULL) {
			asize = MAX(size,
			    mp->b_datap->db_lim - mp->b_datap->db_base);
		} else {
			asize = size;
		}
		newmp = allocb(asize, BPRI_HI);
		if (newmp == NULL) {
			/*
			 * Reschedule the mblk via bufcall or timeout
			 * if orig_mp is non-NULL
			 */
			if (orig_mp != NULL) {
				tun_recover(q, orig_mp, asize);
			}
			/*
			 * NOTE(review): the message text below is stale --
			 * this helper is generic, not specific to
			 * dl_ok_ack allocation.
			 */
			tun1dbg(("tun_realloc_mblk: couldn't allocate" \
			    " dl_ok_ack mblk\n"));
			return (NULL);
		}
		if (mp != NULL) {
			/* Preserve contents (optionally) and message type. */
			if (copy)
				bcopy(mp->b_rptr, newmp->b_rptr,
				    mp->b_wptr - mp->b_rptr);
			newmp->b_datap->db_type = mp->b_datap->db_type;
			freemsg(mp);
		}
		mp = newmp;
	} else {
		/* Existing mblk is usable; slide any data down to db_base. */
		if (mp->b_rptr != mp->b_datap->db_base) {
			if (copy)
				bcopy(mp->b_rptr, mp->b_datap->db_base,
				    mp->b_wptr - mp->b_rptr);
			mp->b_rptr = mp->b_datap->db_base;
		}
	}
	/* Caller gets exactly `size' bytes between b_rptr and b_wptr. */
	mp->b_wptr = mp->b_rptr + size;
	return (mp);
}
/* send a DL_OK_ACK back upstream */
/*
 * Acknowledge a successful DLPI request: reshape `mp' into a DL_OK_ACK
 * for primitive `prim' and send it back upstream.  If the mblk can't be
 * (re)allocated, tun_realloc_mblk() has already scheduled a retry.
 */
static void
tun_sendokack(queue_t *q, mblk_t *mp, t_uscalar_t prim)
{
	dl_ok_ack_t *ack;

	mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp, B_FALSE);
	if (mp == NULL)
		return;

	ack = (dl_ok_ack_t *)mp->b_rptr;
	ack->dl_primitive = DL_OK_ACK;
	ack->dl_correct_primitive = prim;
	mp->b_datap->db_type = M_PCPROTO;
	qreply(q, mp);
}
/*
* Send a DL_NOTIFY_IND message with DL_NOTE_SDU_SIZE up to notify IP of a
* link MTU change.
*/
/*
 * Send a DL_NOTIFY_IND message with DL_NOTE_SDU_SIZE up to notify the
 * upper IP instance of a link MTU change.  A no-op unless the upper
 * stream previously enabled SDU-size notifications.
 */
static void
tun_sendsdusize(queue_t *q)
{
	tun_t *atp = (tun_t *)q->q_ptr;
	dl_notify_ind_t *notify;
	mblk_t *mp;

	/* Only notify if the upper stream asked for it. */
	if (!(atp->tun_notifications & DL_NOTE_SDU_SIZE))
		return;

	mp = tun_realloc_mblk(q, NULL, DL_NOTIFY_IND_SIZE, NULL, B_FALSE);
	if (mp == NULL)
		return;

	mp->b_datap->db_type = M_PROTO;
	notify = (dl_notify_ind_t *)mp->b_rptr;
	notify->dl_primitive = DL_NOTIFY_IND;
	notify->dl_notification = DL_NOTE_SDU_SIZE;
	notify->dl_data = atp->tun_mtu;
	notify->dl_addr_length = 0;
	notify->dl_addr_offset = 0;

	tun1dbg(("tun_sendsdusize: notifying ip of new mtu: %d", atp->tun_mtu));

	/*
	 * Deliver to the upper IP instance that is using us as a device.
	 */
	putnext(RD(q), mp);
}
/* send a DL_ERROR_ACK back upstream */
/*
 * Reject a DLPI request: reshape `mp' into a DL_ERROR_ACK carrying the
 * failed primitive, the DLPI error `dl_err', and (for DL_SYSERR) the
 * UNIX errno `error', then send it back upstream.
 */
static void
tun_senderrack(queue_t *q, mblk_t *mp, t_uscalar_t prim, t_uscalar_t dl_err,
    t_uscalar_t error)
{
	dl_error_ack_t *errack;

	mp = tun_realloc_mblk(q, mp, sizeof (dl_error_ack_t), mp, B_FALSE);
	if (mp == NULL)
		return;

	errack = (dl_error_ack_t *)mp->b_rptr;
	errack->dl_primitive = DL_ERROR_ACK;
	errack->dl_error_primitive = prim;
	errack->dl_errno = dl_err;
	errack->dl_unix_errno = error;
	mp->b_datap->db_type = M_PCPROTO;
	qreply(q, mp);
}
/*
* Free all messages in an mblk chain and optionally collect
* byte-counter stats. Caller responsible for per-packet stats
*/
/*
 * Free every message in a b_next chain.  If `bytecount' is non-NULL,
 * each message's data size is atomically added to it before freeing.
 * Caller is responsible for per-packet statistics.
 */
static void
tun_freemsg_chain(mblk_t *mp, uint64_t *bytecount)
{
	mblk_t *next;

	for (; mp != NULL; mp = next) {
		ASSERT(mp->b_prev == NULL);
		next = mp->b_next;
		mp->b_next = NULL;
		if (bytecount != NULL)
			atomic_add_64(bytecount, (int64_t)msgdsize(mp));
		freemsg(mp);
	}
}
/*
* Send all messages in a chain of mblk chains and optionally collect
* byte-counter stats. Caller responsible for per-packet stats, and insuring
* mp is always non-NULL.
*
* This is a macro so we can save stack. Assume the caller function
* has local-variable "nmp" as a placeholder. Define two versions, one with
* byte-counting stats and one without.
*/
#define TUN_PUTMSG_CHAIN_STATS(q, mp, nmp, bytecount) \
(nmp) = NULL; \
ASSERT((mp) != NULL); \
do { \
if ((nmp) != NULL) \
putnext(q, (nmp)); \
ASSERT((mp)->b_prev == NULL); \
(nmp) = (mp); \
(mp) = (mp)->b_next; \
(nmp)->b_next = NULL; \
atomic_add_64(bytecount, (int64_t)msgdsize(nmp)); \
} while ((mp) != NULL); \
\
putnext((q), (nmp)) /* trailing semicolon provided by instantiator. */
#define TUN_PUTMSG_CHAIN(q, mp, nmp) \
(nmp) = NULL; \
ASSERT((mp) != NULL); \
do { \
if ((nmp) != NULL) \
putnext(q, (nmp)); \
ASSERT((mp)->b_prev == NULL); \
(nmp) = (mp); \
(mp) = (mp)->b_next; \
(nmp)->b_next = NULL; \
} while ((mp) != NULL); \
\
putnext((q), (nmp)) /* trailing semicolon provided by instantiator. */
/*
* Macro that not only checks tun_itp, but also sees if one got loaded
* via ipsecconf(1m)/PF_POLICY behind our backs. Note the sleazy update of
* (tun)->tun_itp_gen so we don't lose races with other possible updates via
* PF_POLICY.
*/
#define tun_policy_present(tun, ns, ipss) \
(((tun)->tun_itp != NULL) || \
(((tun)->tun_itp_gen < ipss->ipsec_tunnel_policy_gen) && \
((tun)->tun_itp_gen = ipss->ipsec_tunnel_policy_gen) && \
(((tun)->tun_itp = get_tunnel_policy((tun)->tun_lifname, ns)) \
!= NULL)))
/*
* Search tuns_byaddr_list for occurrence of tun_t with matching
* inner addresses. This function does not take into account
* prefixes. Possibly we could generalize this function in the
* future with V6_MASK_EQ() and pass in an all 1's prefix for IP
* address matches.
* Returns NULL on no match.
* This function is not directly called - it's assigned into itp_get_byaddr().
*/
static ipsec_tun_pol_t *
itp_get_byaddr_fn(uint32_t *lin, uint32_t *fin, int af, netstack_t *ns)
{
	tun_t *tun_list;
	uint_t index;
	in6_addr_t lmapped, fmapped, *laddr, *faddr;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	tun_stack_t *tuns = ns->netstack_tun;

	/*
	 * Normalize v4 addresses to v4-mapped v6 form so a single
	 * IN6_ARE_ADDR_EQUAL() comparison works for both families.
	 */
	if (af == AF_INET) {
		laddr = &lmapped;
		faddr = &fmapped;
		IN6_INADDR_TO_V4MAPPED((struct in_addr *)lin, laddr);
		IN6_INADDR_TO_V4MAPPED((struct in_addr *)fin, faddr);
	} else {
		laddr = (in6_addr_t *)lin;
		faddr = (in6_addr_t *)fin;
	}

	index = TUN_BYADDR_LIST_HASH(*faddr);

	/*
	 * it's ok to grab global lock while holding tun_lock/perimeter
	 */
	mutex_enter(&tuns->tuns_global_lock);

	/*
	 * walk through list of tun_t looking for a match of
	 * inner addresses. Addresses are inserted with
	 * IN6_IPADDR_TO_V4MAPPED(), so v6 matching works for
	 * all cases.
	 */
	for (tun_list = tuns->tuns_byaddr_list[index]; tun_list;
	    tun_list = tun_list->tun_next) {
		if (IN6_ARE_ADDR_EQUAL(&tun_list->tun_laddr, laddr) &&
		    IN6_ARE_ADDR_EQUAL(&tun_list->tun_faddr, faddr)) {
			ipsec_tun_pol_t *itp;

			if (!tun_policy_present(tun_list, ns, ipss)) {
				tun1dbg(("itp_get_byaddr: No IPsec policy on "
				    "matching tun_t instance %p/%s\n",
				    (void *)tun_list, tun_list->tun_lifname));
				continue;
			}
			tun1dbg(("itp_get_byaddr: Found matching tun_t %p with "
			    "IPsec policy\n", (void *)tun_list));
			/*
			 * Lock handoff: take itp_lock while still holding
			 * the global lock, so the itp can't disappear
			 * between dropping the global lock and taking our
			 * reference.
			 */
			mutex_enter(&tun_list->tun_itp->itp_lock);
			itp = tun_list->tun_itp;
			mutex_exit(&tuns->tuns_global_lock);
			ITP_REFHOLD(itp);
			mutex_exit(&itp->itp_lock);
			tun1dbg(("itp_get_byaddr: Found itp %p \n",
			    (void *)itp));
			/* Caller is responsible for the reference we added. */
			return (itp);
		}
	}

	/* didn't find one, return zilch */
	tun1dbg(("itp_get_byaddr: No matching tunnel instances with policy\n"));
	mutex_exit(&tuns->tuns_global_lock);
	return (NULL);
}
/*
* Search tuns_byaddr_list for occurrence of tun_t, same upper and lower stream,
* and same type (6to4 vs automatic vs configured)
* If none is found, insert this tun entry.
*/
static void
tun_add_byaddr(tun_t *atp)
{
	tun_t *tun_list;
	t_uscalar_t ppa = atp->tun_ppa;
	uint_t mask = atp->tun_flags & (TUN_LOWER_MASK | TUN_UPPER_MASK);
	uint_t tun_type = (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4));
	uint_t index = TUN_BYADDR_LIST_HASH(atp->tun_faddr);
	tun_stack_t *tuns = atp->tun_netstack->netstack_tun;

	tun1dbg(("tun_add_byaddr: index = %d\n", index));

	/* Caller must hold the per-stack global lock. */
	ASSERT(MUTEX_HELD(&tuns->tuns_global_lock));
	ASSERT(atp->tun_next == NULL);

	/*
	 * walk through list of tun_t looking for a match of
	 * ppa, same upper and lower stream and same tunnel type
	 * (automatic or configured).
	 * There shouldn't be all that many tunnels, so a sequential
	 * search of the bucket should be fine.
	 */
	for (tun_list = tuns->tuns_byaddr_list[index]; tun_list;
	    tun_list = tun_list->tun_next) {
		if (tun_list->tun_ppa == ppa &&
		    ((tun_list->tun_flags & (TUN_LOWER_MASK |
		    TUN_UPPER_MASK)) == mask) &&
		    ((tun_list->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) ==
		    tun_type)) {
			tun1dbg(("tun_add_byaddr: tun 0x%p Found ppa %d " \
			    "tun_stats 0x%p\n", (void *)atp, ppa,
			    (void *)tun_list));
			tun1dbg(("tun_add_byaddr: Nothing to do."));
			/*
			 * Collision, do nothing.  atp->tun_ptpn stays NULL,
			 * which tun_rem_tun_byaddr_list() uses to recognize
			 * an entry that was never inserted.
			 */
			return;
		}
	}

	/*
	 * didn't find one, throw it in the global list: insert at the head
	 * of the hash bucket, maintaining tun_ptpn as the back-pointer to
	 * the b_next-style link that points at us (enables O(1) removal).
	 */
	atp->tun_next = tuns->tuns_byaddr_list[index];
	atp->tun_ptpn = &(tuns->tuns_byaddr_list[index]);
	if (tuns->tuns_byaddr_list[index] != NULL)
		tuns->tuns_byaddr_list[index]->tun_ptpn = &(atp->tun_next);
	tuns->tuns_byaddr_list[index] = atp;
}
/*
* Search tuns_ppa_list for occurrence of tun_ppa, same lower stream,
* and same type (6to4 vs automatic vs configured)
* If none is found, insert this tun entry and create a new kstat for
* the entry.
* This is needed so that multiple tunnels with the same interface
* name (e.g. ip.tun0 under IPv4 and ip.tun0 under IPv6) can share the
* same kstats. (they share the same tun_stat and kstat)
* Don't need to hold tun_lock if we are coming is as qwriter()
*/
static tun_stats_t *
tun_add_stat(queue_t *q)
{
	tun_t *atp = (tun_t *)q->q_ptr;
	tun_stats_t *tun_list;
	tun_stats_t *tun_stat;
	t_uscalar_t ppa = atp->tun_ppa;
	uint_t lower = atp->tun_flags & TUN_LOWER_MASK;
	uint_t tun_type = (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4));
	uint_t index = TUN_LIST_HASH(ppa);
	tun_stack_t *tuns = atp->tun_netstack->netstack_tun;

	/* Instance must not already be attached to a tun_stats_t. */
	ASSERT(atp->tun_stats == NULL);
	ASSERT(atp->tun_kstat_next == NULL);

	/*
	 * it's ok to grab global lock while holding tun_lock/perimeter
	 */
	mutex_enter(&tuns->tuns_global_lock);

	/*
	 * walk through list of tun_stats looking for a match of
	 * ppa, same lower stream and same tunnel type (automatic
	 * or configured
	 * There shouldn't be all that many tunnels, so a sequential
	 * search should be fine
	 * XXX - this may change if tunnels get ever get created on the fly
	 */
	for (tun_list = tuns->tuns_ppa_list[index]; tun_list;
	    tun_list = tun_list->ts_next) {
		if (tun_list->ts_ppa == ppa &&
		    tun_list->ts_lower == lower &&
		    tun_list->ts_type == tun_type) {
			tun1dbg(("tun_add_stat: tun 0x%p Found ppa %d " \
			    "tun_stats 0x%p\n", (void *)atp, ppa,
			    (void *)tun_list));
			/*
			 * Lock handoff: take ts_lock before dropping the
			 * global lock so the entry can't be freed out from
			 * under us by tun_rem_ppa_list().
			 */
			mutex_enter(&tun_list->ts_lock);
			mutex_exit(&tuns->tuns_global_lock);
			ASSERT(tun_list->ts_refcnt > 0);
			tun_list->ts_refcnt++;
			ASSERT(atp->tun_kstat_next == NULL);
			ASSERT(atp != tun_list->ts_atp);
			/*
			 * add this tunnel instance to head of list
			 * of tunnels referencing this kstat structure
			 */
			atp->tun_kstat_next = tun_list->ts_atp;
			tun_list->ts_atp = atp;
			atp->tun_stats = tun_list;
			mutex_exit(&tun_list->ts_lock);

			/*
			 * Check for IPsec tunnel policy pointer, if it hasn't
			 * been set already.  If we call get_tunnel_policy()
			 * and return NULL, there's none configured.
			 */
			if (atp->tun_lifname[0] != '\0' &&
			    atp->tun_itp == NULL) {
				atp->tun_itp =
				    get_tunnel_policy(atp->tun_lifname,
				    atp->tun_netstack);
			}
			return (tun_list);
		}
	}

	/*
	 * didn't find one, allocate a new one.  KM_NOSLEEP: failure is
	 * simply reported to the caller as NULL.
	 */
	tun_stat = kmem_zalloc(sizeof (tun_stats_t), KM_NOSLEEP);
	if (tun_stat != NULL) {
		mutex_init(&tun_stat->ts_lock, NULL, MUTEX_DEFAULT,
		    NULL);
		tun1dbg(("tun_add_stat: New ppa %d tun_stat 0x%p\n", ppa,
		    (void *)tun_stat));
		tun_stat->ts_refcnt = 1;
		tun_stat->ts_lower = lower;
		tun_stat->ts_type = tun_type;
		tun_stat->ts_ppa = ppa;
		tun_stat->ts_next = tuns->tuns_ppa_list[index];
		tuns->tuns_ppa_list[index] = tun_stat;
		tun_stat->ts_atp = atp;
		atp->tun_kstat_next = NULL;
		atp->tun_stats = tun_stat;
		mutex_exit(&tuns->tuns_global_lock);
		/* Create the kstat outside the global lock. */
		tun_statinit(tun_stat, q->q_qinfo->qi_minfo->mi_idname,
		    atp->tun_netstack->netstack_stackid);
	} else {
		mutex_exit(&tuns->tuns_global_lock);
	}
	return (tun_stat);
}
/*
* remove tun from tuns_byaddr_list
* called either holding tun_lock or in perimeter
*/
/*
 * Remove a tun_t from the per-stack tuns_byaddr_list.  An entry whose
 * tun_ptpn back-pointer is NULL was never actually inserted (it collided
 * in tun_add_byaddr()), so there is nothing to unlink.  Caller must hold
 * tuns_global_lock (or be in the perimeter).
 */
static void
tun_rem_tun_byaddr_list(tun_t *atp)
{
	ASSERT(MUTEX_HELD(&atp->tun_netstack->netstack_tun->tuns_global_lock));

	if (atp->tun_ptpn != NULL) {
		/* Splice this instance out of the doubly-linked bucket. */
		*(atp->tun_ptpn) = atp->tun_next;
		if (atp->tun_next != NULL) {
			atp->tun_next->tun_ptpn = atp->tun_ptpn;
			atp->tun_next = NULL;
		}
		atp->tun_ptpn = NULL;
	}

	ASSERT(atp->tun_next == NULL);
}
/*
* remove tun from tuns_ppa_list
* called either holding tun_lock or in perimeter
*/
static void
tun_rem_ppa_list(tun_t *atp)
{
	uint_t index = TUN_LIST_HASH(atp->tun_ppa);
	tun_stats_t *tun_stat = atp->tun_stats;
	tun_stats_t **tun_list;
	tun_t **at_list;
	tun_stack_t *tuns = atp->tun_netstack->netstack_tun;

	/* Nothing to do if this instance never attached to a tun_stats_t. */
	if (tun_stat == NULL)
		return;

	ASSERT(atp->tun_ppa == tun_stat->ts_ppa);

	/* Lock order: global list lock, then the per-stat lock. */
	mutex_enter(&tuns->tuns_global_lock);
	mutex_enter(&tun_stat->ts_lock);
	atp->tun_stats = NULL;
	tun_stat->ts_refcnt--;

	/*
	 * If this is the last instance, delete the tun_stat AND unlink the
	 * ipsec_tun_pol_t from the AVL tree.
	 */
	if (tun_stat->ts_refcnt == 0) {
		kstat_t *tksp;

		tun1dbg(("tun_rem_ppa_list: tun 0x%p Last ref ppa %d tun_stat" \
		    " 0x%p\n", (void *)atp, tun_stat->ts_ppa,
		    (void *)tun_stat));

		if (atp->tun_itp != NULL)
			itp_unlink(atp->tun_itp, atp->tun_netstack);

		ASSERT(atp->tun_kstat_next == NULL);

		/* Unlink the stat entry from its hash bucket. */
		for (tun_list = &tuns->tuns_ppa_list[index]; *tun_list;
		    tun_list = &(*tun_list)->ts_next) {
			if (tun_stat == *tun_list) {
				*tun_list = tun_stat->ts_next;
				tun_stat->ts_next = NULL;
				break;
			}
		}
		mutex_exit(&tuns->tuns_global_lock);

		/*
		 * Detach the kstat pointer, then delete the kstat and free
		 * the stat entry after both locks are dropped.
		 */
		tksp = tun_stat->ts_ksp;
		tun_stat->ts_ksp = NULL;
		mutex_exit(&tun_stat->ts_lock);
		kstat_delete_netstack(tksp,
		    atp->tun_netstack->netstack_stackid);
		mutex_destroy(&tun_stat->ts_lock);
		kmem_free(tun_stat, sizeof (tun_stats_t));
		return;
	}
	mutex_exit(&tuns->tuns_global_lock);

	tun1dbg(("tun_rem_ppa_list: tun 0x%p Removing ref ppa %d tun_stat " \
	    "0x%p\n", (void *)atp, tun_stat->ts_ppa, (void *)tun_stat));

	ASSERT(tun_stat->ts_atp->tun_kstat_next != NULL);

	/*
	 * remove tunnel instance from list of tunnels referencing
	 * this kstat. List should be short, so we just search
	 * sequentially
	 */
	for (at_list = &tun_stat->ts_atp; *at_list;
	    at_list = &(*at_list)->tun_kstat_next) {
		if (atp == *at_list) {
			*at_list = atp->tun_kstat_next;
			atp->tun_kstat_next = NULL;
			break;
		}
	}

	ASSERT(tun_stat->ts_atp != NULL);
	ASSERT(atp->tun_kstat_next == NULL);
	mutex_exit(&tun_stat->ts_lock);
}
/*
 * Handle all non-unitdata DLPI requests from above.
 * Called as qwriter(), so we run exclusively within the perimeter.
 * Each supported primitive either replies directly (and returns), or
 * falls out of the switch with dl_err/dl_errno set, in which case a
 * DL_ERROR_ACK is sent at the bottom of the function.
 */
static void
tun_wput_dlpi_other(queue_t *q, mblk_t *mp)
{
	tun_t *atp = (tun_t *)q->q_ptr;
	uint_t lvers;
	t_uscalar_t prim = *((t_uscalar_t *)mp->b_rptr);
	t_uscalar_t dl_err = DL_UNSUPPORTED;
	t_uscalar_t dl_errno = 0;

	switch (prim) {
	case DL_INFO_REQ: {
		dl_info_ack_t *dinfo;

		tun1dbg(("tun_wput_dlpi_other: got DL_INFO_REQ\n"));

		/* Resize mp to hold the ack; on failure a bufcall retries. */
		if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_info_ack_t), mp,
		    B_FALSE)) == NULL) {
			return;
		}
		mp->b_datap->db_type = M_PCPROTO;

		/* send DL_INFO_ACK back up */
		dinfo = (dl_info_ack_t *)mp->b_rptr;
		*dinfo = infoack;
		dinfo->dl_current_state = atp->tun_state;
		dinfo->dl_max_sdu = atp->tun_mtu;

		/* dl_mac_type is set to DL_IPV4 by default. */
		if (atp->tun_flags & TUN_L_V6)
			dinfo->dl_mac_type = DL_IPV6;

		/*
		 * We set the address length to non-zero so that
		 * automatic tunnels will not have multicast or
		 * point to point set.
		 * Someday IPv6 needs to support multicast over automatic
		 * tunnels
		 * 6to4 tunnels should behave the same as automatic tunnels
		 */
		if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) {
			/*
			 * set length to size of ip address so that
			 * ip_newroute will generate dl_unitdata_req for
			 * us with gateway or dest filed in. (i.e.
			 * might as well have ip do something useful)
			 */
			dinfo->dl_addr_length = IPV6_ADDR_LEN;
		} else {
			dinfo->dl_addr_length = 0;
		}
		qreply(q, mp);
		return;
	}
	case DL_ATTACH_REQ: {
		dl_attach_req_t *dla;

		tun1dbg(("tun_wput_dlpi_other: got DL_ATTACH_REQ\n"));

		if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp,
		    B_TRUE)) == NULL) {
			return;
		}

		dla = (dl_attach_req_t *)mp->b_rptr;

		/* Attach is only legal from the unattached state. */
		if (atp->tun_state != DL_UNATTACHED) {
			dl_err = DL_OUTSTATE;
			tun0dbg(("tun_wput_dlpi_other: "
			    "DL_ATTACH_REQ state not DL_UNATTACHED (0x%x)\n",
			    atp->tun_state));
			break;
		}

		atp->tun_ppa = dla->dl_ppa;

		/*
		 * get (possibly shared) kstat structure
		 */
		if (tun_add_stat(q) == NULL) {
			ASSERT(atp->tun_stats == NULL);
			dl_err = DL_SYSERR;
			dl_errno = ENOMEM;
			break;
		}
		atp->tun_state = DL_UNBOUND;

		tun_sendokack(q, mp, prim);
		return;
	}
	case DL_DETACH_REQ:
		tun1dbg(("tun_wput_dlpi_other: got DL_DETACH_REQ\n"));

		if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp,
		    B_FALSE)) == NULL) {
			return;
		}

		/* Must be unbound (i.e. attached but not bound) to detach. */
		if (atp->tun_state != DL_UNBOUND) {
			dl_err = DL_OUTSTATE;
			tun0dbg(("tun_wput_dlpi_other: " \
			    "DL_DETACH_REQ state not DL_UNBOUND (0x%x)\n",
			    atp->tun_state));
			break;
		}
		atp->tun_state = DL_UNATTACHED;

		/*
		 * don't need to hold tun_lock
		 * since this is really a single thread operation
		 * for this instance
		 */
		if (atp->tun_stats) {
			tun_rem_ppa_list(atp);
			tun1dbg(("tun_wput_dlpi_other: deleting kstat"));
		}
		tun_sendokack(q, mp, prim);
		return;
	case DL_BIND_REQ: {
		dl_bind_req_t *bind_req;
		t_uscalar_t dl_sap = 0;

		tun1dbg(("tun_wput_dlpi_other: got DL_BIND_REQ\n"));

		if (atp->tun_state != DL_UNBOUND) {
			dl_err = DL_OUTSTATE;
			tun0dbg(("tun_wput_dlpi_other: " \
			    "DL_BIND_REQ state not DL_UNBOUND (0x%x)\n",
			    atp->tun_state));
			break;
		}

		atp->tun_state = DL_IDLE;

		bind_req = (dl_bind_req_t *)mp->b_rptr;
		dl_sap = bind_req->dl_sap;

		/* Only IP (v4 or v6) binds to this driver. */
		ASSERT(bind_req->dl_sap == IP_DL_SAP ||
		    bind_req->dl_sap == IP6_DL_SAP);

		lvers = atp->tun_flags & TUN_LOWER_MASK;

		if (dl_sap == IP_DL_SAP) {
			/* IPv4 upper: reject conflicting/unsupported modes. */
			if ((atp->tun_flags & TUN_U_V6) != 0) {
				dl_err = DL_BOUND;
				tun0dbg(("tun_wput_dlpi_other: " \
				    "DL_BIND_REQ upper TUN_U_V6 (0x%x)\n",
				    atp->tun_flags));
				break;
			}
			if ((atp->tun_flags & TUN_AUTOMATIC) != 0) {
				dl_err = DL_SYSERR;
				dl_errno = EINVAL;
				tun0dbg(("tun_wput_dlpi_other: " \
				    "DL_BIND_REQ for IPv4 atun (0x%x)\n",
				    atp->tun_flags));
				break;
			}
			if ((atp->tun_flags & TUN_6TO4) != 0) {
				dl_err = DL_SYSERR;
				dl_errno = EINVAL;
				tun0dbg(("tun_wput_dlpi_other: " \
				    "DL_BIND_REQ for 6to4 tunnel (0x%x)\n",
				    atp->tun_flags));
				break;
			}
			atp->tun_flags |= TUN_U_V4;
			if (lvers == TUN_L_V4) {
				atp->tun_ipha.ipha_protocol = IPPROTO_ENCAP;
			} else {
				ASSERT(lvers == TUN_L_V6);
				/* Adjust headers. */
				if (atp->tun_encap_lim >= 0) {
					atp->tun_ip6h.ip6_nxt =
					    IPPROTO_DSTOPTS;
					atp->tun_telopt =
					    tun_limit_init_upper_v4;
					atp->tun_telopt.tel_telopt.
					    ip6ot_encap_limit =
					    atp->tun_encap_lim;
				} else {
					atp->tun_ip6h.ip6_nxt = IPPROTO_ENCAP;
				}
			}
		} else if (dl_sap == IP6_DL_SAP) {
			/* IPv6 upper: set outer-header protocol to match. */
			if ((atp->tun_flags & TUN_U_V4) != 0) {
				dl_err = DL_BOUND;
				tun0dbg(("tun_wput_dlpi_other: "
				    "DL_BIND_REQ upper TUN_U_V4 (0x%x)\n",
				    atp->tun_flags));
				break;
			}
			atp->tun_flags |= TUN_U_V6;
			if (lvers == TUN_L_V4) {
				atp->tun_ipha.ipha_protocol = IPPROTO_IPV6;
			} else {
				ASSERT(lvers == TUN_L_V6);
				if (atp->tun_encap_lim >= 0) {
					atp->tun_ip6h.ip6_nxt =
					    IPPROTO_DSTOPTS;
					atp->tun_telopt =
					    tun_limit_init_upper_v6;
					atp->tun_telopt.tel_telopt.
					    ip6ot_encap_limit =
					    atp->tun_encap_lim;
				} else {
					atp->tun_ip6h.ip6_nxt = IPPROTO_IPV6;
				}
			}
		} else {
			/* Unknown SAP: back out the state change and NAK. */
			atp->tun_state = DL_UNBOUND;
			break;
		}

		/*
		 * Send DL_BIND_ACK, which is the same size as the
		 * request, so we can re-use the mblk.
		 */
		*(dl_bind_ack_t *)mp->b_rptr = bindack;
		((dl_bind_ack_t *)mp->b_rptr)->dl_sap = dl_sap;
		mp->b_datap->db_type = M_PCPROTO;
		qreply(q, mp);
		return;
	}
	case DL_UNBIND_REQ:
		tun1dbg(("tun_wput_dlpi_other: got DL_UNBIND_REQ\n"));

		if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp,
		    B_FALSE)) == NULL) {
			return;
		}

		if (atp->tun_state != DL_IDLE) {
			dl_err = DL_OUTSTATE;
			tun0dbg(("tun_wput_dlpi_other: " \
			    "DL_UNBIND_REQ state not DL_IDLE (0x%x)\n",
			    atp->tun_state));
			break;
		}
		atp->tun_state = DL_UNBOUND;

		/* Send a DL_OK_ACK. */
		tun_sendokack(q, mp, prim);
		return;
	case DL_PHYS_ADDR_REQ: {
		dl_phys_addr_ack_t *dpa;

		tun1dbg(("tun_wput_dlpi_other: got DL_PHYS_ADDR_REQ\n"));

		if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_phys_addr_ack_t),
		    mp, B_FALSE)) == NULL) {
			return;
		}

		dpa = (dl_phys_addr_ack_t *)mp->b_rptr;
		dpa->dl_primitive = DL_PHYS_ADDR_ACK;

		/*
		 * dl_addr_length must match info ack
		 */
		if (atp->tun_flags & TUN_AUTOMATIC) {
			if ((atp->tun_flags & TUN_U_V4) != 0) {
				dl_err = DL_SYSERR;
				dl_errno = EINVAL;
				tun0dbg(("tun_wput_dlpi_other: " \
				    "DL_PHYS_ADDR_REQ for IPv4 atun\n"));
				break;
			} else {
				dpa->dl_addr_length = IPV6_ADDR_LEN;
			}
		} else if (atp->tun_flags & TUN_6TO4) {
			if ((atp->tun_flags & TUN_U_V4) != 0) {
				dl_err = DL_SYSERR;
				dl_errno = EINVAL;
				tun0dbg(("tun_wput_dlpi_other: " \
				    "DL_PHYS_ADDR_REQ for 6to4 tunnel\n"));
				break;
			} else {
				dpa->dl_addr_length = IPV6_ADDR_LEN;
			}
		} else {
			dpa->dl_addr_length = 0;
		}

		dpa->dl_addr_offset = 0;
		mp->b_datap->db_type = M_PCPROTO;
		qreply(q, mp);
		return;
	}
	case DL_SUBS_BIND_REQ:
	case DL_ENABMULTI_REQ:
	case DL_DISABMULTI_REQ:
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ:
	case DL_AGGR_REQ:
	case DL_UNAGGR_REQ:
	case DL_UDQOS_REQ:
	case DL_CONNECT_REQ:
	case DL_TOKEN_REQ:
	case DL_DISCONNECT_REQ:
	case DL_RESET_REQ:
	case DL_DATA_ACK_REQ:
	case DL_REPLY_REQ:
	case DL_REPLY_UPDATE_REQ:
	case DL_XID_REQ:
	case DL_TEST_REQ:
	case DL_SET_PHYS_ADDR_REQ:
	case DL_GET_STATISTICS_REQ:
	case DL_CAPABILITY_REQ:
	case DL_CONTROL_REQ:
		/* unsupported command */
		break;
	default:
		/* unknown command */
		tun0dbg(("tun_wput_dlpi_other: unknown DLPI message type: " \
		    "%d\n", prim));
		dl_err = DL_BADPRIM;
	}
	/* Common error path: NAK with the accumulated error codes. */
	tun_senderrack(q, mp, prim, dl_err, dl_errno);
}
/*
 * Handle all DLPI requests from above.
 * Fast-paths DL_UNITDATA_REQ and DL_NOTIFY_REQ inline; everything else
 * is deferred to tun_wput_dlpi_other() inside the perimeter via qwriter.
 * Returns ENOMEM to make the service procedure stop when flow-controlled.
 */
static int
tun_wput_dlpi(queue_t *q, mblk_t *mp)
{
	tun_t *atp = (tun_t *)q->q_ptr;
	mblk_t *mp1;
	int error = 0;
	t_uscalar_t prim = *((t_uscalar_t *)mp->b_rptr);

	switch (prim) {
	case DL_UNITDATA_REQ:
		/* Silently drop data sent before the stream is bound. */
		if (atp->tun_state != DL_IDLE) {
			break;
		}
		/* Flow control: requeue and tell wsrv to stop. */
		if (!canputnext(q)) {
			atomic_add_32(&atp->tun_xmtretry, 1);
			(void) putbq(q, mp);
			return (ENOMEM);	/* to get service proc to stop */
		}
		/* we don't use any of the data in the DLPI header */
		mp1 = mp->b_cont;
		freeb(mp);
		if (mp1 == NULL) {
			break;
		}
		switch (atp->tun_flags & TUN_UPPER_MASK) {
		case TUN_U_V4:
			tun_wdata_v4(q, mp1);
			break;
		case TUN_U_V6:
			tun_wdata_v6(q, mp1);
			break;
		default:
			/* No upper protocol bound; count and leak-free drop. */
			atomic_add_32(&atp->tun_OutErrors, 1);
			ASSERT((atp->tun_flags & TUN_UPPER_MASK) != TUN_U_V4 ||
			    (atp->tun_flags & TUN_UPPER_MASK) != TUN_U_V6);
			break;
		}
		break;
	case DL_NOTIFY_REQ: {
		dl_notify_req_t *dlip;

		if (MBLKL(mp) < DL_NOTIFY_REQ_SIZE) {
			tun_senderrack(q, mp, prim, DL_BADPRIM, 0);
			break;
		}

		dlip = (dl_notify_req_t *)mp->b_rptr;

		/* Only SDU-size change notifications are supported. */
		atp->tun_notifications =
		    dlip->dl_notifications & DL_NOTE_SDU_SIZE;

		/* Reuse the request mblk as the DL_NOTIFY_ACK. */
		dlip->dl_notifications &= DL_NOTE_SDU_SIZE;
		dlip->dl_primitive = DL_NOTIFY_ACK;
		mp->b_wptr = mp->b_rptr + DL_NOTIFY_ACK_SIZE;
		qreply(q, mp);

		/* Immediately report the current SDU size upward. */
		tun_sendsdusize(q);

		break;
	}
	default:
		/* All other primitives need exclusive perimeter access. */
		qwriter(q, mp, tun_wput_dlpi_other, PERIM_INNER);
		break;
	}
	return (error);
}
/*
 * Set the tunnel parameters (SIOCSTUNPARAM / OSIOCSTUNPARAM).
 * Called as qwriter, so we have exclusive access to the tun_t.
 * Validates the iftun_req in the M_IOCTL payload, updates source/dest
 * addresses, hop limit, encapsulation limit and simple IPsec policy,
 * then ACKs (possibly deferred until T_BIND_ACK) or NAKs the ioctl.
 */
static void
tun_sparam(queue_t *q, mblk_t *mp)
{
	tun_t *atp = (tun_t *)q->q_ptr;
	struct iocblk *iocp = (struct iocblk *)(mp->b_rptr);
	struct iftun_req *ta;
	mblk_t *mp1;
	int uerr = 0;
	uint_t lvers;
	sin_t *sin;
	sin6_t *sin6;
	size_t size;
	boolean_t new;
	ipsec_stack_t *ipss = atp->tun_netstack->netstack_ipsec;
	tun_stack_t *tuns = atp->tun_netstack->netstack_tun;

	/* don't allow changes after dl_bind_req */
	if (atp->tun_state == DL_IDLE) {
		uerr = EAGAIN;
		goto nak;
	}

	/* Walk M_IOCTL -> M_COPY -> DATA to find the iftun_req payload. */
	mp1 = mp->b_cont;
	if (mp1 == NULL) {
		uerr = EPROTO;
		goto nak;
	}

	mp1 = mp1->b_cont;
	if (mp1 == NULL) {
		uerr = EPROTO;
		goto nak;
	}
	size = mp1->b_wptr - mp1->b_rptr;
	/* Accept both the current and the old (smaller) request layout. */
	if ((size != sizeof (struct iftun_req)) &&
	    (size != sizeof (struct old_iftun_req))) {
		uerr = EPROTO;
		goto nak;
	}
	new = (size == sizeof (struct iftun_req));

	/* Only one outstanding parameter ioctl at a time. */
	if (atp->tun_iocmp) {
		uerr = EBUSY;
		goto nak;
	}

	lvers = atp->tun_flags & TUN_LOWER_MASK;

	ta = (struct iftun_req *)mp1->b_rptr;

	/*
	 * Check version number for parsing the security settings.
	 */
	if (ta->ifta_vers != IFTUN_VERSION) {
		uerr = EINVAL;
		goto nak;
	}

	/*
	 * Upper layer will give us a v4/v6 indicator, in case we don't know
	 * already.
	 */
	if ((atp->tun_flags & TUN_UPPER_MASK) == 0) {
		/* High bit of ifta_flags carries the v6-upper indicator. */
		if (ta->ifta_flags & 0x80000000) {
			atp->tun_flags |= TUN_U_V6;
		} else {
			atp->tun_flags |= TUN_U_V4;
		}
	}

	/* Automatic and 6to4 tunnels cannot carry IPv4 above. */
	if (((atp->tun_flags & (TUN_AUTOMATIC | TUN_U_V4)) ==
	    (TUN_AUTOMATIC | TUN_U_V4)) ||
	    ((atp->tun_flags & (TUN_6TO4 | TUN_U_V4)) ==
	    (TUN_6TO4 | TUN_U_V4))) {
		uerr = EINVAL;
		goto nak;
	}

	if (ta->ifta_flags & IFTUN_SRC) {
		switch (ta->ifta_saddr.ss_family) {
		case AF_INET:
			sin = (sin_t *)&ta->ifta_saddr;
			/* Address family must match the lower stream. */
			if (lvers != TUN_L_V4) {
				uerr = EINVAL;
				goto nak;
			}
			/* Reject wildcard, broadcast and multicast sources. */
			if ((sin->sin_addr.s_addr == INADDR_ANY) ||
			    (sin->sin_addr.s_addr == 0xffffffff) ||
			    CLASSD(sin->sin_addr.s_addr)) {
				uerr = EADDRNOTAVAIL;
				goto nak;
			}
			atp->tun_ipha.ipha_src = sin->sin_addr.s_addr;
			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr,
			    &atp->tun_laddr);
			break;
		case AF_INET6:
			sin6 = (sin6_t *)&ta->ifta_saddr;
			if (lvers != TUN_L_V6) {
				uerr = EINVAL;
				goto nak;
			}

			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
			    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) ||
			    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
				uerr = EADDRNOTAVAIL;
				goto nak;
			}
			atp->tun_ip6h.ip6_src = atp->tun_laddr =
			    sin6->sin6_addr;
			break;
		default:
			uerr = EAFNOSUPPORT;
			goto nak;
		}
		/*
		 * If I reach here, then I didn't bail, the src address
		 * was good.
		 */
		atp->tun_flags |= TUN_SRC;
	}

	if (ta->ifta_flags & IFTUN_DST) {
		/* Automatic/6to4 tunnels derive dest; explicit dst invalid. */
		if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) {
			uerr = EINVAL;
			goto nak;
		}
		if (ta->ifta_saddr.ss_family == AF_INET) {
			sin = (sin_t *)&ta->ifta_daddr;
			if (lvers != TUN_L_V4) {
				uerr = EINVAL;
				goto nak;
			}
			if ((sin->sin_addr.s_addr == 0) ||
			    (sin->sin_addr.s_addr == 0xffffffff) ||
			    CLASSD(sin->sin_addr.s_addr)) {
				uerr = EADDRNOTAVAIL;
				goto nak;
			}
			atp->tun_ipha.ipha_dst = sin->sin_addr.s_addr;
			/* Remove from previous hash bucket */
			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr,
			    &atp->tun_faddr);
		} else if (ta->ifta_saddr.ss_family == AF_INET6) {
			sin6 = (sin6_t *)&ta->ifta_daddr;
			if (lvers != TUN_L_V6) {
				uerr = EINVAL;
				goto nak;
			}

			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
			    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) ||
			    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
				uerr = EADDRNOTAVAIL;
				goto nak;
			}

			/* Remove from previous hash bucket */
			atp->tun_ip6h.ip6_dst = atp->tun_faddr =
			    sin6->sin6_addr;
		} else {
			uerr = EAFNOSUPPORT;
			goto nak;
		}
		/*
		 * If I reach here, then I didn't bail, the dst address
		 * was good.
		 */
		atp->tun_flags |= TUN_DST;

		/* tun_faddr changed, move to proper hash bucket */
		mutex_enter(&tuns->tuns_global_lock);
		tun_rem_tun_byaddr_list(atp);
		tun_add_byaddr(atp);
		mutex_exit(&tuns->tuns_global_lock);
	}

	/* Hop limit is only present in the new request layout. */
	if (new && (ta->ifta_flags & IFTUN_HOPLIMIT)) {
		/* Check bounds. */
		if (ta->ifta_hop_limit < 1) {
			uerr = EINVAL;
			goto nak;
		}
		atp->tun_hop_limit = ta->ifta_hop_limit;
		/* XXX do we really need this flag */
		atp->tun_flags |= TUN_HOP_LIM;
		if (lvers == TUN_L_V4) {
			atp->tun_ipha.ipha_ttl = atp->tun_hop_limit;
		} else {
			atp->tun_ip6h.ip6_hops = atp->tun_hop_limit;
		}
	}

	/* Encapsulation limit only applies to IPv6 lower streams. */
	if (new && (ta->ifta_flags & IFTUN_ENCAP)) {
		/* Bounds checking. */
		if ((ta->ifta_encap_lim > IPV6_MAX_ENCAPLIMIT) ||
		    (lvers != TUN_L_V6)) {
			uerr = EINVAL;
			goto nak;
		}
		atp->tun_encap_lim = ta->ifta_encap_lim;
		atp->tun_flags |= TUN_ENCAP_LIM;
		if (ta->ifta_encap_lim >= 0) {
			atp->tun_telopt.tel_telopt.ip6ot_encap_limit =
			    ta->ifta_encap_lim;
			atp->tun_ip6h.ip6_nxt = IPPROTO_DSTOPTS;
		} else {
			/* Negative limit disables the dest-opts header. */
			switch (atp->tun_flags & TUN_UPPER_MASK) {
			case TUN_U_V4:
				atp->tun_ip6h.ip6_nxt = IPPROTO_ENCAP;
				break;
			case TUN_U_V6:
				atp->tun_ip6h.ip6_nxt = IPPROTO_IPV6;
				break;
			default:
				/* This shouldn't happen! */
				ASSERT((atp->tun_flags & TUN_UPPER_MASK) != 0);
				break;
			}
		}
	}

	/*
	 * If we passed in IFTUN_COMPLEX_SECURITY, do not do anything.  This
	 * allows us to let dumb ifconfig(1m)-like apps reflect what they see
	 * without a penalty.
	 */
	if ((ta->ifta_flags & (IFTUN_SECURITY | IFTUN_COMPLEX_SECURITY)) ==
	    IFTUN_SECURITY) {
		/* Can't set security properties for automatic tunnels. */
		if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) {
			uerr = EINVAL;
			goto nak;
		}

		/*
		 * The version number checked out, so just cast
		 * ifta_secinfo to an ipsr.
		 */
		if (ipsec_loaded(ipss)) {
			uerr = tun_set_sec_simple(atp,
			    (ipsec_req_t *)&ta->ifta_secinfo);
		} else {
			if (ipsec_failed(ipss)) {
				uerr = EPROTONOSUPPORT;
				goto nak;
			}
			/* Otherwise, try again later and load IPsec. */
			(void) putq(q, mp);
			ipsec_loader_loadnow(ipss);
			return;
		}
		if (uerr != 0)
			goto nak;
	}

	mp->b_datap->db_type = M_IOCACK;
	iocp->ioc_error = 0;

	/*
	 * Send a T_BIND_REQ if and only if a tsrc/tdst change was requested
	 * _AND_ tsrc is turned on _AND_ the tunnel either has tdst turned on
	 * or is an automatic tunnel.
	 */
	if ((ta->ifta_flags & (IFTUN_SRC | IFTUN_DST)) != 0 &&
	    (atp->tun_flags & TUN_SRC) != 0 &&
	    (atp->tun_flags & (TUN_DST | TUN_AUTOMATIC | TUN_6TO4)) != 0) {
		/* Park the ioctl; T_BIND_ACK processing will reply. */
		atp->tun_iocmp = mp;
		uerr = tun_send_bind_req(q);
		if (uerr == 0) {
			/* qreply() done by T_BIND_ACK processing */
			return;
		} else {
			atp->tun_iocmp = NULL;
			goto nak;
		}
	}
	qreply(q, mp);
	return;
nak:
	iocp->ioc_error = uerr;
	mp->b_datap->db_type = M_IOCNAK;
	qreply(q, mp);
}
/*
 * Report whether this tunnel's IPsec tunnel policy contains any rules
 * for the address family of the tunnel's upper (inner) protocol.
 * Returns B_FALSE when no tunnel policy is attached at all.
 */
static boolean_t
tun_thisvers_policy(tun_t *atp)
{
	ipsec_policy_head_t *polhead;
	boolean_t result;

	if (atp->tun_itp == NULL)
		return (B_FALSE);

	polhead = atp->tun_itp->itp_policy;
	rw_enter(&polhead->iph_lock, RW_READER);
	result = iph_ipvN(polhead,
	    ((atp->tun_flags & TUN_UPPER_MASK) & TUN_U_V6));
	rw_exit(&polhead->iph_lock);
	return (result);
}
/*
 * Processes SIOCs to setup a tunnel and IOCs to configure tunnel module.
 * M_IOCDATA->M_COPY->DATA or M_IOCTL->DATA
 *
 * Set-parameter ioctls are deferred to tun_sparam() inside the perimeter;
 * everything else is handled inline and ACKed/NAKed here.
 */
static int
tun_ioctl(queue_t *q, mblk_t *mp)
{
	tun_t *atp = (tun_t *)q->q_ptr;
	struct iocblk *iocp = (struct iocblk *)(mp->b_rptr);
	struct iftun_req *ta;
	mblk_t *mp1;
	int reterr = 0;
	int uerr = 0;
	uint_t lvers;
	sin_t *sin;
	sin6_t *sin6;
	size_t size;
	boolean_t new;
	ipaddr_t *rr_addr;
	char buf[INET6_ADDRSTRLEN];
	struct lifreq *lifr;
	netstack_t *ns = atp->tun_netstack;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	tun_stack_t *tuns = ns->netstack_tun;

	lvers = atp->tun_flags & TUN_LOWER_MASK;

	switch (iocp->ioc_cmd) {
	case OSIOCSTUNPARAM:
	case SIOCSTUNPARAM:
		/* Modifies tun_t state, so serialize via qwriter. */
		qwriter(q, mp, tun_sparam, PERIM_INNER);
		return (0);
	case OSIOCGTUNPARAM:
	case SIOCGTUNPARAM:
		/* Walk to the data block carrying the iftun_req. */
		mp1 = mp->b_cont;
		if (mp1 == NULL) {
			uerr = EPROTO;
			goto nak;
		}
		mp1 = mp1->b_cont;
		if (mp1 == NULL) {
			uerr = EPROTO;
			goto nak;
		}
		size = mp1->b_wptr - mp1->b_rptr;
		if ((size != sizeof (struct iftun_req)) &&
		    (size != sizeof (struct old_iftun_req))) {
			uerr = EPROTO;
			goto nak;
		}
		new = (size == sizeof (struct iftun_req));
		/*
		 * don't need to hold any locks. Can only be
		 * changed by qwriter
		 */
		ta = (struct iftun_req *)mp1->b_rptr;
		ta->ifta_flags = 0;

		/*
		 * Unlike tun_sparam(), the version number for security
		 * parameters is ignored, since we're filling it in!
		 */
		ta->ifta_vers = IFTUN_VERSION;

		/* in case we are pushed under something unsupported */
		switch (atp->tun_flags & TUN_UPPER_MASK) {
		case TUN_U_V4:
			ta->ifta_upper = IFTAP_IPV4;
			break;
		case TUN_U_V6:
			ta->ifta_upper = IFTAP_IPV6;
			break;
		default:
			ta->ifta_upper = 0;
			break;
		}
		/*
		 * Copy in security information.
		 *
		 * If we revise IFTUN_VERSION, this will become revision-
		 * dependent.
		 */
		if (tun_policy_present(atp, ns, ipss) &&
		    tun_thisvers_policy(atp)) {
			ipsec_req_t *ipsr;

			ipsr = (ipsec_req_t *)ta->ifta_secinfo;

			mutex_enter(&atp->tun_itp->itp_lock);
			if (!(atp->tun_itp->itp_flags & ITPF_P_TUNNEL) &&
			    (atp->tun_policy_index >=
			    atp->tun_itp->itp_next_policy_index)) {
				/*
				 * Convert 0.0.0.0/0, 0::0/0 tree entry to
				 * ipsec_req_t.
				 */
				*ipsr = atp->tun_secinfo;

				/* Reality check for empty polhead. */
				if (ipsr->ipsr_ah_req != 0 ||
				    ipsr->ipsr_esp_req != 0)
					ta->ifta_flags |= IFTUN_SECURITY;
			} else {
				/* Policy too rich to express; flag complex. */
				bzero(ipsr, sizeof (*ipsr));
				ta->ifta_flags |=
				    (IFTUN_COMPLEX_SECURITY | IFTUN_SECURITY);
			}
			mutex_exit(&atp->tun_itp->itp_lock);
		}

		/* New-layout requests can also report hop/encap limits. */
		if (new && (iocp->ioc_cmd == SIOCGTUNPARAM)) {

			/* Copy in hop limit. */
			if (atp->tun_flags & TUN_HOP_LIM) {
				ta->ifta_flags |= IFTUN_HOPLIMIT;
				ta->ifta_hop_limit = atp->tun_hop_limit;
			}

			/* Copy in encapsulation limit. */
			if (atp->tun_flags & TUN_ENCAP_LIM) {
				ta->ifta_flags |= IFTUN_ENCAP;
				ta->ifta_encap_lim = atp->tun_encap_lim;
			}
		}

		/* lower must be IPv4 or IPv6, otherwise open fails */
		if (lvers == TUN_L_V4) {
			sin = (sin_t *)&ta->ifta_saddr;
			ta->ifta_lower = IFTAP_IPV4;
			bzero(sin, sizeof (sin_t));
			sin->sin_family = AF_INET;
			if (atp->tun_flags & TUN_SRC) {
				IN6_V4MAPPED_TO_IPADDR(&atp->tun_laddr,
				    sin->sin_addr.s_addr);
				ta->ifta_flags |= IFTUN_SRC;
			} else {
				sin->sin_addr.s_addr = 0;
			}

			sin = (sin_t *)&ta->ifta_daddr;
			bzero(sin, sizeof (sin_t));
			sin->sin_family = AF_INET;
			if (atp->tun_flags & TUN_DST) {
				IN6_V4MAPPED_TO_IPADDR(&atp->tun_faddr,
				    sin->sin_addr.s_addr);
				ta->ifta_flags |= IFTUN_DST;
			} else {
				sin->sin_addr.s_addr = 0;
			}
		} else {
			ASSERT(lvers == TUN_L_V6);
			ta->ifta_lower = IFTAP_IPV6;
			sin6 = (sin6_t *)&ta->ifta_saddr;
			bzero(sin6, sizeof (sin6_t));
			sin6->sin6_family = AF_INET6;
			if (atp->tun_flags & TUN_SRC) {
				sin6->sin6_addr = atp->tun_laddr;
				ta->ifta_flags |= IFTUN_SRC;
			} else {
				V6_SET_ZERO(sin6->sin6_addr);
			}

			sin6 = (sin6_t *)&ta->ifta_daddr;
			bzero(sin6, sizeof (sin6_t));
			sin6->sin6_family = AF_INET6;
			if (atp->tun_flags & TUN_DST) {
				ta->ifta_flags |= IFTUN_DST;
				sin6->sin6_addr = atp->tun_faddr;
			} else {
				V6_SET_ZERO(sin6->sin6_addr);
			}
		}
		break;
	case SIOCS6TO4TUNRRADDR: {
		struct iocblk *iocp;

		/* check to make sure this is not a TRANSPARENT ioctl */
		iocp = (struct iocblk *)mp->b_rptr;
		if (iocp->ioc_count == TRANSPARENT) {
			uerr = EINVAL;
			goto nak;
		}

		/* skip over iocblk to M_DATA */
		mp1 = mp->b_cont;
		if (mp1 == NULL) {
			uerr = EPROTO;
			goto nak;
		}

		size = mp1->b_wptr - mp1->b_rptr;
		if (size != (sizeof (ipaddr_t))) {
			uerr = EPROTO;
			goto nak;
		}
		rr_addr = (ipaddr_t *)mp1->b_rptr;

		/*
		 * Value read MUST equal either:
		 * 1) a valid unicast IPv4 Address
		 * 2) INADDR_ANY
		 *
		 * (1) enables 6to4 Relay Router communication support on
		 * this system and denotes the IPv4 destination address used
		 * for sending to 6to4 Relay Routers.
		 * (2) disables 6to4 Relay Router communication support on
		 * this system.
		 *
		 * Any other value results in a NAK.
		 */
		if ((*rr_addr == INADDR_ANY) || (!CLASSD(*rr_addr))) {
			tun1dbg(("tun_ioctl: 6to4 Relay Router = %s\n",
			    inet_ntop(AF_INET, rr_addr, buf,
			    sizeof (buf))));
			tuns->tuns_relay_rtr_addr_v4 = *rr_addr;
		} else {
			tun1dbg(("tun_ioctl: Invalid 6to4 Relay Router " \
			    "address (%s)\n",
			    inet_ntop(AF_INET, rr_addr, buf,
			    sizeof (buf))));
			uerr = EINVAL;
			goto nak;
		}
		break;
	}
	case SIOCG6TO4TUNRRADDR:
		/* skip over iocblk to M_DATA */
		mp1 = mp->b_cont;
		if (mp1 == NULL) {
			uerr = EPROTO;
			goto nak;
		}

		size = mp1->b_wptr - mp1->b_rptr;
		if (size != (sizeof (ipaddr_t))) {
			uerr = EPROTO;
			goto nak;
		}

		rr_addr = (ipaddr_t *)mp1->b_rptr;
		*rr_addr = tuns->tuns_relay_rtr_addr_v4;
		break;
	case DL_IOC_HDR_INFO:
		/* Fast-path header pre-construction; see tun_fastpath(). */
		uerr = tun_fastpath(q, mp);
		if (uerr != 0)
			goto nak;
		break;
	case SIOCSLIFNAME:
		/*
		 * Intercept SIOCSLIFNAME and attach the name to my
		 * tunnel_instance.  For extra paranoia, if my name is not ""
		 * (as it would be at tun_t initialization), don't change
		 * anything.
		 *
		 * For now, this is the only way to tie tunnel names (as
		 * used in IPsec Tunnel Policy (ITP) instances) to actual
		 * tunnel instances.  In practice, SIOCSLIFNAME is only
		 * used by ifconfig(1m) to change the ill name to something
		 * ifconfig can handle.
		 */
		mp1 = mp->b_cont;
		if (mp1 != NULL) {
			lifr = (struct lifreq *)mp1->b_rptr;
			if (atp->tun_lifname[0] == '\0') {
				(void) strncpy(atp->tun_lifname,
				    lifr->lifr_name, LIFNAMSIZ);
				ASSERT(atp->tun_itp == NULL);
				atp->tun_itp =
				    get_tunnel_policy(atp->tun_lifname,
				    ns);
				/*
				 * It really doesn't matter if we return
				 * NULL or not.  If we get the itp pointer,
				 * we're in good shape.
				 */
			} else {
				tun0dbg(("SIOCSLIFNAME: new is %s, old is %s"
				    " - not changing\n",
				    lifr->lifr_name, atp->tun_lifname));
			}
		}
		break;
	default:
		/*
		 * We are module that thinks it's a driver so nak anything we
		 * don't understand
		 */
		uerr = EINVAL;
		goto nak;
	}
	mp->b_datap->db_type = M_IOCACK;
	iocp->ioc_error = 0;
	qreply(q, mp);
	return (reterr);
nak:
	iocp->ioc_error = uerr;
	mp->b_datap->db_type = M_IOCNAK;
	qreply(q, mp);
	return (reterr);
}
/*
 * mp contains the M_IOCTL DL_IOC_HDR_INFO message
 * allocate mblk for fast path.
 *
 * Pre-builds the outer IP header (v4 or v6, per the lower stream) in a
 * new mblk linked onto the request, so subsequent M_DATA transmits can
 * skip per-packet header construction.  Returns 0 on success or an
 * errno for the caller to NAK with.
 *
 * XXX - fix IP so that db_base and rptr can be different
 */
static int
tun_fastpath(queue_t *q, mblk_t *mp)
{
	tun_t *atp = (tun_t *)q->q_ptr;
	mblk_t *nmp;
	int error;
	dl_unitdata_req_t *dludp;
	int hdrlen;

	/* Fastpath must be enabled and the stream fully bound (DL_IDLE). */
	if (!tun_do_fastpath || atp->tun_state != DL_IDLE)
		return (EINVAL);

	/* The payload must hold a pulled-up DL_UNITDATA_REQ. */
	error = miocpullup(mp, sizeof (dl_unitdata_req_t));
	if (error != 0)
		return (error);

	dludp = (dl_unitdata_req_t *)mp->b_cont->b_rptr;
	if (dludp->dl_primitive != DL_UNITDATA_REQ)
		return (EINVAL);

	switch (atp->tun_flags & TUN_LOWER_MASK) {
	case TUN_L_V4:
		/* Template IPv4 outer header placed after tun_extra_offset. */
		nmp = allocb(sizeof (ipha_t) + atp->tun_extra_offset, BPRI_HI);
		if (nmp == NULL) {
			return (ENOMEM);
		}
		linkb(mp, nmp);
		nmp->b_rptr += atp->tun_extra_offset;
		nmp->b_wptr = nmp->b_rptr + sizeof (ipha_t);
		*(ipha_t *)(nmp->b_rptr) = atp->tun_ipha;
		/* IP expects rptr == db_base; see XXX comment above. */
		nmp->b_rptr = nmp->b_datap->db_base;
		break;
	case TUN_L_V6:
		/* IPv6 outer header, plus encap-limit option when enabled. */
		hdrlen = sizeof (ip6_t);
		if (atp->tun_encap_lim >= 0) {
			hdrlen += IPV6_TUN_ENCAP_OPT_LEN;
		}
		nmp = allocb(hdrlen + atp->tun_extra_offset, BPRI_HI);
		if (nmp == NULL) {
			return (ENOMEM);
		}
		linkb(mp, nmp);
		nmp->b_rptr += atp->tun_extra_offset;
		nmp->b_wptr = nmp->b_rptr + hdrlen;
		bcopy(&atp->tun_ip6h, nmp->b_rptr, hdrlen);
		nmp->b_rptr = nmp->b_datap->db_base;
		break;
	default:
		return (EPFNOSUPPORT);
	}
	atp->tun_flags |= TUN_FASTPATH;
	return (0);
}
/*
 * write side service procedure
 *
 * Drain queued messages until flow control / allocation failure stops us.
 */
void
tun_wsrv(queue_t *q)
{
	mblk_t *mp;
	tun_t *atp = (tun_t *)q->q_ptr;

	while (mp = getq(q)) {
		/* out of memory or canputnext failed */
		if (tun_wproc(q, mp) == ENOMEM) {
			break;
		}
		/*
		 * If we called qwriter, then the only way we
		 * can tell if we ran out of memory is to check if
		 * any events have been scheduled
		 *
		 * NOTE(review): this test stops draining only when BOTH a
		 * timeout and a bufcall event are outstanding, whereas the
		 * comment above says "any events" (which would be ||).
		 * Confirm the intended condition before changing it.
		 */
		if (atp->tun_events.ev_wtimoutid != 0 &&
		    atp->tun_events.ev_wbufcid != 0) {
			break;
		}
	}
}
/*
 * Write side put procedure.  To preserve message ordering, process a
 * message immediately only when nothing is already queued; otherwise
 * append it so tun_wsrv() drains in FIFO order.
 */
void
tun_wput(queue_t *q, mblk_t *mp)
{
	/* note: q_first is 'protected' by perimeter */
	if (q->q_first == NULL)
		(void) tun_wproc(q, mp);
	else
		(void) putq(q, mp);
}
/*
 * called from write side put or service procedure to process
 * messages
 *
 * Dispatches on the STREAMS message type; returns ENOMEM when the
 * caller (tun_wsrv) should stop draining, 0 otherwise.
 */
static int
tun_wproc(queue_t *q, mblk_t *mp)
{
	int error = 0;

	switch (mp->b_datap->db_type) {
	case M_DATA:
		/* Fast-path data; only legal when TUN_FASTPATH is set. */
		error = tun_wproc_mdata(q, mp);
		break;
	case M_PROTO:
	case M_PCPROTO:
		/* its a DLPI message */
		error = tun_wput_dlpi(q, mp);
		break;
	case M_IOCDATA:
	case M_IOCTL:
		/* Data to be copied out arrives from ip as M_IOCDATA */
		error = tun_ioctl(q, mp);
		break;
	/* we are a module pretending to be a driver.. turn around flush */
	case M_FLUSH:
		if (*mp->b_rptr & FLUSHW) {
			flushq(q, FLUSHALL);
			*mp->b_rptr &= ~FLUSHW;
		}
		/* Reflect the read-side flush back up via qreply. */
		if (*mp->b_rptr & FLUSHR)
			flushq(RD(q), FLUSHALL);
		qreply(q, mp);
		break;
	/*
	 * we are a module pretending to be a driver.. so just free message
	 * we don't understand
	 */
	default: {
		char buf[TUN_WHO_BUF];

		tun0dbg(("tun_wproc: %s got unknown mblk type %d\n",
		    tun_who(q, buf), mp->b_datap->db_type));
		freemsg(mp);
		break;
	}
	}
	return (error);
}
/*
 * handle fast path M_DATA message
 *
 * The message arrives with the pre-built outer header from
 * tun_fastpath() already in place.  For automatic/6to4 tunnels the
 * prebuilt header is stripped and the packet handed to tun_wdata_v6()
 * to build the real one; otherwise it is forwarded via the
 * tun_wputnext_* fast paths.
 */
static int
tun_wproc_mdata(queue_t *q, mblk_t *mp)
{
	tun_t *atp = (tun_t *)q->q_ptr;
	int error = 0;

	ASSERT(atp->tun_flags & TUN_FASTPATH);

	/* Fastpath messages must at least contain the outer header. */
	ASSERT((atp->tun_flags & TUN_L_V6) ?
	    (mp->b_wptr - mp->b_rptr >= atp->tun_extra_offset +
	    sizeof (ip6_t)) :
	    ((atp->tun_flags & TUN_L_V4) ?
	    (mp->b_wptr - mp->b_rptr >= atp->tun_extra_offset +
	    sizeof (ipha_t)) : 1));

	/* Flow control: requeue and tell the service procedure to stop. */
	if (!canputnext(q)) {
		atomic_add_32(&atp->tun_xmtretry, 1);
		(void) putbq(q, mp);
		return (ENOMEM);	/* get service procedure to stop */
	}
	if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) {
		int iph_hdr_length;
		/*
		 * get rid of fastpath header. let tun_wdata*
		 * fill in real thing
		 */

		iph_hdr_length = IPH_HDR_LENGTH((ipha_t *)(mp->b_rptr +
		    atp->tun_extra_offset));
		if (mp->b_wptr - mp->b_rptr < iph_hdr_length +
		    atp->tun_extra_offset + sizeof (ip6_t)) {
			if (!pullupmsg(mp, iph_hdr_length +
			    atp->tun_extra_offset + sizeof (ip6_t))) {
				tun0dbg(("tun_wproc_mdata: message too " \
				    "short for IPv6 header\n"));
				atomic_add_32(&atp->tun_InErrors, 1);
				atomic_add_32(&atp->tun_InDiscard, 1);
				freemsg(mp);
				return (0);
			}
		}
		mp->b_rptr += atp->tun_extra_offset + iph_hdr_length;
		/* Automatic/6to4 tunnels always carry IPv6 above. */
		ASSERT((atp->tun_flags & TUN_UPPER_MASK) == TUN_U_V6);
		tun_wdata_v6(q, mp);
		return (error);
	}

	switch (atp->tun_flags & TUN_UPPER_MASK) {
	case TUN_U_V4:
		error = tun_wputnext_v4(q, mp);
		break;
	case TUN_U_V6:
		error = tun_wputnext_v6(q, mp);
		break;
	default:
		atomic_add_32(&atp->tun_OutErrors, 1);
		freemsg(mp);
		error = EINVAL;
	}
	return (error);
}
/*
 * Because a TUNSPARAM ioctl()'s requirement to only set IPsec policy for a
 * given upper instance (IPv4-over-IP* or IPv6-over-IP*), have a special
 * AF-specific flusher.  This way, setting one upper instance doesn't
 * sabotage the other.  Don't bother with the hash-chained policy heads -
 * they won't be filled in in TUNSPARAM cases.
 */
static void
flush_af(ipsec_policy_head_t *polhead, int ulp_vector, netstack_t *ns)
{
	int af = (ulp_vector == TUN_U_V4) ? IPSEC_AF_V4 : IPSEC_AF_V6;
	int dir;
	ipsec_policy_t *cur, *next;

	ASSERT(RW_WRITE_HELD(&polhead->iph_lock));

	/* Unchain every non-hashed rule of this AF, in both directions. */
	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
		cur = polhead->iph_root[dir].ipr_nonhash[af];
		while (cur != NULL) {
			/* Grab the successor before unchaining frees cur. */
			next = cur->ipsp_hash.hash_next;
			IPPOL_UNCHAIN(polhead, cur, ns);
			cur = next;
		}
	}
}
/*
* Set and insert the actual simple policies.
*/
static boolean_t
insert_actual_policies(ipsec_tun_pol_t *itp, ipsec_act_t *actp, uint_t nact,
int ulp_vector, netstack_t *ns)
{
ipsec_selkey_t selkey;
ipsec_policy_t *pol;
ipsec_policy_root_t *pr;
ipsec_policy_head_t *polhead = itp->itp_policy;
bzero(&selkey, sizeof (selkey));
if (ulp_vector & TUN_U_V4) {
selkey.ipsl_valid = IPSL_IPV4;
/* v4 inbound */
pol = ipsec_policy_create(&selkey, actp, nact,
IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns);
if (pol == NULL)
return (B_FALSE);
pr = &polhead->iph_root[IPSEC_TYPE_INBOUND];
HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V4]);
ipsec_insert_always(&polhead->iph_rulebyid, pol);
/* v4 outbound */
pol = ipsec_policy_create(&selkey, actp, nact,
IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns);
if (pol == NULL)
return (B_FALSE);
pr = &polhead->iph_root[IPSEC_TYPE_OUTBOUND];
HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V4]);
ipsec_insert_always(&polhead->iph_rulebyid, pol);
}
if (ulp_vector & TUN_U_V6) {
selkey.ipsl_valid = IPSL_IPV6;
/* v6 inbound */
pol = ipsec_policy_create(&selkey, actp, nact,
IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns);
if (pol == NULL)
return (B_FALSE);
pr = &polhead->iph_root[IPSEC_TYPE_INBOUND];
HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V6]);
ipsec_insert_always(&polhead->iph_rulebyid, pol);
/* v6 outbound */
pol = ipsec_policy_create(&selkey, actp, nact,
IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns);
if (pol == NULL)
return (B_FALSE);
pr = &polhead->iph_root[IPSEC_TYPE_OUTBOUND];
HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V6]);
ipsec_insert_always(&polhead->iph_rulebyid, pol);
}
return (B_TRUE);
}
/*
 * For the old-fashioned tunnel-ioctl method of setting tunnel security
 * properties.  In the new world, set this to be a low-priority 0.0.0.0/0
 * match.
 *
 * Translates the ipsec_req_t from a TUNSPARAM ioctl into simple per-AF
 * policies on the tunnel's ITP, cloning the active polhead first so a
 * failure can be rolled back.  Returns 0 or an errno for the caller to
 * NAK with.
 */
static int
tun_set_sec_simple(tun_t *atp, ipsec_req_t *ipsr)
{
	int rc = 0;
	uint_t nact;
	ipsec_act_t *actp = NULL;
	boolean_t clear_all, old_policy = B_FALSE;
	ipsec_tun_pol_t *itp;
	tun_t *other_tun;
	netstack_t *ns = atp->tun_netstack;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	tun1dbg(
	    ("tun_set_sec_simple: adjusting tunnel security the old way."));

#define	REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER)
	/*
	 * Can't specify self-encap on a tunnel!!!
	 *
	 * BUGFIX: this previously used the logical "&&" operator, which
	 * rejected ANY non-zero ipsr_self_encap_req instead of testing
	 * the REQUIRED/NEVER preference bits, as the parallel clear_all
	 * test below does with "&".
	 */
	if ((ipsr->ipsr_self_encap_req & REQ_MASK) != 0)
		return (EINVAL);

	/*
	 * If it's a "clear-all" entry, unset the security flags and
	 * resume normal cleartext (or inherit-from-global) policy.
	 */
	clear_all = ((ipsr->ipsr_ah_req & REQ_MASK) == 0 &&
	    (ipsr->ipsr_esp_req & REQ_MASK) == 0);
#undef REQ_MASK

	mutex_enter(&atp->tun_lock);
	if (!tun_policy_present(atp, ns, ipss)) {
		if (clear_all) {
			/* Nothing attached and nothing requested. */
			bzero(&atp->tun_secinfo, sizeof (ipsec_req_t));
			atp->tun_policy_index = 0;
			goto bail;	/* No need to allocate! */
		}

		ASSERT(atp->tun_lifname[0] != '\0');
		atp->tun_itp = create_tunnel_policy(atp->tun_lifname,
		    &rc, &atp->tun_itp_gen, ns);
		/* NOTE:  "rc" set by create_tunnel_policy(). */
		if (atp->tun_itp == NULL)
			goto bail;
	}
	itp = atp->tun_itp;

	/* Allocate the actvec now, before holding itp or polhead locks. */
	ipsec_actvec_from_req(ipsr, &actp, &nact, ns);
	if (actp == NULL) {
		rc = ENOMEM;
		goto bail;
	}

	/*
	 * Just write on the active polhead.  Save the primary/secondary
	 * stuff for spdsock operations.
	 *
	 * Mutex because we need to write to the polhead AND flags atomically.
	 * Other threads will acquire the polhead lock as a reader if the
	 * (unprotected) flag is set.
	 */
	mutex_enter(&itp->itp_lock);
	if (itp->itp_flags & ITPF_P_TUNNEL) {
		/*
		 * Oops, we lost a race.  Let's get out of here.
		 */
		rc = EBUSY;
		goto mutex_bail;
	}

	old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0);
	if (old_policy) {
		/*
		 * We have to be more subtle here than we would
		 * in the spdosock code-paths, due to backward compatibility.
		 * Save the active polhead into the inactive one so it can
		 * be restored if the insert below fails.
		 */
		ITPF_CLONE(itp->itp_flags);
		rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns);
		if (rc != 0) {
			/* inactive has already been cleared. */
			itp->itp_flags &= ~ITPF_IFLAGS;
			goto mutex_bail;
		}
		rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
		flush_af(itp->itp_policy, atp->tun_flags & TUN_UPPER_MASK, ns);
	} else {
		/* Else assume itp->itp_policy is already flushed. */
		rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
	}

	if (clear_all) {
		/* We've already cleared out the polhead.  We are now done. */
		if (avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0)
			itp->itp_flags &= ~ITPF_PFLAGS;
		rw_exit(&itp->itp_policy->iph_lock);
		bzero(&atp->tun_secinfo, sizeof (ipsec_req_t));
		old_policy = B_FALSE;	/* Clear out the inactive one too. */
		goto recover_bail;
	}
	if (insert_actual_policies(itp, actp, nact,
	    atp->tun_flags & TUN_UPPER_MASK, ns)) {
		rw_exit(&itp->itp_policy->iph_lock);
		/*
		 * Adjust MTU and make sure the DL side knows what's up.
		 */
		atp->tun_ipsec_overhead = ipsec_act_ovhd(actp);
		itp->itp_flags = ITPF_P_ACTIVE;
		/*
		 * <sigh> There has to be a better way, but for now, send an
		 * IRE_DB_REQ again.  We will resynch from scratch, but have
		 * the tun_ipsec_overhead taken into account.
		 */
		if (atp->tun_flags & TUN_DST)
			tun_send_ire_req(atp->tun_wq);
		old_policy = B_FALSE;	/* Blank out inactive - we succeeded */
		/* Copy ipsec_req_t for subsequent SIOGTUNPARAM ops. */
		atp->tun_secinfo = *ipsr;
	} else {
		rw_exit(&itp->itp_policy->iph_lock);
		rc = ENOMEM;
	}

recover_bail:
	atp->tun_policy_index = itp->itp_next_policy_index;
	/* Find the "other guy" (v4/v6) and update his tun_policy_index too. */
	if (atp->tun_stats != NULL) {
		if (atp->tun_stats->ts_atp == atp) {
			other_tun = atp->tun_kstat_next;
			ASSERT(other_tun == NULL ||
			    other_tun->tun_kstat_next == NULL);
		} else {
			other_tun = atp->tun_stats->ts_atp;
			ASSERT(other_tun != NULL);
			ASSERT(other_tun->tun_kstat_next == atp);
		}
		if (other_tun != NULL)
			other_tun->tun_policy_index = atp->tun_policy_index;
	}

	if (old_policy) {
		/* Recover policy in in active polhead. */
		ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns);
		ITPF_SWAP(itp->itp_flags);
		atp->tun_extra_offset = TUN_LINK_EXTRA_OFF;
	}

	/* Clear policy in inactive polhead. */
	itp->itp_flags &= ~ITPF_IFLAGS;
	rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER);
	ipsec_polhead_flush(itp->itp_inactive, ns);
	rw_exit(&itp->itp_inactive->iph_lock);

mutex_bail:
	mutex_exit(&itp->itp_lock);

bail:
	if (actp != NULL)
		ipsec_actvec_free(actp, nact);

	mutex_exit(&atp->tun_lock);
	return (rc);
}
/*
 * Send an IRE_DB_REQ_TYPE to the lower module to obtain an IRE for the
 * tunnel destination.  If the tunnel has no destination, then request an
 * IRE for the source instead.
 */
static void
tun_send_ire_req(queue_t *q)
{
	tun_t *atp = q->q_ptr;
	mblk_t *mp;
	ire_t *ire;
	uint_t lvers = (atp->tun_flags & TUN_LOWER_MASK);
	char addrstr[INET6_ADDRSTRLEN];

	/* Allocation failure is non-fatal; a later retry will re-request. */
	if ((mp = tun_realloc_mblk(q, NULL, sizeof (ire_t), NULL, B_FALSE)) ==
	    NULL) {
		tun0dbg(("tun_send_ire_req: couldn't allocate mblk\n"));
		return;
	}
	mp->b_datap->db_type = IRE_DB_REQ_TYPE;
	ire = (ire_t *)mp->b_rptr;
	if (lvers == TUN_L_V4) {
		ire->ire_ipversion = IPV4_VERSION;
		/*
		 * For tunnels without destinations, we request the source
		 * ire so that we can account for IPsec policy in our MTU
		 * calculation.
		 */
		ire->ire_addr = (atp->tun_flags & TUN_DST) ?
		    atp->tun_ipha.ipha_dst : atp->tun_ipha.ipha_src;
	} else {
		/* v6 lower streams always have a destination configured. */
		ASSERT(lvers == TUN_L_V6 && (atp->tun_flags & TUN_DST));
		ire->ire_ipversion = IPV6_VERSION;
		ire->ire_addr_v6 = atp->tun_ip6h.ip6_dst;
	}

	tun1dbg(("tun_send_ire_req: requesting ire for %s",
	    (lvers == TUN_L_V4 ?
	    inet_ntop(AF_INET, &ire->ire_addr, addrstr, INET6_ADDRSTRLEN) :
	    inet_ntop(AF_INET6, &ire->ire_addr_v6, addrstr,
	    INET6_ADDRSTRLEN))));

	/* Record when we asked, to rate-limit re-requests. */
	atp->tun_ire_lastreq = lbolt;
	putnext(WR(q), mp);
}
/*
* Given the pa