| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| */ |
| |
| /* |
| * iptun - IP Tunneling Driver |
| * |
| * This module is a GLDv3 driver that implements virtual datalinks over IP |
 * (a.k.a., IP tunneling). The datalinks are managed through a dld ioctl
| * interface (see iptun_ctl.c), and registered with GLDv3 using |
| * mac_register(). It implements the logic for various forms of IP (IPv4 or |
| * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip |
| * module below it. Each virtual IP tunnel datalink has a conn_t associated |
| * with it representing the "outer" IP connection. |
| * |
| * The module implements the following locking semantics: |
| * |
| * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock. |
| * See comments above iptun_hash_lock for details. |
| * |
| * No locks are ever held while calling up to GLDv3. The general architecture |
| * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a |
| * given link will be held while making downcalls (iptun_m_*() callbacks). |
| * Because we need to hold locks while handling downcalls, holding these locks |
| * while issuing upcalls results in deadlock scenarios. See the block comment |
| * above iptun_task_cb() for details on how we safely issue upcalls without |
| * holding any locks. |
| * |
 * The contents of each iptun_t are protected by its iptun_lock, which is
 * acquired in iptun_enter() (called by iptun_enter_by_linkid()) and released
 * in iptun_exit().
| * |
| * See comments in iptun_delete() and iptun_free() for details on how the |
| * iptun_t is deleted safely. |
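 *
 * As a sketch (this is the pattern the control-path entry points below
 * already follow, not additional API), a typical operation on an existing
 * tunnel looks like:
 *
 *	iptun_t	*iptun;
 *	int	err;
 *
 *	if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0)
 *		return (err);
 *	... iptun_lock is held; examine or modify the iptun_t ...
 *	iptun_exit(iptun);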
| */ |
| |
| #include <sys/types.h> |
| #include <sys/kmem.h> |
| #include <sys/errno.h> |
| #include <sys/modhash.h> |
| #include <sys/list.h> |
| #include <sys/strsun.h> |
| #include <sys/file.h> |
| #include <sys/systm.h> |
| #include <sys/tihdr.h> |
| #include <sys/param.h> |
| #include <sys/mac_provider.h> |
| #include <sys/mac_ipv4.h> |
| #include <sys/mac_ipv6.h> |
| #include <sys/mac_6to4.h> |
| #include <sys/tsol/tnet.h> |
| #include <sys/sunldi.h> |
| #include <netinet/in.h> |
| #include <netinet/ip6.h> |
| #include <inet/ip.h> |
| #include <inet/ip_ire.h> |
| #include <inet/ipsec_impl.h> |
| #include <inet/iptun.h> |
| #include "iptun_impl.h" |
| |
| /* Do the tunnel type and address family match? */ |
| #define IPTUN_ADDR_MATCH(iptun_type, family) \ |
| ((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) || \ |
| (iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) || \ |
| (iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET)) |
| |
| #define IPTUN_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) |
| |
| #define IPTUNQ_DEV "/dev/iptunq" |
| |
| #define IPTUN_MIN_IPV4_MTU 576 /* ip.h still uses 68 (!) */ |
| #define IPTUN_MIN_IPV6_MTU IPV6_MIN_MTU |
| #define IPTUN_MAX_IPV4_MTU (IP_MAXPACKET - sizeof (ipha_t)) |
| #define IPTUN_MAX_IPV6_MTU (IP_MAXPACKET - sizeof (ip6_t) - \ |
| sizeof (iptun_encaplim_t)) |
| |
| #define IPTUN_MIN_HOPLIMIT 1 |
| #define IPTUN_MAX_HOPLIMIT UINT8_MAX |
| |
| #define IPTUN_MIN_ENCAPLIMIT 0 |
| #define IPTUN_MAX_ENCAPLIMIT UINT8_MAX |
| |
| #define IPTUN_IPSEC_REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER) |
| |
| static iptun_encaplim_t iptun_encaplim_init = { |
| { IPPROTO_NONE, 0 }, |
| IP6OPT_TUNNEL_LIMIT, |
| 1, |
| IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ |
| IP6OPT_PADN, |
| 1, |
| 0 |
| }; |
| |
| /* Table containing per-iptun-type information. */ |
| static iptun_typeinfo_t iptun_type_table[] = { |
| { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION, ip_output, |
| IPTUN_MIN_IPV4_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE }, |
| { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION, ip_output_v6, |
| IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE }, |
| { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION, ip_output, |
| IPTUN_MIN_IPV4_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE }, |
| { IPTUN_TYPE_UNKNOWN, NULL, 0, NULL, 0, 0, B_FALSE } |
| }; |
| |
| /* |
| * iptun_hash is an iptun_t lookup table by link ID protected by |
| * iptun_hash_lock. While the hash table's integrity is maintained via |
| * internal locking in the mod_hash_*() functions, we need additional locking |
| * so that an iptun_t cannot be deleted after a hash lookup has returned an |
| * iptun_t and before iptun_lock has been entered. As such, we use |
| * iptun_hash_lock when doing lookups and removals from iptun_hash. |
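 *
 * Without this lock, the following interleaving (a sketch) would be
 * possible:
 *
 *	thread A: mod_hash_find() returns an iptun_t
 *	thread B: iptun_delete() condemns and frees that iptun_t
 *	thread A: iptun_enter() dereferences freed memory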
| */ |
| mod_hash_t *iptun_hash; |
| static kmutex_t iptun_hash_lock; |
| |
| static uint_t iptun_tunnelcount; /* total for all stacks */ |
| kmem_cache_t *iptun_cache; |
| ddi_taskq_t *iptun_taskq; |
| |
| typedef enum { |
| IPTUN_TASK_PMTU_UPDATE, /* obtain new destination path-MTU */ |
| IPTUN_TASK_MTU_UPDATE, /* tell mac about new tunnel link MTU */ |
| IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */ |
| IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */ |
| IPTUN_TASK_LINK_UPDATE, /* tell mac about new link state */ |
| IPTUN_TASK_PDATA_UPDATE /* tell mac about updated plugin data */ |
| } iptun_task_t; |
| |
| typedef struct iptun_task_data_s { |
| iptun_task_t itd_task; |
| datalink_id_t itd_linkid; |
| } iptun_task_data_t; |
| |
| static void iptun_task_dispatch(iptun_t *, iptun_task_t); |
| static int iptun_enter(iptun_t *); |
| static void iptun_exit(iptun_t *); |
| static void iptun_headergen(iptun_t *, boolean_t); |
| static void iptun_drop_pkt(mblk_t *, uint64_t *); |
| static void iptun_input(void *, mblk_t *, void *); |
| static void iptun_output(iptun_t *, mblk_t *); |
| static uint32_t iptun_get_maxmtu(iptun_t *, uint32_t); |
| static uint32_t iptun_update_mtu(iptun_t *, uint32_t); |
| static uint32_t iptun_get_dst_pmtu(iptun_t *); |
| static int iptun_setladdr(iptun_t *, const struct sockaddr_storage *); |
| |
| static mac_callbacks_t iptun_m_callbacks; |
| |
| static int |
| iptun_m_getstat(void *arg, uint_t stat, uint64_t *val) |
| { |
| iptun_t *iptun = arg; |
| int err = 0; |
| |
| switch (stat) { |
| case MAC_STAT_IERRORS: |
| *val = iptun->iptun_ierrors; |
| break; |
| case MAC_STAT_OERRORS: |
| *val = iptun->iptun_oerrors; |
| break; |
| case MAC_STAT_RBYTES: |
| *val = iptun->iptun_rbytes; |
| break; |
| case MAC_STAT_IPACKETS: |
| *val = iptun->iptun_ipackets; |
| break; |
| case MAC_STAT_OBYTES: |
| *val = iptun->iptun_obytes; |
| break; |
| case MAC_STAT_OPACKETS: |
| *val = iptun->iptun_opackets; |
| break; |
| case MAC_STAT_NORCVBUF: |
| *val = iptun->iptun_norcvbuf; |
| break; |
| case MAC_STAT_NOXMTBUF: |
| *val = iptun->iptun_noxmtbuf; |
| break; |
| default: |
| err = ENOTSUP; |
| } |
| |
| return (err); |
| } |
| |
| static int |
| iptun_m_start(void *arg) |
| { |
| iptun_t *iptun = arg; |
| int err; |
| |
| if ((err = iptun_enter(iptun)) == 0) { |
| iptun->iptun_flags |= IPTUN_MAC_STARTED; |
| iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); |
| iptun_exit(iptun); |
| } |
| return (err); |
| } |
| |
| static void |
| iptun_m_stop(void *arg) |
| { |
| iptun_t *iptun = arg; |
| |
| if (iptun_enter(iptun) == 0) { |
| iptun->iptun_flags &= ~IPTUN_MAC_STARTED; |
| iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); |
| iptun_exit(iptun); |
| } |
| } |
| |
| /* |
| * iptun_m_setpromisc() does nothing and always succeeds. This is because a |
| * tunnel data-link only ever receives packets that are destined exclusively |
| * for the local address of the tunnel. |
| */ |
| /* ARGSUSED */ |
| static int |
| iptun_m_setpromisc(void *arg, boolean_t on) |
| { |
| return (0); |
| } |
| |
| /* ARGSUSED */ |
| static int |
| iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) |
| { |
| return (ENOTSUP); |
| } |
| |
| /* |
| * iptun_m_unicst() sets the local address. |
| */ |
| /* ARGSUSED */ |
| static int |
| iptun_m_unicst(void *arg, const uint8_t *addrp) |
| { |
| iptun_t *iptun = arg; |
| int err; |
| struct sockaddr_storage ss; |
| struct sockaddr_in *sin; |
| struct sockaddr_in6 *sin6; |
| |
| if ((err = iptun_enter(iptun)) == 0) { |
| switch (iptun->iptun_typeinfo->iti_ipvers) { |
| case IPV4_VERSION: |
| sin = (struct sockaddr_in *)&ss; |
| sin->sin_family = AF_INET; |
| bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t)); |
| break; |
| case IPV6_VERSION: |
| sin6 = (struct sockaddr_in6 *)&ss; |
| sin6->sin6_family = AF_INET6; |
| bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t)); |
| break; |
| default: |
| ASSERT(0); |
| } |
| err = iptun_setladdr(iptun, &ss); |
| iptun_exit(iptun); |
| } |
| return (err); |
| } |
| |
| static mblk_t * |
| iptun_m_tx(void *arg, mblk_t *mpchain) |
| { |
| mblk_t *mp, *nmp; |
| iptun_t *iptun = arg; |
| |
| if (!IS_IPTUN_RUNNING(iptun)) { |
| iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf); |
| return (NULL); |
| } |
| |
| /* |
	 * Request the destination's path MTU information regularly in case
	 * the path MTU has increased.
| */ |
| if (IPTUN_PMTU_TOO_OLD(iptun)) |
| iptun_task_dispatch(iptun, IPTUN_TASK_PMTU_UPDATE); |
| |
| for (mp = mpchain; mp != NULL; mp = nmp) { |
| nmp = mp->b_next; |
| mp->b_next = NULL; |
| iptun_output(iptun, mp); |
| } |
| |
| return (NULL); |
| } |
| |
| /* ARGSUSED */ |
| static int |
| iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, |
| uint_t pr_valsize, const void *pr_val) |
| { |
| iptun_t *iptun = barg; |
| uint32_t value = *(uint32_t *)pr_val; |
| int err; |
| |
| /* |
| * We need to enter this iptun_t since we'll be modifying the outer |
| * header. |
| */ |
| if ((err = iptun_enter(iptun)) != 0) |
| return (err); |
| |
| switch (pr_num) { |
| case MAC_PROP_IPTUN_HOPLIMIT: |
| if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) { |
| err = EINVAL; |
| break; |
| } |
| if (value != iptun->iptun_hoplimit) { |
| iptun->iptun_hoplimit = (uint8_t)value; |
| iptun_headergen(iptun, B_TRUE); |
| } |
| break; |
| case MAC_PROP_IPTUN_ENCAPLIMIT: |
| if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 || |
| value > IPTUN_MAX_ENCAPLIMIT) { |
| err = EINVAL; |
| break; |
| } |
| if (value != iptun->iptun_encaplimit) { |
| iptun->iptun_encaplimit = (uint8_t)value; |
| iptun_headergen(iptun, B_TRUE); |
| } |
| break; |
| case MAC_PROP_MTU: { |
| uint32_t maxmtu = iptun_get_maxmtu(iptun, 0); |
| |
| if (value < iptun->iptun_typeinfo->iti_minmtu || |
| value > maxmtu) { |
| err = EINVAL; |
| break; |
| } |
| iptun->iptun_flags |= IPTUN_FIXED_MTU; |
| if (value != iptun->iptun_mtu) { |
| iptun->iptun_mtu = value; |
| iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); |
| } |
| break; |
| } |
| default: |
| err = EINVAL; |
| } |
| iptun_exit(iptun); |
| return (err); |
| } |
| |
| /* ARGSUSED */ |
| static int |
| iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, |
| uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) |
| { |
| iptun_t *iptun = barg; |
| mac_propval_range_t range; |
| boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); |
| boolean_t is_possible = (pr_flags & MAC_PROP_POSSIBLE); |
| int err; |
| |
| if ((err = iptun_enter(iptun)) != 0) |
| return (err); |
| |
| if ((pr_flags & ~(MAC_PROP_DEFAULT | MAC_PROP_POSSIBLE)) != 0) { |
| err = ENOTSUP; |
| goto done; |
| } |
| if (is_default && is_possible) { |
| err = EINVAL; |
| goto done; |
| } |
| |
| *perm = MAC_PROP_PERM_RW; |
| |
| if (is_possible) { |
| if (pr_valsize < sizeof (mac_propval_range_t)) { |
| err = EINVAL; |
| goto done; |
| } |
| range.mpr_count = 1; |
| range.mpr_type = MAC_PROPVAL_UINT32; |
| } else if (pr_valsize < sizeof (uint32_t)) { |
| err = EINVAL; |
| goto done; |
| } |
| |
| switch (pr_num) { |
| case MAC_PROP_IPTUN_HOPLIMIT: |
| if (is_possible) { |
| range.range_uint32[0].mpur_min = IPTUN_MIN_HOPLIMIT; |
| range.range_uint32[0].mpur_max = IPTUN_MAX_HOPLIMIT; |
| } else if (is_default) { |
| *(uint32_t *)pr_val = IPTUN_DEFAULT_HOPLIMIT; |
| } else { |
| *(uint32_t *)pr_val = iptun->iptun_hoplimit; |
| } |
| break; |
| case MAC_PROP_IPTUN_ENCAPLIMIT: |
| if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) { |
| err = ENOTSUP; |
| goto done; |
| } |
| if (is_possible) { |
| range.range_uint32[0].mpur_min = IPTUN_MIN_ENCAPLIMIT; |
| range.range_uint32[0].mpur_max = IPTUN_MAX_ENCAPLIMIT; |
| } else if (is_default) { |
| *(uint32_t *)pr_val = IPTUN_DEFAULT_ENCAPLIMIT; |
| } else { |
| *(uint32_t *)pr_val = iptun->iptun_encaplimit; |
| } |
| break; |
| case MAC_PROP_MTU: { |
| uint32_t maxmtu = iptun_get_maxmtu(iptun, 0); |
| |
| if (is_possible) { |
| range.range_uint32[0].mpur_min = |
| iptun->iptun_typeinfo->iti_minmtu; |
| range.range_uint32[0].mpur_max = maxmtu; |
| } else { |
| /* |
| * The MAC module knows the current value and should |
			 * never call us for it. There is also no default
			 * MTU, since the MTU is a dynamic property by
			 * default.
| */ |
| err = ENOTSUP; |
| goto done; |
| } |
| break; |
| } |
| default: |
| err = EINVAL; |
| goto done; |
| } |
| if (is_possible) |
| bcopy(&range, pr_val, sizeof (range)); |
| done: |
| iptun_exit(iptun); |
| return (err); |
| } |
| |
| uint_t |
| iptun_count(void) |
| { |
| return (iptun_tunnelcount); |
| } |
| |
| /* |
| * Enter an iptun_t exclusively. This is essentially just a mutex, but we |
| * don't allow iptun_enter() to succeed on a tunnel if it's in the process of |
| * being deleted. |
| */ |
| static int |
| iptun_enter(iptun_t *iptun) |
| { |
| mutex_enter(&iptun->iptun_lock); |
| while (iptun->iptun_flags & IPTUN_DELETE_PENDING) |
| cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock); |
| if (iptun->iptun_flags & IPTUN_CONDEMNED) { |
| mutex_exit(&iptun->iptun_lock); |
| return (ENOENT); |
| } |
| return (0); |
| } |
| |
| /* |
| * Exit the tunnel entered in iptun_enter(). |
| */ |
| static void |
| iptun_exit(iptun_t *iptun) |
| { |
| mutex_exit(&iptun->iptun_lock); |
| } |
| |
| /* |
| * Enter the IP tunnel instance by datalink ID. |
| */ |
| static int |
| iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun) |
| { |
| int err; |
| |
| mutex_enter(&iptun_hash_lock); |
| if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid), |
| (mod_hash_val_t *)iptun) == 0) |
| err = iptun_enter(*iptun); |
| else |
| err = ENOENT; |
| if (err != 0) |
| *iptun = NULL; |
| mutex_exit(&iptun_hash_lock); |
| return (err); |
| } |
| |
| /* |
| * Handle tasks that were deferred through the iptun_taskq. These fall into |
| * two categories: |
| * |
 * 1. Tasks that were deferred because we didn't want to spend time doing them
| * while in the data path. Only IPTUN_TASK_PMTU_UPDATE falls into this |
| * category. |
| * |
 * 2. Tasks that were deferred because they require calling up to the mac
| * module, and we can't call up to the mac module while holding locks. |
| * |
 * Handling 1 is easy; we just look up the iptun_t, perform the task, exit the
| * tunnel, and we're done. |
| * |
| * Handling 2 is tricky to get right without introducing race conditions and |
| * deadlocks with the mac module, as we cannot issue an upcall while in the |
 * iptun_t. The reason is that upcalls may try to enter the mac perimeter,
 * while iptun callbacks (such as iptun_m_setprop()) called from the mac
 * module will already have the perimeter held, and will then try to enter
 * the iptun_t. The two lock orderings are inverted, and the result is
 * deadlock.
| * |
| * The safe way to do this is to enter the iptun_t in question and copy the |
| * information we need out of it so that we can exit it and know that the |
| * information being passed up to the upcalls won't be subject to modification |
| * by other threads. The problem now is that we need to exit it prior to |
| * issuing the upcall, but once we do this, a thread could come along and |
| * delete the iptun_t and thus the mac handle required to issue the upcall. |
| * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the |
| * iptun_t. This flag is the condition associated with iptun_upcall_cv, which |
| * iptun_delete() will cv_wait() on. When the upcall completes, we clear |
| * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting |
| * iptun_delete(). We can thus still safely use iptun->iptun_mh after having |
| * exited the iptun_t. |
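 *
 * In outline, the sequence implemented by iptun_task_cb() below is:
 *
 *	iptun_enter_by_linkid(linkid, &iptun);	iptun_lock now held
 *	iptun->iptun_flags |= IPTUN_UPCALL_PENDING;
 *	copy the state needed for the upcall out of the iptun_t
 *	iptun_exit(iptun);			no locks held
 *	mac_*_update(iptun->iptun_mh, ...);	the upcall itself
 *	mutex_enter(&iptun->iptun_lock);
 *	iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING;
 *	cv_signal(&iptun->iptun_upcall_cv);
 *	mutex_exit(&iptun->iptun_lock);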
| */ |
| static void |
| iptun_task_cb(void *arg) |
| { |
| iptun_task_data_t *itd = arg; |
| iptun_task_t task = itd->itd_task; |
| datalink_id_t linkid = itd->itd_linkid; |
| iptun_t *iptun; |
| uint32_t mtu; |
| iptun_addr_t addr; |
| link_state_t linkstate; |
| size_t header_size; |
| iptun_header_t header; |
| |
| kmem_free(itd, sizeof (*itd)); |
| |
| /* |
| * Note that if the lookup fails, it's because the tunnel was deleted |
| * between the time the task was dispatched and now. That isn't an |
| * error. |
| */ |
| if (iptun_enter_by_linkid(linkid, &iptun) != 0) |
| return; |
| |
| if (task == IPTUN_TASK_PMTU_UPDATE) { |
| (void) iptun_update_mtu(iptun, 0); |
| iptun_exit(iptun); |
| return; |
| } |
| |
| iptun->iptun_flags |= IPTUN_UPCALL_PENDING; |
| |
| switch (task) { |
| case IPTUN_TASK_MTU_UPDATE: |
| mtu = iptun->iptun_mtu; |
| break; |
| case IPTUN_TASK_LADDR_UPDATE: |
| addr = iptun->iptun_laddr; |
| break; |
| case IPTUN_TASK_RADDR_UPDATE: |
| addr = iptun->iptun_raddr; |
| break; |
| case IPTUN_TASK_LINK_UPDATE: |
| linkstate = IS_IPTUN_RUNNING(iptun) ? |
| LINK_STATE_UP : LINK_STATE_DOWN; |
| break; |
| case IPTUN_TASK_PDATA_UPDATE: |
| header_size = iptun->iptun_header_size; |
| header = iptun->iptun_header; |
| break; |
| default: |
| ASSERT(0); |
| } |
| |
| iptun_exit(iptun); |
| |
| switch (task) { |
| case IPTUN_TASK_MTU_UPDATE: |
| (void) mac_maxsdu_update(iptun->iptun_mh, mtu); |
| break; |
| case IPTUN_TASK_LADDR_UPDATE: |
| mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); |
| break; |
| case IPTUN_TASK_RADDR_UPDATE: |
| mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); |
| break; |
| case IPTUN_TASK_LINK_UPDATE: |
| mac_link_update(iptun->iptun_mh, linkstate); |
| break; |
| case IPTUN_TASK_PDATA_UPDATE: |
| if (mac_pdata_update(iptun->iptun_mh, |
| header_size == 0 ? NULL : &header, header_size) != 0) |
| atomic_inc_64(&iptun->iptun_taskq_fail); |
| break; |
| } |
| |
| mutex_enter(&iptun->iptun_lock); |
| iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING; |
| cv_signal(&iptun->iptun_upcall_cv); |
| mutex_exit(&iptun->iptun_lock); |
| } |
| |
| static void |
| iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task) |
| { |
| iptun_task_data_t *itd; |
| |
| itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP); |
| if (itd == NULL) { |
| atomic_inc_64(&iptun->iptun_taskq_fail); |
| return; |
| } |
| itd->itd_task = iptun_task; |
| itd->itd_linkid = iptun->iptun_linkid; |
	if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd,
	    DDI_NOSLEEP) != DDI_SUCCESS) {
| atomic_inc_64(&iptun->iptun_taskq_fail); |
| kmem_free(itd, sizeof (*itd)); |
| } |
| } |
| |
| /* |
| * Convert an iptun_addr_t to sockaddr_storage. |
| */ |
| static void |
| iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss) |
| { |
| struct sockaddr_in *sin; |
| struct sockaddr_in6 *sin6; |
| |
| bzero(ss, sizeof (*ss)); |
| switch (iptun_addr->ia_family) { |
| case AF_INET: |
| sin = (struct sockaddr_in *)ss; |
| sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4; |
| break; |
| case AF_INET6: |
| sin6 = (struct sockaddr_in6 *)ss; |
| sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6; |
| break; |
| default: |
| ASSERT(0); |
| } |
| ss->ss_family = iptun_addr->ia_family; |
| } |
| |
| /* |
| * General purpose function to set an IP tunnel source or destination address. |
| */ |
| static int |
| iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr, |
| const struct sockaddr_storage *ss) |
| { |
| if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family)) |
| return (EINVAL); |
| |
| switch (ss->ss_family) { |
| case AF_INET: { |
| struct sockaddr_in *sin = (struct sockaddr_in *)ss; |
| |
| if ((sin->sin_addr.s_addr == INADDR_ANY) || |
| (sin->sin_addr.s_addr == INADDR_BROADCAST) || |
| CLASSD(sin->sin_addr.s_addr)) { |
| return (EADDRNOTAVAIL); |
| } |
| iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr; |
| break; |
| } |
| case AF_INET6: { |
| struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; |
| |
| if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || |
| IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || |
| IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { |
| return (EADDRNOTAVAIL); |
| } |
| iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr; |
| break; |
| } |
| default: |
| return (EAFNOSUPPORT); |
| } |
| iptun_addr->ia_family = ss->ss_family; |
| return (0); |
| } |
| |
| static int |
| iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr) |
| { |
| return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, |
| &iptun->iptun_laddr, laddr)); |
| } |
| |
| static int |
| iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr) |
| { |
| if (!(iptun->iptun_typeinfo->iti_hasraddr)) |
| return (EINVAL); |
| return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, |
| &iptun->iptun_raddr, raddr)); |
| } |
| |
| static boolean_t |
| iptun_canbind(iptun_t *iptun) |
| { |
| /* |
	 * A tunnel may bind once its source address has been set and, if its
	 * tunnel type requires one, its destination address as well.
| */ |
| return ((iptun->iptun_flags & IPTUN_LADDR) && |
| ((iptun->iptun_flags & IPTUN_RADDR) || |
| !(iptun->iptun_typeinfo->iti_hasraddr))); |
| } |
| |
| static int |
| iptun_bind(iptun_t *iptun) |
| { |
| conn_t *connp = iptun->iptun_connp; |
| int err; |
| |
| ASSERT(iptun_canbind(iptun)); |
| |
| switch (iptun->iptun_typeinfo->iti_type) { |
| case IPTUN_TYPE_IPV4: |
| /* |
| * When we set a tunnel's destination address, we do not care |
| * if the destination is reachable. Transient routing issues |
| * should not inhibit the creation of a tunnel interface, for |
| * example. For that reason, we pass in B_FALSE for the |
| * verify_dst argument of ip_proto_bind_connected_v4() (and |
| * similarly for IPv6 tunnels below). |
| */ |
| err = ip_proto_bind_connected_v4(connp, NULL, IPPROTO_ENCAP, |
| &iptun->iptun_laddr4, 0, iptun->iptun_raddr4, 0, B_TRUE, |
| B_FALSE, iptun->iptun_cred); |
| break; |
| case IPTUN_TYPE_IPV6: |
| err = ip_proto_bind_connected_v6(connp, NULL, IPPROTO_IPV6, |
| &iptun->iptun_laddr6, 0, &iptun->iptun_raddr6, NULL, 0, |
| B_TRUE, B_FALSE, iptun->iptun_cred); |
| break; |
| case IPTUN_TYPE_6TO4: |
| err = ip_proto_bind_laddr_v4(connp, NULL, IPPROTO_IPV6, |
| iptun->iptun_laddr4, 0, B_TRUE); |
| break; |
| } |
| |
| if (err == 0) { |
| iptun->iptun_flags |= IPTUN_BOUND; |
| |
| /* |
| * Now that we're bound with ip below us, this is a good time |
| * to initialize the destination path MTU and to re-calculate |
| * the tunnel's link MTU. |
| */ |
| (void) iptun_update_mtu(iptun, 0); |
| |
| if (IS_IPTUN_RUNNING(iptun)) |
| iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); |
| } |
| return (err); |
| } |
| |
| static void |
| iptun_unbind(iptun_t *iptun) |
| { |
| ASSERT(iptun->iptun_flags & IPTUN_BOUND); |
| ASSERT(mutex_owned(&iptun->iptun_lock) || |
| (iptun->iptun_flags & IPTUN_CONDEMNED)); |
| ip_unbind(iptun->iptun_connp); |
| iptun->iptun_flags &= ~IPTUN_BOUND; |
| if (!(iptun->iptun_flags & IPTUN_CONDEMNED)) |
| iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); |
| } |
| |
| /* |
| * Re-generate the template data-link header for a given IP tunnel given the |
| * tunnel's current parameters. |
| */ |
| static void |
| iptun_headergen(iptun_t *iptun, boolean_t update_mac) |
| { |
| switch (iptun->iptun_typeinfo->iti_ipvers) { |
| case IPV4_VERSION: |
| /* |
| * We only need to use a custom IP header if the administrator |
| * has supplied a non-default hoplimit. |
| */ |
| if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) { |
| iptun->iptun_header_size = 0; |
| break; |
| } |
| iptun->iptun_header_size = sizeof (ipha_t); |
| iptun->iptun_header4.ipha_version_and_hdr_length = |
| IP_SIMPLE_HDR_VERSION; |
| iptun->iptun_header4.ipha_fragment_offset_and_flags = |
| htons(IPH_DF); |
| iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit; |
| break; |
| case IPV6_VERSION: { |
| ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h; |
| |
| /* |
| * We only need to use a custom IPv6 header if either the |
| * administrator has supplied a non-default hoplimit, or we |
| * need to include an encapsulation limit option in the outer |
| * header. |
| */ |
| if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT && |
| iptun->iptun_encaplimit == 0) { |
| iptun->iptun_header_size = 0; |
| break; |
| } |
| |
| (void) memset(ip6hp, 0, sizeof (*ip6hp)); |
| if (iptun->iptun_encaplimit == 0) { |
| iptun->iptun_header_size = sizeof (ip6_t); |
| ip6hp->ip6_nxt = IPPROTO_NONE; |
| } else { |
| iptun_encaplim_t *iel; |
| |
| iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t); |
| /* |
| * The mac_ipv6 plugin requires ip6_plen to be in host |
| * byte order and reflect the extension headers |
| * present in the template. The actual network byte |
| * order ip6_plen will be set on a per-packet basis on |
| * transmit. |
| */ |
| ip6hp->ip6_plen = sizeof (*iel); |
| ip6hp->ip6_nxt = IPPROTO_DSTOPTS; |
| iel = &iptun->iptun_header6.it6h_encaplim; |
| *iel = iptun_encaplim_init; |
| iel->iel_telopt.ip6ot_encap_limit = |
| iptun->iptun_encaplimit; |
| } |
| |
| ip6hp->ip6_hlim = iptun->iptun_hoplimit; |
| break; |
| } |
| } |
| |
| if (update_mac) |
| iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE); |
| } |
| |
| /* |
| * Insert inbound and outbound IPv4 and IPv6 policy into the given policy |
| * head. |
| */ |
| static boolean_t |
| iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp, |
| uint_t n, netstack_t *ns) |
| { |
| int f = IPSEC_AF_V4; |
| |
| if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) || |
| !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)) |
| return (B_FALSE); |
| |
| f = IPSEC_AF_V6; |
| return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) && |
| ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)); |
| } |
| |
| /* |
| * Used to set IPsec policy when policy is set through the IPTUN_CREATE or |
| * IPTUN_MODIFY ioctls. |
| */ |
| static int |
| iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr) |
| { |
| int rc = 0; |
| uint_t nact; |
| ipsec_act_t *actp = NULL; |
| boolean_t clear_all, old_policy = B_FALSE; |
| ipsec_tun_pol_t *itp; |
| char name[MAXLINKNAMELEN]; |
| uint64_t gen; |
| netstack_t *ns = iptun->iptun_ns; |
| |
| /* Can't specify self-encap on a tunnel. */ |
| if (ipsr->ipsr_self_encap_req != 0) |
| return (EINVAL); |
| |
| /* |
| * If it's a "clear-all" entry, unset the security flags and resume |
| * normal cleartext (or inherit-from-global) policy. |
| */ |
| clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 && |
| (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0); |
| |
| ASSERT(mutex_owned(&iptun->iptun_lock)); |
| itp = iptun->iptun_itp; |
| if (itp == NULL) { |
| if (clear_all) |
| goto bail; |
| if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL, |
| NULL, NULL)) != 0) |
| goto bail; |
| ASSERT(name[0] != '\0'); |
| if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL) |
| goto bail; |
| iptun->iptun_itp = itp; |
| } |
| |
| /* Allocate the actvec now, before holding itp or polhead locks. */ |
| ipsec_actvec_from_req(ipsr, &actp, &nact, ns); |
| if (actp == NULL) { |
| rc = ENOMEM; |
| goto bail; |
| } |
| |
| /* |
| * Just write on the active polhead. Save the primary/secondary stuff |
| * for spdsock operations. |
| * |
| * Mutex because we need to write to the polhead AND flags atomically. |
| * Other threads will acquire the polhead lock as a reader if the |
| * (unprotected) flag is set. |
| */ |
| mutex_enter(&itp->itp_lock); |
| if (itp->itp_flags & ITPF_P_TUNNEL) { |
| /* Oops, we lost a race. Let's get out of here. */ |
| rc = EBUSY; |
| goto mutex_bail; |
| } |
| old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0); |
| |
| if (old_policy) { |
| ITPF_CLONE(itp->itp_flags); |
| rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns); |
| if (rc != 0) { |
| /* inactive has already been cleared. */ |
| itp->itp_flags &= ~ITPF_IFLAGS; |
| goto mutex_bail; |
| } |
| rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); |
| ipsec_polhead_flush(itp->itp_policy, ns); |
| } else { |
| /* Else assume itp->itp_policy is already flushed. */ |
| rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); |
| } |
| |
| if (clear_all) { |
| ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0); |
| itp->itp_flags &= ~ITPF_PFLAGS; |
| rw_exit(&itp->itp_policy->iph_lock); |
| old_policy = B_FALSE; /* Clear out the inactive one too. */ |
| goto recover_bail; |
| } |
| |
| if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) { |
| rw_exit(&itp->itp_policy->iph_lock); |
| /* |
| * Adjust MTU and make sure the DL side knows what's up. |
| */ |
| itp->itp_flags = ITPF_P_ACTIVE; |
| (void) iptun_update_mtu(iptun, 0); |
| old_policy = B_FALSE; /* Blank out inactive - we succeeded */ |
| } else { |
| rw_exit(&itp->itp_policy->iph_lock); |
| rc = ENOMEM; |
| } |
| |
| recover_bail: |
| if (old_policy) { |
		/* Recover policy in the active polhead. */
| ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns); |
| ITPF_SWAP(itp->itp_flags); |
| } |
| |
| /* Clear policy in inactive polhead. */ |
| itp->itp_flags &= ~ITPF_IFLAGS; |
| rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER); |
| ipsec_polhead_flush(itp->itp_inactive, ns); |
| rw_exit(&itp->itp_inactive->iph_lock); |
| |
| mutex_bail: |
| mutex_exit(&itp->itp_lock); |
| |
| bail: |
| if (actp != NULL) |
| ipsec_actvec_free(actp, nact); |
| |
| return (rc); |
| } |
| |
| static iptun_typeinfo_t * |
| iptun_gettypeinfo(iptun_type_t type) |
| { |
| int i; |
| |
| for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) { |
| if (iptun_type_table[i].iti_type == type) |
| break; |
| } |
| return (&iptun_type_table[i]); |
| } |
| |
| /* |
| * Set the parameters included in ik on the tunnel iptun. Parameters that can |
| * only be set at creation time are set in iptun_create(). |
| */ |
| static int |
| iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik) |
| { |
| int err = 0; |
| netstack_t *ns = iptun->iptun_ns; |
| iptun_addr_t orig_laddr, orig_raddr; |
| uint_t orig_flags = iptun->iptun_flags; |
| |
| if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) { |
| if (orig_flags & IPTUN_LADDR) |
| orig_laddr = iptun->iptun_laddr; |
| if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0) |
| return (err); |
| iptun->iptun_flags |= IPTUN_LADDR; |
| } |
| |
| if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) { |
| if (orig_flags & IPTUN_RADDR) |
| orig_raddr = iptun->iptun_raddr; |
| if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0) |
| goto done; |
| iptun->iptun_flags |= IPTUN_RADDR; |
| } |
| |
| if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) { |
| /* |
| * Set IPsec policy originating from the ifconfig(1M) command |
| * line. This is traditionally called "simple" policy because |
| * the ipsec_req_t (iptun_kparam_secinfo) can only describe a |
| * simple policy of "do ESP on everything" and/or "do AH on |
| * everything" (as opposed to the rich policy that can be |
| * defined with ipsecconf(1M)). |
| */ |
| if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { |
| /* |
| * Can't set security properties for automatic |
| * tunnels. |
| */ |
| err = EINVAL; |
| goto done; |
| } |
| |
| if (!ipsec_loaded(ns->netstack_ipsec)) { |
| /* If IPsec can be loaded, try and load it now. */ |
| if (ipsec_failed(ns->netstack_ipsec)) { |
| err = EPROTONOSUPPORT; |
| goto done; |
| } |
| ipsec_loader_loadnow(ns->netstack_ipsec); |
| /* |
| * ipsec_loader_loadnow() returns while IPsec is |
| * loaded asynchronously. While a method exists to |
| * wait for IPsec to load (ipsec_loader_wait()), it |
| * requires use of a STREAMS queue to do a qwait(). |
| * We're not in STREAMS context here, and so we can't |
| * use it. This is not a problem in practice because |
| * in the vast majority of cases, key management and |
| * global policy will have loaded before any tunnels |
| * are plumbed, and so IPsec will already have been |
| * loaded. |
| */ |
| err = EAGAIN; |
| goto done; |
| } |
| |
| err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo); |
| if (err == 0) { |
| iptun->iptun_flags |= IPTUN_SIMPLE_POLICY; |
| iptun->iptun_simple_policy = ik->iptun_kparam_secinfo; |
| } |
| } |
| done: |
| if (err != 0) { |
| /* Restore original source and destination. */ |
| if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR && |
| (orig_flags & IPTUN_LADDR)) |
| iptun->iptun_laddr = orig_laddr; |
| if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) && |
| (orig_flags & IPTUN_RADDR)) |
| iptun->iptun_raddr = orig_raddr; |
| iptun->iptun_flags = orig_flags; |
| } |
| return (err); |
| } |
| |
| static int |
| iptun_register(iptun_t *iptun) |
| { |
| mac_register_t *mac; |
| int err; |
| |
| ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED)); |
| |
| if ((mac = mac_alloc(MAC_VERSION)) == NULL) |
| return (EINVAL); |
| |
| mac->m_type_ident = iptun->iptun_typeinfo->iti_ident; |
| mac->m_driver = iptun; |
| mac->m_dip = iptun_dip; |
| mac->m_instance = (uint_t)-1; |
| mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr; |
| mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ? |
| (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL; |
| mac->m_callbacks = &iptun_m_callbacks; |
| mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu; |
| mac->m_max_sdu = iptun->iptun_mtu; |
| if (iptun->iptun_header_size != 0) { |
| mac->m_pdata = &iptun->iptun_header; |
| mac->m_pdata_size = iptun->iptun_header_size; |
| } |
| if ((err = mac_register(mac, &iptun->iptun_mh)) == 0) |
| iptun->iptun_flags |= IPTUN_MAC_REGISTERED; |
| mac_free(mac); |
| return (err); |
| } |
| |
| static int |
| iptun_unregister(iptun_t *iptun) |
| { |
| int err; |
| |
| ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED); |
| if ((err = mac_unregister(iptun->iptun_mh)) == 0) |
| iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED; |
| return (err); |
| } |
| |
| static conn_t * |
| iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp) |
| { |
| conn_t *connp; |
| |
| if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL) |
| return (NULL); |
| |
| connp->conn_flags |= IPCL_IPTUN; |
| connp->conn_iptun = iptun; |
| connp->conn_recv = iptun_input; |
| connp->conn_rq = ns->netstack_iptun->iptuns_g_q; |
| connp->conn_wq = WR(connp->conn_rq); |
| /* |
| * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done |
| * for all other conn_t's. |
| * |
| * Note that there's an important distinction between iptun_zoneid and |
| * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global |
| * exclusive stack zones to make the ip module believe that the |
| * non-global zone is actually a global zone. Therefore, when |
| * interacting with the ip module, we must always use conn_zoneid. |
| */ |
| connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ? |
| crgetzoneid(credp) : GLOBAL_ZONEID; |
| connp->conn_cred = credp; |
| /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */ |
| crhold(connp->conn_cred); |
| |
| connp->conn_send = iptun->iptun_typeinfo->iti_txfunc; |
| connp->conn_af_isv6 = iptun->iptun_typeinfo->iti_ipvers == IPV6_VERSION; |
| ASSERT(connp->conn_ref == 1); |
| |
| mutex_enter(&connp->conn_lock); |
| connp->conn_state_flags &= ~CONN_INCIPIENT; |
| mutex_exit(&connp->conn_lock); |
| return (connp); |
| } |
| |
| static void |
| iptun_conn_destroy(conn_t *connp) |
| { |
| ip_quiesce_conn(connp); |
| connp->conn_iptun = NULL; |
| ASSERT(connp->conn_ref == 1); |
| CONN_DEC_REF(connp); |
| } |
| |
| static int |
| iptun_create_g_q(iptun_stack_t *iptuns, cred_t *credp) |
| { |
| int err; |
| conn_t *connp; |
| |
| ASSERT(iptuns->iptuns_g_q == NULL); |
| /* |
| * The global queue for this stack is set when iptunq_open() calls |
| * iptun_set_g_q(). |
| */ |
| err = ldi_open_by_name(IPTUNQ_DEV, FWRITE|FREAD, credp, |
| &iptuns->iptuns_g_q_lh, iptun_ldi_ident); |
| if (err == 0) { |
| connp = iptuns->iptuns_g_q->q_ptr; |
| connp->conn_recv = iptun_input; |
| } |
| return (err); |
| } |
| |
| static iptun_t * |
| iptun_alloc(void) |
| { |
| iptun_t *iptun; |
| |
| if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) { |
| bzero(iptun, sizeof (*iptun)); |
| atomic_inc_32(&iptun_tunnelcount); |
| } |
| return (iptun); |
| } |
| |
| static void |
| iptun_free(iptun_t *iptun) |
| { |
| ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED); |
| |
| if (iptun->iptun_flags & IPTUN_HASH_INSERTED) { |
| iptun_stack_t *iptuns = iptun->iptun_iptuns; |
| |
| mutex_enter(&iptun_hash_lock); |
| VERIFY(mod_hash_remove(iptun_hash, |
| IPTUN_HASH_KEY(iptun->iptun_linkid), |
| (mod_hash_val_t *)&iptun) == 0); |
| mutex_exit(&iptun_hash_lock); |
| iptun->iptun_flags &= ~IPTUN_HASH_INSERTED; |
| mutex_enter(&iptuns->iptuns_lock); |
| list_remove(&iptuns->iptuns_iptunlist, iptun); |
| mutex_exit(&iptuns->iptuns_lock); |
| } |
| |
| if (iptun->iptun_flags & IPTUN_BOUND) |
| iptun_unbind(iptun); |
| |
| /* |
| * After iptun_unregister(), there will be no threads executing a |
| * downcall from the mac module, including in the tx datapath. |
| */ |
| if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) |
| VERIFY(iptun_unregister(iptun) == 0); |
| |
| if (iptun->iptun_itp != NULL) { |
| /* |
| * Remove from the AVL tree, AND release the reference iptun_t |
| * itself holds on the ITP. |
| */ |
| itp_unlink(iptun->iptun_itp, iptun->iptun_ns); |
| ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns); |
| iptun->iptun_itp = NULL; |
| iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY; |
| } |
| |
| /* |
| * After ipcl_conn_destroy(), there will be no threads executing an |
| * upcall from ip (i.e., iptun_input()), and it is then safe to free |
| * the iptun_t. |
| */ |
| if (iptun->iptun_connp != NULL) { |
| iptun_conn_destroy(iptun->iptun_connp); |
| iptun->iptun_connp = NULL; |
| } |
| |
| netstack_rele(iptun->iptun_ns); |
| iptun->iptun_ns = NULL; |
| crfree(iptun->iptun_cred); |
| iptun->iptun_cred = NULL; |
| |
| kmem_cache_free(iptun_cache, iptun); |
| atomic_dec_32(&iptun_tunnelcount); |
| } |
| |
| int |
| iptun_create(iptun_kparams_t *ik, cred_t *credp) |
| { |
| iptun_t *iptun = NULL; |
| int err = 0, mherr; |
| char linkname[MAXLINKNAMELEN]; |
| ipsec_tun_pol_t *itp; |
| netstack_t *ns = NULL; |
| iptun_stack_t *iptuns; |
| datalink_id_t tmpid; |
| zoneid_t zoneid = crgetzoneid(credp); |
| boolean_t link_created = B_FALSE; |
| |
| /* The tunnel type is mandatory */ |
| if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE)) |
| return (EINVAL); |
| |
| /* |
| * Is the linkid that the caller wishes to associate with this new |
| * tunnel assigned to this zone? |
| */ |
| if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) { |
| if (zoneid != GLOBAL_ZONEID) |
| return (EINVAL); |
| } else if (zoneid == GLOBAL_ZONEID) { |
| return (EINVAL); |
| } |
| |
| /* |
| * Make sure that we're not trying to create a tunnel that has already |
| * been created. |
| */ |
| if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) { |
| iptun_exit(iptun); |
| iptun = NULL; |
| err = EEXIST; |
| goto done; |
| } |
| |
| ns = netstack_find_by_cred(credp); |
| iptuns = ns->netstack_iptun; |
| |
| /* |
| * Before we create any tunnel, we need to ensure that the default |
| * STREAMS queue (used to satisfy the ip module's requirement for one) |
| * is created. We only do this once per stack. The stream is closed |
	 * when the stack is destroyed in iptun_stack_fini().
| */ |
| mutex_enter(&iptuns->iptuns_lock); |
| if (iptuns->iptuns_g_q == NULL) |
| err = iptun_create_g_q(iptuns, zone_kcred()); |
| mutex_exit(&iptuns->iptuns_lock); |
| if (err != 0) |
| goto done; |
| |
| if ((iptun = iptun_alloc()) == NULL) { |
| err = ENOMEM; |
| goto done; |
| } |
| |
| iptun->iptun_linkid = ik->iptun_kparam_linkid; |
| iptun->iptun_zoneid = zoneid; |
| crhold(credp); |
| iptun->iptun_cred = credp; |
| iptun->iptun_ns = ns; |
| |
| iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type); |
| if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) { |
| err = EINVAL; |
| goto done; |
| } |
| |
| if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT) |
| iptun->iptun_flags |= IPTUN_IMPLICIT; |
| |
| if ((err = iptun_setparams(iptun, ik)) != 0) |
| goto done; |
| |
| iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT; |
| if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6) |
| iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT; |
| |
| iptun_headergen(iptun, B_FALSE); |
| |
| iptun->iptun_connp = iptun_conn_create(iptun, ns, credp); |
| if (iptun->iptun_connp == NULL) { |
| err = ENOMEM; |
| goto done; |
| } |
| |
| iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu; |
| iptun->iptun_dpmtu = iptun->iptun_mtu; |
| |
| /* |
| * Find an ITP based on linkname. If we have parms already set via |
| * the iptun_setparams() call above, it may have created an ITP for |
	 * us. We always call get_tunnel_policy() for DEBUG correctness
| * checks, and we may wish to refactor this to only check when |
| * iptun_itp is NULL. |
| */ |
| if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL, |
| NULL, NULL)) != 0) |
| goto done; |
| if ((itp = get_tunnel_policy(linkname, ns)) != NULL) |
| iptun->iptun_itp = itp; |
| |
| /* |
| * See if we have the necessary IP addresses assigned to this tunnel |
| * to try and bind them with ip underneath us. If we're not ready to |
| * bind yet, then we'll defer the bind operation until the addresses |
| * are modified. |
| */ |
| if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0)) |
| goto done; |
| |
| if ((err = iptun_register(iptun)) != 0) |
| goto done; |
| |
| err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid, |
| iptun->iptun_zoneid); |
| if (err != 0) |
| goto done; |
| link_created = B_TRUE; |
| |
| /* |
| * We hash by link-id as that is the key used by all other iptun |
| * interfaces (modify, delete, etc.). |
| */ |
| if ((mherr = mod_hash_insert(iptun_hash, |
| IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) { |
| mutex_enter(&iptuns->iptuns_lock); |
| list_insert_head(&iptuns->iptuns_iptunlist, iptun); |
| mutex_exit(&iptuns->iptuns_lock); |
| iptun->iptun_flags |= IPTUN_HASH_INSERTED; |
| } else if (mherr == MH_ERR_NOMEM) { |
| err = ENOMEM; |
| } else if (mherr == MH_ERR_DUPLICATE) { |
| err = EEXIST; |
| } else { |
| err = EINVAL; |
| } |
| |
| done: |
| if (iptun == NULL && ns != NULL) |
| netstack_rele(ns); |
| if (err != 0 && iptun != NULL) { |
| if (link_created) { |
| (void) dls_devnet_destroy(iptun->iptun_mh, &tmpid, |
| B_TRUE); |
| } |
| iptun->iptun_flags |= IPTUN_CONDEMNED; |
| iptun_free(iptun); |
| } |
| return (err); |
| } |
| |
| int |
| iptun_delete(datalink_id_t linkid, cred_t *credp) |
| { |
| int err; |
| iptun_t *iptun = NULL; |
| |
| if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0) |
| return (err); |
| |
| /* One cannot delete a tunnel that belongs to another zone. */ |
| if (iptun->iptun_zoneid != crgetzoneid(credp)) { |
| iptun_exit(iptun); |
| return (EACCES); |
| } |
| |
| /* |
| * We need to exit iptun in order to issue calls up the stack such as |
| * dls_devnet_destroy(). If we call up while still in iptun, deadlock |
| * with calls coming down the stack is possible. We prevent other |
| * threads from entering this iptun after we've exited it by setting |
| * the IPTUN_DELETE_PENDING flag. This will cause callers of |
| * iptun_enter() to block waiting on iptun_enter_cv. The assumption |
| * here is that the functions we're calling while IPTUN_DELETE_PENDING |
	 * is set do not result in an iptun_enter() call, as that would lead
	 * to deadlock.
| */ |
| iptun->iptun_flags |= IPTUN_DELETE_PENDING; |
| |
| /* Wait for any pending upcall to the mac module to complete. */ |
| while (iptun->iptun_flags & IPTUN_UPCALL_PENDING) |
| cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock); |
| |
| iptun_exit(iptun); |
| |
| if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) { |
| /* |
| * mac_disable() will fail with EBUSY if there are references |
| * to the iptun MAC. If there are none, then mac_disable() |
| * will assure that none can be acquired until the MAC is |
| * unregistered. |
| * |
| * XXX CR 6791335 prevents us from calling mac_disable() prior |
| * to dls_devnet_destroy(), so we unfortunately need to |
| * attempt to re-create the devnet node if mac_disable() |
| * fails. |
| */ |
| if ((err = mac_disable(iptun->iptun_mh)) != 0) { |
| (void) dls_devnet_create(iptun->iptun_mh, linkid, |
| iptun->iptun_zoneid); |
| } |
| } |
| |
| /* |
| * Now that we know the fate of this iptun_t, we need to clear |
| * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is |
| * slated to be freed. Either way, we need to signal the threads |
| * waiting in iptun_enter() so that they can either fail if |
| * IPTUN_CONDEMNED is set, or continue if it's not. |
| */ |
| mutex_enter(&iptun->iptun_lock); |
| iptun->iptun_flags &= ~IPTUN_DELETE_PENDING; |
| if (err == 0) |
| iptun->iptun_flags |= IPTUN_CONDEMNED; |
| cv_broadcast(&iptun->iptun_enter_cv); |
| mutex_exit(&iptun->iptun_lock); |
| |
| /* |
	 * Note that there is no danger in calling iptun_free() after having
	 * dropped the iptun_lock. At this point, callers of iptun_enter()
	 * are all coming through iptun_enter_by_linkid() (mac_disable() got
	 * rid of threads entering from mac callbacks, which call
	 * iptun_enter() directly), which holds iptun_hash_lock, and
	 * iptun_free() grabs that same lock in order to remove the iptun_t
	 * from the hash table.
| */ |
| if (err == 0) |
| iptun_free(iptun); |
| |
| return (err); |
| } |
| |
| int |
| iptun_modify(const iptun_kparams_t *ik, cred_t *credp) |
| { |
| iptun_t *iptun; |
| boolean_t laddr_change = B_FALSE, raddr_change = B_FALSE; |
| int err; |
| |
| if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) |
| return (err); |
| |
| /* One cannot modify a tunnel that belongs to another zone. */ |
| if (iptun->iptun_zoneid != crgetzoneid(credp)) { |
| err = EACCES; |
| goto done; |
| } |
| |
| /* The tunnel type cannot be changed */ |
| if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) { |
| err = EINVAL; |
| goto done; |
| } |
| |
| if ((err = iptun_setparams(iptun, ik)) != 0) |
| goto done; |
| iptun_headergen(iptun, B_FALSE); |
| |
| /* |
| * If any of the tunnel's addresses has been modified and the tunnel |
| * has the necessary addresses assigned to it, we need to try to bind |
| * with ip underneath us. If we're not ready to bind yet, then we'll |
| * try again when the addresses are modified later. |
| */ |
| laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR); |
| raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR); |
| if (laddr_change || raddr_change) { |
| if (iptun->iptun_flags & IPTUN_BOUND) |
| iptun_unbind(iptun); |
| if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) { |
| if (laddr_change) |
| iptun->iptun_flags &= ~IPTUN_LADDR; |
| if (raddr_change) |
| iptun->iptun_flags &= ~IPTUN_RADDR; |
| goto done; |
| } |
| } |
| |
| if (laddr_change) |
| iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE); |
| if (raddr_change) |
| iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE); |
| |
| done: |
| iptun_exit(iptun); |
| return (err); |
| } |
| |
| /* Given an IP tunnel's datalink id, fill in its parameters. */ |
| int |
| iptun_info(iptun_kparams_t *ik, cred_t *credp) |
| { |
| iptun_t *iptun; |
| int err; |
| |
| /* Is the tunnel link visible from the caller's zone? */ |
| if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid, |
| crgetzoneid(credp))) |
| return (ENOENT); |
| |
| if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) |
| return (err); |
| |
| bzero(ik, sizeof (iptun_kparams_t)); |
| |
| ik->iptun_kparam_linkid = iptun->iptun_linkid; |
| ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type; |
| ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE; |
| |
| if (iptun->iptun_flags & IPTUN_LADDR) { |
| iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr); |
| ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR; |
| } |
| if (iptun->iptun_flags & IPTUN_RADDR) { |
| iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr); |
| ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR; |
| } |
| |
| if (iptun->iptun_flags & IPTUN_IMPLICIT) |
| ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT; |
| |
| if (iptun->iptun_itp != NULL) { |
| mutex_enter(&iptun->iptun_itp->itp_lock); |
| if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) { |
| ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL; |
| if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) { |
| ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO; |
| ik->iptun_kparam_secinfo = |
| iptun->iptun_simple_policy; |
| } |
| } |
| mutex_exit(&iptun->iptun_itp->itp_lock); |
| } |
| |
	iptun_exit(iptun);
	return (err);
| } |
| |
| int |
| iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr) |
| { |
| if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr)) |
| return (EADDRNOTAVAIL); |
| ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr; |
| return (0); |
| } |
| |
| void |
| iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr) |
| { |
| *relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr; |
| } |
| |
| void |
| iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp) |
| { |
| iptun_t *iptun; |
| |
| if (iptun_enter_by_linkid(linkid, &iptun) != 0) |
| return; |
| if (iptun->iptun_itp != itp) { |
| ASSERT(iptun->iptun_itp == NULL); |
| ITP_REFHOLD(itp); |
| iptun->iptun_itp = itp; |
| /* IPsec policy means IPsec overhead, which means lower MTU. */ |
| (void) iptun_update_mtu(iptun, 0); |
| } |
| iptun_exit(iptun); |
| } |
| |
| /* |
| * Obtain the path MTU to the tunnel destination. |
| */ |
| static uint32_t |
| iptun_get_dst_pmtu(iptun_t *iptun) |
| { |
| ire_t *ire = NULL; |
| ip_stack_t *ipst = iptun->iptun_ns->netstack_ip; |
| uint32_t pmtu = 0; |
| |
| /* |
| * We only obtain the destination IRE for tunnels that have a remote |
| * tunnel address. |
| */ |
| if (!(iptun->iptun_flags & IPTUN_RADDR)) |
| return (0); |
| |
| switch (iptun->iptun_typeinfo->iti_ipvers) { |
| case IPV4_VERSION: |
| ire = ire_route_lookup(iptun->iptun_raddr4, INADDR_ANY, |
| INADDR_ANY, 0, NULL, NULL, iptun->iptun_connp->conn_zoneid, |
| NULL, (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT), ipst); |
| break; |
| case IPV6_VERSION: |
| ire = ire_route_lookup_v6(&iptun->iptun_raddr6, NULL, NULL, 0, |
| NULL, NULL, iptun->iptun_connp->conn_zoneid, NULL, |
| (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT), ipst); |
| break; |
| } |
| |
| if (ire != NULL) { |
| pmtu = ire->ire_max_frag; |
| ire_refrele(ire); |
| } |
| return (pmtu); |
| } |
| |
| /* |
| * Returns the max of old_ovhd and the overhead associated with pol. |
| */ |
| static uint32_t |
| iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd) |
| { |
| uint32_t new_ovhd = old_ovhd; |
| |
| while (pol != NULL) { |
| new_ovhd = max(new_ovhd, |
| ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); |
| pol = pol->ipsp_hash.hash_next; |
| } |
| return (new_ovhd); |
| } |
| |
| static uint32_t |
| iptun_get_ipsec_overhead(iptun_t *iptun) |
| { |
| ipsec_policy_root_t *ipr; |
| ipsec_policy_head_t *iph; |
| ipsec_policy_t *pol; |
| ipsec_selector_t sel; |
| int i; |
| uint32_t ipsec_ovhd = 0; |
| ipsec_tun_pol_t *itp = iptun->iptun_itp; |
| netstack_t *ns = iptun->iptun_ns; |
| |
| if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) { |
| /* |
| * Consult global policy, just in case. This will only work |
| * if we have both source and destination addresses to work |
| * with. |
| */ |
| if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) != |
| (IPTUN_LADDR|IPTUN_RADDR)) |
| return (0); |
| |
| iph = ipsec_system_policy(ns); |
| bzero(&sel, sizeof (sel)); |
| sel.ips_isv4 = |
| (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION); |
| switch (iptun->iptun_typeinfo->iti_ipvers) { |
| case IPV4_VERSION: |
| sel.ips_local_addr_v4 = iptun->iptun_laddr4; |
| sel.ips_remote_addr_v4 = iptun->iptun_raddr4; |
| break; |
| case IPV6_VERSION: |
| sel.ips_local_addr_v6 = iptun->iptun_laddr6; |
| sel.ips_remote_addr_v6 = iptun->iptun_raddr6; |
| break; |
| } |
| /* Check for both IPv4 and IPv6. */ |
| sel.ips_protocol = IPPROTO_ENCAP; |
| pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, |
| &sel, ns); |
| if (pol != NULL) { |
| ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act); |
| IPPOL_REFRELE(pol, ns); |
| } |
| sel.ips_protocol = IPPROTO_IPV6; |
| pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, |
| &sel, ns); |
| if (pol != NULL) { |
| ipsec_ovhd = max(ipsec_ovhd, |
| ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); |
| IPPOL_REFRELE(pol, ns); |
| } |
| IPPH_REFRELE(iph, ns); |
| } else { |
| /* |
| * Look through all of the possible IPsec actions for the |
| * tunnel, and find the largest potential IPsec overhead. |
| */ |
| iph = itp->itp_policy; |
| rw_enter(&iph->iph_lock, RW_READER); |
| ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]); |
| ipsec_ovhd = iptun_max_policy_overhead( |
| ipr->ipr_nonhash[IPSEC_AF_V4], 0); |
| ipsec_ovhd = iptun_max_policy_overhead( |
| ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd); |
| for (i = 0; i < ipr->ipr_nchains; i++) { |
| ipsec_ovhd = iptun_max_policy_overhead( |
| ipr->ipr_hash[i].hash_head, ipsec_ovhd); |
| } |
| rw_exit(&iph->iph_lock); |
| } |
| |
| return (ipsec_ovhd); |
| } |
| |
| /* |
| * Calculate and return the maximum possible MTU for the given tunnel. |
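 *
 * For example, assuming a destination path-MTU of 1500 bytes: an IPv4
 * tunnel using the default header yields 1500 - sizeof (ipha_t) (20) =
 * 1480, while an IPv6 tunnel carrying an encapsulation limit option
 * yields 1500 - sizeof (iptun_ipv6hdrs_t) (48) = 1452, less any IPsec
 * overhead in both cases.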
| */ |
| static uint32_t |
| iptun_get_maxmtu(iptun_t *iptun, uint32_t new_pmtu) |
| { |
| size_t header_size, ipsec_overhead; |
| uint32_t maxmtu, pmtu; |
| |
| /* |
| * Start with the path-MTU to the remote address, which is either |
| * provided as the new_pmtu argument, or obtained using |
| * iptun_get_dst_pmtu(). |
| */ |
| if (new_pmtu != 0) { |
| if (iptun->iptun_flags & IPTUN_RADDR) { |
| iptun->iptun_dpmtu = new_pmtu; |
| iptun->iptun_dpmtu_lastupdate = ddi_get_lbolt(); |
| } |
| pmtu = new_pmtu; |
| } else if (iptun->iptun_flags & IPTUN_RADDR) { |
| if ((pmtu = iptun_get_dst_pmtu(iptun)) == 0) { |
| /* |
| * We weren't able to obtain the path-MTU of the |
| * destination. Use the previous value. |
| */ |
| pmtu = iptun->iptun_dpmtu; |
| } else { |
| iptun->iptun_dpmtu = pmtu; |
| iptun->iptun_dpmtu_lastupdate = ddi_get_lbolt(); |
| } |
| } else { |
| /* |
| * We have no path-MTU information to go on, use the maximum |
| * possible value. |
| */ |
| pmtu = iptun->iptun_typeinfo->iti_maxmtu; |
| } |
| |
| /* |
| * Now calculate tunneling overhead and subtract that from the |
| * path-MTU information obtained above. |
| */ |
| if (iptun->iptun_header_size != 0) { |
| header_size = iptun->iptun_header_size; |
| } else { |
| switch (iptun->iptun_typeinfo->iti_ipvers) { |
| case IPV4_VERSION: |
| header_size = sizeof (ipha_t); |
| if (is_system_labeled()) |
| header_size += IP_MAX_OPT_LENGTH; |
| break; |
| case IPV6_VERSION: |
| header_size = sizeof (iptun_ipv6hdrs_t); |
| break; |
| } |
| } |
| |
| ipsec_overhead = iptun_get_ipsec_overhead(iptun); |
| |
| maxmtu = pmtu - (header_size + ipsec_overhead); |
| return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu)); |
| } |
| |
| /* |
| * Re-calculate the tunnel's MTU and notify the MAC layer of any change in |
| * MTU. The new_pmtu argument is the new path MTU to the tunnel destination |
| * to be used in the tunnel MTU calculation. Passing in 0 for new_pmtu causes |
 * the path MTU to be dynamically updated using iptun_get_dst_pmtu().
| * |
| * If the calculated tunnel MTU is different than its previous value, then we |
| * notify the MAC layer above us of this change using mac_maxsdu_update(). |
| */ |
| static uint32_t |
| iptun_update_mtu(iptun_t *iptun, uint32_t new_pmtu) |
| { |
| uint32_t newmtu; |
| |
| /* |
| * We return the current MTU without updating it if it was pegged to a |
| * static value using the MAC_PROP_MTU link property. |
| */ |
| if (iptun->iptun_flags & IPTUN_FIXED_MTU) |
| return (iptun->iptun_mtu); |
| |
| /* If the MTU isn't fixed, then use the maximum possible value. */ |
| newmtu = iptun_get_maxmtu(iptun, new_pmtu); |
| |
| /* |
| * We only dynamically adjust the tunnel MTU for tunnels with |
| * destinations because dynamic MTU calculations are based on the |
| * destination path-MTU. |
| */ |
| if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) { |
| iptun->iptun_mtu = newmtu; |
| if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) |
| iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); |
| } |
| |
| return (newmtu); |
| } |
| |
| /* |
 * Frees a packet or packet chain, incrementing *stat (if stat is non-NULL)
 * once for each freed packet.
| */ |
| static void |
| iptun_drop_pkt(mblk_t *mp, uint64_t *stat) |
| { |
| mblk_t *pktmp; |
| |
| for (pktmp = mp; pktmp != NULL; pktmp = mp) { |
| mp = mp->b_next; |
| pktmp->b_next = NULL; |
| if (stat != NULL) |
| atomic_inc_64(stat); |
| freemsg(pktmp); |
| } |
| } |
| |
| /* |
 * Allocates and returns a new mblk to hold an IP and ICMP header, and chains
 * the original packet to its b_cont. Returns NULL on failure.
| */ |
| static mblk_t * |
| iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt) |
| { |
| mblk_t *icmperr_mp; |
| |
| if ((icmperr_mp = allocb_tmpl(hdrs_size, orig_pkt)) != NULL) { |
| icmperr_mp->b_wptr += hdrs_size; |
| /* tack on the offending packet */ |
| icmperr_mp->b_cont = orig_pkt; |
| } |
| return (icmperr_mp); |
| } |
| |
| /* |
| * Transmit an ICMP error. mp->b_rptr points at the packet to be included in |
| * the ICMP error. |
| */ |
| static void |
| iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp) |
| { |
| size_t orig_pktsize, hdrs_size; |
| mblk_t *icmperr_mp; |
| ipha_t *new_ipha; |
| icmph_t *new_icmp; |
| |
| orig_pktsize = msgdsize(mp); |
| hdrs_size = sizeof (ipha_t) + sizeof (icmph_t); |
| if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { |
| iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); |
| return; |
| } |
| |
| new_ipha = (ipha_t *)icmperr_mp->b_rptr; |
| new_icmp = (icmph_t *)(new_ipha + 1); |
| |
| new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; |
| new_ipha->ipha_type_of_service = 0; |
| new_ipha->ipha_ident = 0; |
| new_ipha->ipha_fragment_offset_and_flags = 0; |
| new_ipha->ipha_ttl = orig_ipha->ipha_ttl; |
| new_ipha->ipha_protocol = IPPROTO_ICMP; |
| new_ipha->ipha_src = orig_ipha->ipha_dst; |
| new_ipha->ipha_dst = orig_ipha->ipha_src; |
| new_ipha->ipha_hdr_checksum = 0; /* will be computed by ip */ |
| new_ipha->ipha_length = htons(hdrs_size + orig_pktsize); |
| |
| *new_icmp = *icmp; |
| new_icmp->icmph_checksum = 0; |
| new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0); |
| |
| ip_output(iptun->iptun_connp, icmperr_mp, iptun->iptun_connp->conn_wq, |
| IP_WPUT); |
| } |
| |
| static void |
| iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp) |
| { |
| size_t orig_pktsize, hdrs_size; |
| mblk_t *icmp6err_mp; |
| ip6_t *new_ip6h; |
| icmp6_t *new_icmp6; |
| |
| orig_pktsize = msgdsize(mp); |
| hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t); |
| if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { |
| iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); |
| return; |
| } |
| |
| new_ip6h = (ip6_t *)icmp6err_mp->b_rptr; |
| new_icmp6 = (icmp6_t *)(new_ip6h + 1); |
| |
| new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf; |
| new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize); |
| new_ip6h->ip6_hops = orig_ip6h->ip6_hops; |
| new_ip6h->ip6_nxt = IPPROTO_ICMPV6; |
| new_ip6h->ip6_src = orig_ip6h->ip6_dst; |
| new_ip6h->ip6_dst = orig_ip6h->ip6_src; |
| |
| *new_icmp6 = *icmp6; |
	/*
	 * Seed the checksum field with the payload length (a pseudo-header
	 * component); the actual checksum is calculated in
	 * ip_wput_ire_v6().
	 */
| new_icmp6->icmp6_cksum = new_ip6h->ip6_plen; |
| |
| ip_output_v6(iptun->iptun_connp, icmp6err_mp, |
| iptun->iptun_connp->conn_wq, IP_WPUT); |
| } |
| |
| static void |
| iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp, |
| uint8_t type, uint8_t code) |
| { |
| icmph_t icmp; |
| |
| bzero(&icmp, sizeof (icmp)); |
| icmp.icmph_type = type; |
| icmp.icmph_code = code; |
| |
| iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp); |
| } |
| |
| static void |
| iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha, |
| mblk_t *mp) |
| { |
| icmph_t icmp; |
| |
| icmp.icmph_type = ICMP_DEST_UNREACHABLE; |
| icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED; |
| icmp.icmph_du_zero = 0; |
| icmp.icmph_du_mtu = htons(newmtu); |
| |
| iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp); |
| } |
| |
| static void |
| iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp, |
| uint8_t type, uint8_t code, uint32_t offset) |
| { |
| icmp6_t icmp6; |
| |
| bzero(&icmp6, sizeof (icmp6)); |
| icmp6.icmp6_type = type; |
| icmp6.icmp6_code = code; |
| if (type == ICMP6_PARAM_PROB) |
| icmp6.icmp6_pptr = htonl(offset); |
| |
| iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp); |
| } |
| |
| static void |
| iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h, |
| mblk_t *mp) |
| { |
| icmp6_t icmp6; |
| |
| icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; |
| icmp6.icmp6_code = 0; |
| icmp6.icmp6_mtu = htonl(newmtu); |
| |
| iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp); |
| } |
| |
| /* |
| * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The |
| * mp argument is only used to do bounds checking. |
| */ |
| static boolean_t |
| is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) |
| { |
| uint16_t hlen; |
| |
| if (ipha != NULL) { |
| icmph_t *icmph; |
| |
| ASSERT(ip6h == NULL); |
| if (ipha->ipha_protocol != IPPROTO_ICMP) |
| return (B_FALSE); |
| |
| hlen = IPH_HDR_LENGTH(ipha); |
| icmph = (icmph_t *)((uint8_t *)ipha + hlen); |
| return (ICMP_IS_ERROR(icmph->icmph_type) || |
| icmph->icmph_type == ICMP_REDIRECT); |
| } else { |
| icmp6_t *icmp6; |
| uint8_t *nexthdrp; |
| |
| ASSERT(ip6h != NULL); |
| if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) || |
| *nexthdrp != IPPROTO_ICMPV6) { |
| return (B_FALSE); |
| } |
| |
| icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen); |
| return (ICMP6_IS_ERROR(icmp6->icmp6_type) || |
| icmp6->icmp6_type == ND_REDIRECT); |
| } |
| } |
| |
| /* |
| * Find inner and outer IP headers from a tunneled packet as setup for calls |
| * into ipsec_tun_{in,out}bound(). |
| */ |
| static size_t |
| iptun_find_headers(mblk_t *mp, ipha_t **outer4, ipha_t **inner4, ip6_t **outer6, |
| ip6_t **inner6) |
| { |
| ipha_t *ipha; |
| size_t outer_hlen; |
| size_t first_mblkl = MBLKL(mp); |
| mblk_t *inner_mp; |
| |
| /* |
| * Don't bother handling packets that don't have a full IP header in |
	 * the first mblk. For the input path, the ip module ensures that this
| * won't happen, and on the output path, the IP tunneling MAC-type |
| * plugins ensure that this also won't happen. |
| */ |
| if (first_mblkl < sizeof (ipha_t)) |
| return (0); |
| ipha = (ipha_t *)(mp->b_rptr); |
| switch (IPH_HDR_VERSION(ipha)) { |
| case IPV4_VERSION: |
| *outer4 = ipha; |
| *outer6 = NULL; |
| outer_hlen = IPH_HDR_LENGTH(ipha); |
| break; |
| case IPV6_VERSION: |
| *outer4 = NULL; |
| *outer6 = (ip6_t *)ipha; |
| outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha); |
| break; |
| default: |
| return (0); |
| } |
| |
| if (first_mblkl < outer_hlen || |
| (first_mblkl == outer_hlen && mp->b_cont == NULL)) |
| return (0); |
| |
| /* |
| * We don't bother doing a pullup here since the outer header will |
| * just get stripped off soon on input anyway. We just want to ensure |
| * that the inner* pointer points to a full header. |
| */ |
| if (first_mblkl == outer_hlen) { |
| inner_mp = mp->b_cont; |
| ipha = (ipha_t *)inner_mp->b_rptr; |
| } else { |
| inner_mp = mp; |
| ipha = (ipha_t *)(mp->b_rptr + outer_hlen); |
| } |
| switch (IPH_HDR_VERSION(ipha)) { |
| case IPV4_VERSION: |
| if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t)) |
| return (0); |
| *inner4 = ipha; |
| *inner6 = NULL; |
| break; |
| case IPV6_VERSION: |
| if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t)) |
| return (0); |
| *inner4 = NULL; |
| *inner6 = (ip6_t *)ipha; |
| break; |
| default: |
| return (0); |
| } |
| |
| return (outer_hlen); |
| } |
| |
| /* |
| * Received ICMP error in response to an X over IPv4 packet that we |
| * transmitted. |
| * |
| * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of |
| * the following: |
| * |
| * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP] |
| * |
| * or |
| * |
| * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP] |
| * |
| * And "outer4" will get set to IPv4(1), and inner[46] will correspond to |
| * whatever the very-inner packet is (IPv4(2) or IPv6). |
| */ |
| static void |
| iptun_input_icmp_v4(iptun_t *iptun, mblk_t *ipsec_mp, mblk_t *data_mp, |
| icmph_t *icmph) |
| { |
| uint8_t *orig; |
| ipha_t *outer4, *inner4; |
| ip6_t *outer6, *inner6; |
| int outer_hlen; |
| uint8_t type, code; |
| |
| /* |
| * Change the db_type to M_DATA because subsequent operations assume |
	 * the ICMP packet is M_DATA again (e.g., calls to msgdsize()).
| */ |
| data_mp->b_datap->db_type = M_DATA; |
| |
| ASSERT(data_mp->b_cont == NULL); |
| /* |
| * Temporarily move b_rptr forward so that iptun_find_headers() can |
| * find headers in the ICMP packet payload. |
| */ |
| orig = data_mp->b_rptr; |
| data_mp->b_rptr = (uint8_t *)(icmph + 1); |
| /* |
| * The ip module ensures that ICMP errors contain at least the |
| * original IP header (otherwise, the error would never have made it |
| * here). |
| */ |
| ASSERT(MBLKL(data_mp) >= 0); |
| outer_hlen = iptun_find_headers(data_mp, &outer4, &inner4, &outer6, |
| &inner6); |
| ASSERT(outer6 == NULL); |
| data_mp->b_rptr = orig; |
| if (outer_hlen == 0) { |
| iptun_drop_pkt((ipsec_mp != NULL ? ipsec_mp : data_mp), |
| &iptun->iptun_ierrors); |
| return; |
| } |
| |
| /* Only ICMP errors due to tunneled packets should reach here. */ |
| ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP || |
| outer4->ipha_protocol == IPPROTO_IPV6); |
| |
| /* ipsec_tun_inbound() always frees ipsec_mp. */ |
| if (!ipsec_tun_inbound(ipsec_mp, &data_mp, iptun->iptun_itp, |
| inner4, inner6, outer4, outer6, -outer_hlen, |
| iptun->iptun_ns)) { |
| /* Callee did all of the freeing. */ |
| atomic_inc_64(&iptun->iptun_ierrors); |
| return; |
| } |
	/* We should never see a reassembled fragment here. */
| ASSERT(data_mp->b_next == NULL); |
| |
| data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen; |
| |
| /* |
| * If the original packet being transmitted was itself an ICMP error, |
| * then drop this packet. We don't want to generate an ICMP error in |
| * response to an ICMP error. |
| */ |
| if (is_icmp_error(data_mp, inner4, inner6)) { |
| iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); |
| return; |
| } |
| |
| switch (icmph->icmph_type) { |
| case ICMP_DEST_UNREACHABLE: |
| type = (inner4 != NULL ? icmph->icmph_type : ICMP6_DST_UNREACH); |
| switch (icmph->icmph_code) { |
| case ICMP_FRAGMENTATION_NEEDED: { |
| uint32_t newmtu; |
| |
| /* |
			 * The tunnel may also have IPsec policy overhead;
			 * iptun_update_mtu() reconciles the advertised path
			 * MTU with that overhead.
| */ |
| newmtu = |
| iptun_update_mtu(iptun, ntohs(icmph->icmph_du_mtu)); |
| |
| if (inner4 != NULL) { |
| iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, |
| data_mp); |
| } else { |
| iptun_icmp_toobig_v6(iptun, newmtu, inner6, |
| data_mp); |
| } |
| return; |
| } |
| case ICMP_DEST_NET_UNREACH_ADMIN: |
| case ICMP_DEST_HOST_UNREACH_ADMIN: |
| code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN : |
| ICMP6_DST_UNREACH_ADMIN); |
| break; |
| default: |
| code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : |
| ICMP6_DST_UNREACH_ADDR); |
| break; |
| } |
| break; |
| case ICMP_TIME_EXCEEDED: |
| if (inner6 != NULL) { |
| type = ICMP6_TIME_EXCEEDED; |
| code = 0; |
| } /* else we're already set. */ |
| break; |
| case ICMP_PARAM_PROBLEM: |
| /* |
| * This is a problem with the outer header we transmitted. |
| * Treat this as an output error. |
| */ |
| iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); |
| return; |
| default: |
| iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); |
| return; |
| } |
| |
| if (inner4 != NULL) |
| iptun_icmp_error_v4(iptun, inner4, data_mp, type, code); |
| else |
| iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0); |
| } |
| |
| /* |
| * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel |
| * Encapsulation Limit destination option. If there is one, set encaplim_ptr |
| * to point to the option value. |
| */ |
| static boolean_t |
| iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr) |
| { |
| ip6_pkt_t pkt; |
| uint8_t *endptr; |
| ip6_dest_t *destp; |
| struct ip6_opt *optp; |
| |
| pkt.ipp_fields = 0; /* must be initialized */ |
| (void) ip_find_hdr_v6(mp, ip6h, &pkt, NULL); |
| if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) { |
| destp = pkt.ipp_dstopts; |
| } else if ((pkt.ipp_fields & IPPF_RTDSTOPTS) != 0) { |
| destp = pkt.ipp_rtdstopts; |
| } else { |
| return (B_FALSE); |
| } |
| |
| endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1); |
| optp = (struct ip6_opt *)(destp + 1); |
| while (endptr - (uint8_t *)optp > sizeof (*optp)) { |
| if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) { |
| if ((uint8_t *)(optp + 1) >= endptr) |
| return (B_FALSE); |
| *encaplim_ptr = (uint8_t *)&optp[1]; |
| return (B_TRUE); |
| } |
| optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2); |
| } |
| return (B_FALSE); |
| } |
| |
| /* |
| * Received ICMPv6 error in response to an X over IPv6 packet that we |
| * transmitted. |
| * |
| * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of |
| * the following: |
| * |
| * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP] |
| * |
| * or |
| * |
| * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP] |
| * |
| * And "outer6" will get set to IPv6(1), and inner[46] will correspond to |
| * whatever the very-inner packet is (IPv4 or IPv6(2)). |
| */ |
| static void |
| iptun_input_icmp_v6(iptun_t *iptun, mblk_t *ipsec_mp, mblk_t *data_mp, |
| icmp6_t *icmp6h) |
| { |
| uint8_t *orig; |
| ipha_t *outer4, *inner4; |
| ip6_t *outer6, *inner6; |
| int outer_hlen; |
| uint8_t type, code; |
| |
| /* |
| * Change the db_type to M_DATA because subsequent operations assume |
	 * the ICMP packet is M_DATA again (e.g., calls to msgdsize()).
| */ |
| data_mp->b_datap->db_type = M_DATA; |
| |
| ASSERT(data_mp->b_cont == NULL); |
| |
| /* |
| * Temporarily move b_rptr forward so that iptun_find_headers() can |
| * find IP headers in the ICMP packet payload. |
| */ |
| orig = data_mp->b_rptr; |
| data_mp->b_rptr = (uint8_t *)(icmp6h + 1); |
| /* |
| * The ip module ensures that ICMP errors contain at least the |
| * original IP header (otherwise, the error would never have made it |
| * here). |
| */ |
| ASSERT(MBLKL(data_mp) >= 0); |
| outer_hlen = iptun_find_headers(data_mp, &outer4, &inner4, &outer6, |
| &inner6); |
| ASSERT(outer4 == NULL); |
	data_mp->b_rptr = orig;	/* Restore b_rptr */
| if (outer_hlen == 0) { |
| iptun_drop_pkt((ipsec_mp != NULL ? ipsec_mp : data_mp), |
| &iptun->iptun_ierrors); |
| return; |
| } |
| |
| if (!ipsec_tun_inbound(ipsec_mp, &data_mp, iptun->iptun_itp, |
| inner4, inner6, outer4, outer6, -outer_hlen, |
| iptun->iptun_ns)) { |
| /* Callee did all of the freeing. */ |
| atomic_inc_64(&iptun->iptun_ierrors); |
| return; |
| } |
	/* We should never see a reassembled fragment here. */
| ASSERT(data_mp->b_next == NULL); |
| |
| data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen; |
| |
| /* |
| * If the original packet being transmitted was itself an ICMP error, |
| * then drop this packet. We don't want to generate an ICMP error in |
| * response to an ICMP error. |
| */ |
| if (is_icmp_error(data_mp, inner4, inner6)) { |
| iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); |
| return; |
| } |
| |
| switch (icmp6h->icmp6_type) { |
| case ICMP6_PARAM_PROB: { |
| uint8_t *encaplim_ptr; |
| |
| /* |
| * If the ICMPv6 error points to a valid Tunnel Encapsulation |
| * Limit option and the limit value is 0, then fall through |
| * and send a host unreachable message. Otherwise, treat the |
| * error as an output error, as there must have been a problem |
| * with a packet we sent. |
| */ |
| if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) || |
| (icmp6h->icmp6_pptr != |
| ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) || |
| *encaplim_ptr != 0) { |
| iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); |
| return; |
| } |
| /* FALLTHRU */ |
| } |
| case ICMP6_TIME_EXCEEDED: |
| case ICMP6_DST_UNREACH: |
| type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE : |
| ICMP6_DST_UNREACH); |
| code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : |
| ICMP6_DST_UNREACH_ADDR); |
| break; |
| case ICMP6_PACKET_TOO_BIG: { |
| uint32_t newmtu; |
| |
| /* |
		 * The tunnel may also have IPsec policy overhead;
		 * iptun_update_mtu() reconciles the advertised path MTU with
		 * that overhead.
| */ |
| newmtu = iptun_update_mtu(iptun, ntohl(icmp6h->icmp6_mtu)); |
| |
| if (inner4 != NULL) { |
| iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, |
| data_mp); |
| } else { |
| iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp); |
| } |
| return; |
| } |
| default: |
| iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); |
| return; |
| } |
| |
| if (inner4 != NULL) |
| iptun_icmp_error_v4(iptun, inner4, data_mp, type, code); |
| else |
| iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0); |
| } |
| |
| static void |
| iptun_input_icmp(iptun_t *iptun, mblk_t *ipsec_mp, mblk_t *data_mp) |
| { |
| mblk_t *tmpmp; |
| size_t hlen; |
| |
| if (data_mp->b_cont != NULL) { |
| /* |
| * Since ICMP error processing necessitates access to bits |
| * that are within the ICMP error payload (the original packet |
| * that caused the error), pull everything up into a single |
| * block for convenience. |
| */ |
| data_mp->b_datap->db_type = M_DATA; |
| if ((tmpmp = msgpullup(data_mp, -1)) == NULL) { |
| iptun_drop_pkt((ipsec_mp != NULL ? ipsec_mp : data_mp), |
| &iptun->iptun_norcvbuf); |
| return; |
| } |
| freemsg(data_mp); |
| data_mp = tmpmp; |
| if (ipsec_mp != NULL) |
| ipsec_mp->b_cont = data_mp; |
| } |
| |
| switch (iptun->iptun_typeinfo->iti_ipvers) { |
| case IPV4_VERSION: |
| /* |
| * The outer IP header coming up from IP is always ipha_t |
		 * aligned (otherwise, we would have crashed in ip).
| */ |
| hlen = IPH_HDR_LENGTH((ipha_t *)data_mp->b_rptr); |
| iptun_input_icmp_v4(iptun, ipsec_mp, data_mp, |
| (icmph_t *)(data_mp->b_rptr + hlen)); |
| break; |
| case IPV6_VERSION: |
| hlen = ip_hdr_length_v6(data_mp, (ip6_t *)data_mp->b_rptr); |
| iptun_input_icmp_v6(iptun, ipsec_mp, data_mp, |
| (icmp6_t *)(data_mp->b_rptr + hlen)); |
| break; |
| } |
| } |
| |
| static boolean_t |
| iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) |
| { |
| ipaddr_t v4addr; |
| |
| /* |
| * It's possible that someone sent us an IPv4-in-IPv4 packet with the |
| * IPv4 address of a 6to4 tunnel as the destination. |
| */ |
| if (inner6 == NULL) |
| return (B_FALSE); |
| |
| /* |
| * Make sure that the IPv6 destination is within the site that this |
| * 6to4 tunnel is routing for. We don't want people bouncing random |
| * tunneled IPv6 packets through this 6to4 router. |
| */ |
| IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr); |
| if (outer4->ipha_dst != v4addr) |
| return (B_FALSE); |
| |
| if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) { |
| /* |
| * Section 9 of RFC 3056 (security considerations) suggests |
| * that when a packet is from a 6to4 site (i.e., it's not a |
		 * global address being forwarded from a relay router), we
		 * make sure that the packet was tunneled by that site's 6to4
| * router. |
| */ |
| IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); |
| if (outer4->ipha_src != v4addr) |
| return (B_FALSE); |
| } else { |
| /* |
| * Only accept packets from a relay router if we've configured |
| * outbound relay router functionality. |
| */ |
| if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) |
| return (B_FALSE); |
| } |
| |
| return (B_TRUE); |
| } |
| |
| /* |
| * Input function for everything that comes up from the ip module below us. |
| * This is called directly from the ip module via connp->conn_recv(). |
| * |
| * There are two kinds of packets that can arrive here: (1) IP-in-IP tunneled |
| * packets and (2) ICMP errors containing IP-in-IP packets transmitted by us. |
| * They have the following structure: |
| * |
| * 1) M_DATA |
| * 2) M_CTL[->M_DATA] |
| * |
 * (2) is an M_CTL optionally followed by M_DATA, where the M_CTL block is the
| * start of the actual ICMP packet (it doesn't contain any special control |
| * information). |
| * |
| * Either (1) or (2) can be IPsec-protected, in which case an M_CTL block |
| * containing an ipsec_in_t will have been prepended to either (1) or (2), |
| * making a total of four combinations of possible mblk chains: |
| * |
| * A) (1) |
| * B) (2) |
| * C) M_CTL(ipsec_in_t)->(1) |
| * D) M_CTL(ipsec_in_t)->(2) |
| */ |
| /* ARGSUSED */ |
| static void |
| iptun_input(void *arg, mblk_t *mp, void *arg2) |
| { |
| conn_t *connp = arg; |
| iptun_t *iptun = connp->conn_iptun; |
| int outer_hlen; |
| ipha_t *outer4, *inner4; |
| ip6_t *outer6, *inner6; |
| mblk_t *data_mp = mp; |
| |
| ASSERT(IPCL_IS_IPTUN(connp)); |
| ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); |
| |
| if (DB_TYPE(mp) == M_CTL) { |
| if (((ipsec_in_t *)(mp->b_rptr))->ipsec_in_type != IPSEC_IN) { |
| iptun_input_icmp(iptun, NULL, mp); |
| return; |
| } |
| |
| data_mp = mp->b_cont; |
| if (DB_TYPE(data_mp) == M_CTL) { |
| /* Protected ICMP packet. */ |
| iptun_input_icmp(iptun, mp, data_mp); |
| return; |
| } |
| } |
| |
| /* |
	 * Request the destination's path MTU information regularly in case
	 * the path MTU has increased.
| */ |
| if (IPTUN_PMTU_TOO_OLD(iptun)) |
| iptun_task_dispatch(iptun, IPTUN_TASK_PMTU_UPDATE); |
| |
| if ((outer_hlen = iptun_find_headers(data_mp, &outer4, &inner4, &outer6, |
| &inner6)) == 0) |
| goto drop; |
| |
| /* |
| * If the system is labeled, we call tsol_check_dest() on the packet |
| * destination (our local tunnel address) to ensure that the packet as |
| * labeled should be allowed to be sent to us. We don't need to call |
| * the more involved tsol_receive_local() since the tunnel link itself |
| * cannot be assigned to shared-stack non-global zones. |
| */ |
| if (is_system_labeled()) { |
| cred_t *msg_cred; |
| |
| if ((msg_cred = msg_getcred(data_mp, NULL)) == NULL) |
| goto drop; |
| if (tsol_check_dest(msg_cred, (outer4 != NULL ? |
| (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst), |
| (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION), |
| CONN_MAC_DEFAULT, NULL) != 0) |
| goto drop; |
| } |
| |
| if (!ipsec_tun_inbound((mp == data_mp ? NULL : mp), &data_mp, |
| iptun->iptun_itp, inner4, inner6, outer4, outer6, outer_hlen, |
| iptun->iptun_ns)) { |
| /* Callee did all of the freeing. */ |
| return; |
| } |
| mp = data_mp; |
| |
| if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && |
| !iptun_in_6to4_ok(iptun, outer4, inner6)) |
| goto drop; |
| |
| /* |
| * We need to statistically account for each packet individually, so |
| * we might as well split up any b_next chains here. |
| */ |
| do { |
| mp = data_mp->b_next; |
| data_mp->b_next = NULL; |
| |
| atomic_inc_64(&iptun->iptun_ipackets); |
| atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp)); |
| mac_rx(iptun->iptun_mh, NULL, data_mp); |
| |
| data_mp = mp; |
| } while (data_mp != NULL); |
| return; |
| drop: |
| iptun_drop_pkt(mp, &iptun->iptun_ierrors); |
| } |
| |
| /* |
| * Do 6to4-specific header-processing on output. Return B_TRUE if the packet |
| * was processed without issue, or B_FALSE if the packet had issues and should |
| * be dropped. |
| */ |
| static boolean_t |
| iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) |
| { |
| ipaddr_t v4addr; |
| |
| /* |
| * IPv6 source must be a 6to4 address. This is because a conscious |
| * decision was made to not allow a Solaris system to be used as a |
| * relay router (for security reasons) when 6to4 was initially |
| * integrated. If this decision is ever reversed, the following check |
| * can be removed. |
| */ |
| if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src)) |
| return (B_FALSE); |
| |
| /* |
	 * RFC 3056 mandates that the IPv4 source MUST be set to the IPv4
| * portion of the 6to4 IPv6 source address. In other words, make sure |
| * that we're tunneling packets from our own 6to4 site. |
| */ |
| IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); |
| if (outer4->ipha_src != v4addr) |
| return (B_FALSE); |
| |
| /* |
| * Automatically set the destination of the outer IPv4 header as |
	 * described in RFC 3056. There are two possibilities:
| * |
| * a. If the IPv6 destination is a 6to4 address, set the IPv4 address |
| * to the IPv4 portion of the 6to4 address. |
| * b. If the IPv6 destination is a native IPv6 address, set the IPv4 |
| * destination to the address of a relay router. |
| * |
| * Design Note: b shouldn't be necessary here, and this is a flaw in |
| * the design of the 6to4relay command. Instead of setting a 6to4 |
| * relay address in this module via an ioctl, the 6to4relay command |
	 * could simply add an IPv6 route for native IPv6 addresses (such as a
| * default route) in the forwarding table that uses a 6to4 destination |
| * as its next hop, and the IPv4 portion of that address could be a |
| * 6to4 relay address. In order for this to work, IP would have to |
| * resolve the next hop address, which would necessitate a link-layer |
| * address resolver for 6to4 links, which doesn't exist today. |
| * |
| * In fact, if a resolver existed for 6to4 links, then setting the |
| * IPv4 destination in the outer header could be done as part of |
| * link-layer address resolution and fast-path header generation, and |
| * not here. |
| */ |
| if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) { |
| /* destination is a 6to4 router */ |
| IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, |
| (struct in_addr *)&outer4->ipha_dst); |
| } else { |
| /* |
| * The destination is a native IPv6 address. If output to a |
| * relay-router is enabled, use the relay-router's IPv4 |
| * address as the destination. |
| */ |
| if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) |
| return (B_FALSE); |
| outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr; |
| } |
| |
| /* |
| * If the outer source and destination are equal, this means that the |
| * 6to4 router somehow forwarded an IPv6 packet destined for its own |
| * 6to4 site to its 6to4 tunnel interface, which will result in this |
| * packet infinitely bouncing between ip and iptun. |
| */ |
| return (outer4->ipha_src != outer4->ipha_dst); |
| } |
| |
| /* |
 * Process output packets with outer IPv4 headers. Frees mp and bumps the
 * relevant error statistic on failure.
| */ |
| static mblk_t * |
| iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4, |
| ipha_t *inner4, ip6_t *inner6) |
| { |
| uint8_t *innerptr = (inner4 != NULL ? |
| (uint8_t *)inner4 : (uint8_t *)inner6); |
| size_t minmtu = (inner4 != NULL ? |
| IPTUN_MIN_IPV4_MTU : IPTUN_MIN_IPV6_MTU); |
| |
| if (inner4 != NULL) { |
| ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP); |
| /* |
		 * Copy the tos from the inner IPv4 header. We mask off the
		 * two ECN bits (the low-order bits of the TOS byte) because
		 * there is currently no tunnel-to-tunnel negotiation to
		 * determine if both sides support ECN. We opt for the safe
		 * choice: don't copy the ECN bits when doing encapsulation.
| */ |
| outer4->ipha_type_of_service = |
| inner4->ipha_type_of_service & ~0x03; |
| } else { |
| ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 && |
| inner6 != NULL); |
| |
| if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && |
| !iptun_out_process_6to4(iptun, outer4, inner6)) { |
| iptun_drop_pkt(mp, &iptun->iptun_oerrors); |
| return (NULL); |
| } |
| } |
| |
| /* |
	 * As described in section 3.2.2 of RFC 4213, if the packet payload is
| * less than or equal to the minimum MTU size, then we need to allow |
| * IPv4 to fragment the packet. The reason is that even if we end up |
| * receiving an ICMP frag-needed, the interface above this tunnel |
| * won't be
|