| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| |
| /* |
| * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| */ |
| |
| /* |
| * Functions to implement IP address -> link layer address (PSARC 2006/482) |
| */ |
| #include <inet/ip2mac.h> |
| #include <inet/ip2mac_impl.h> |
| #include <sys/zone.h> |
| #include <inet/ip_ndp.h> |
| #include <inet/ip_if.h> |
| #include <inet/ip6.h> |
| |
| /* |
| * dispatch pending callbacks. |
| */ |
| void |
| ncec_cb_dispatch(ncec_t *ncec) |
| { |
| ncec_cb_t *ncec_cb; |
| ip2mac_t ip2m; |
| |
| mutex_enter(&ncec->ncec_lock); |
| if (list_is_empty(&ncec->ncec_cb)) { |
| mutex_exit(&ncec->ncec_lock); |
| return; |
| } |
| ncec_ip2mac_response(&ip2m, ncec); |
| ncec_cb_refhold_locked(ncec); |
| /* |
| * IP does not hold internal locks like nce_lock across calls to |
| * other subsystems for fear of recursive lock entry and lock |
| * hierarchy violation. The caller may be holding locks across |
| * the call to IP. (It would be ideal if no subsystem holds locks |
| * across calls into another subsystem, especially if calls can |
| * happen in either direction). |
| */ |
| ncec_cb = list_head(&ncec->ncec_cb); |
| for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) { |
| if (ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED) |
| continue; |
| ncec_cb->ncec_cb_flags |= NCE_CB_DISPATCHED; |
| mutex_exit(&ncec->ncec_lock); |
| (*ncec_cb->ncec_cb_func)(&ip2m, ncec_cb->ncec_cb_arg); |
| mutex_enter(&ncec->ncec_lock); |
| } |
| ncec_cb_refrele(ncec); |
| mutex_exit(&ncec->ncec_lock); |
| } |
| |
| /* |
| * fill up the ip2m response fields with inforamation from the nce. |
| */ |
| void |
| ncec_ip2mac_response(ip2mac_t *ip2m, ncec_t *ncec) |
| { |
| boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION); |
| sin_t *sin; |
| sin6_t *sin6; |
| struct sockaddr_dl *sdl; |
| |
| ASSERT(MUTEX_HELD(&ncec->ncec_lock)); |
| bzero(ip2m, sizeof (*ip2m)); |
| if (NCE_ISREACHABLE(ncec) && !NCE_ISCONDEMNED(ncec)) |
| ip2m->ip2mac_err = 0; |
| else |
| ip2m->ip2mac_err = ESRCH; |
| if (isv6) { |
| sin6 = (sin6_t *)&ip2m->ip2mac_pa; |
| sin6->sin6_family = AF_INET6; |
| sin6->sin6_addr = ncec->ncec_addr; |
| } else { |
| sin = (sin_t *)&ip2m->ip2mac_pa; |
| sin->sin_family = AF_INET; |
| IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &sin->sin_addr); |
| } |
| if (ip2m->ip2mac_err == 0) { |
| sdl = &ip2m->ip2mac_ha; |
| sdl->sdl_family = AF_LINK; |
| sdl->sdl_type = ncec->ncec_ill->ill_type; |
| /* |
| * should we put ncec_ill->ill_name in there? why? |
| * likewise for the sdl_index |
| */ |
| sdl->sdl_nlen = 0; |
| sdl->sdl_alen = ncec->ncec_ill->ill_phys_addr_length; |
| if (ncec->ncec_lladdr != NULL) |
| bcopy(ncec->ncec_lladdr, LLADDR(sdl), sdl->sdl_alen); |
| } |
| } |
| |
| void |
| ncec_cb_refhold_locked(ncec_t *ncec) |
| { |
| ASSERT(MUTEX_HELD(&ncec->ncec_lock)); |
| ncec->ncec_cb_walker_cnt++; |
| } |
| |
| void |
| ncec_cb_refrele(ncec_t *ncec) |
| { |
| ncec_cb_t *ncec_cb, *ncec_cb_next = NULL; |
| |
| ASSERT(MUTEX_HELD(&ncec->ncec_lock)); |
| if (--ncec->ncec_cb_walker_cnt == 0) { |
| for (ncec_cb = list_head(&ncec->ncec_cb); ncec_cb != NULL; |
| ncec_cb = ncec_cb_next) { |
| |
| ncec_cb_next = list_next(&ncec->ncec_cb, ncec_cb); |
| if ((ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED) == 0) |
| continue; |
| list_remove(&ncec->ncec_cb, ncec_cb); |
| kmem_free(ncec_cb, sizeof (*ncec_cb)); |
| } |
| } |
| } |
| |
| /* |
| * add a callback to the nce, so that the callback can be invoked |
| * after address resolution succeeds/fails. |
| */ |
| static ip2mac_id_t |
| ncec_add_cb(ncec_t *ncec, ip2mac_callback_t *cb, void *cbarg) |
| { |
| ncec_cb_t *nce_cb; |
| ip2mac_id_t ip2mid = NULL; |
| |
| ASSERT(MUTEX_HELD(&ncec->ncec_lock)); |
| if ((nce_cb = kmem_zalloc(sizeof (*nce_cb), KM_NOSLEEP)) == NULL) |
| return (ip2mid); |
| nce_cb->ncec_cb_func = cb; |
| nce_cb->ncec_cb_arg = cbarg; |
| /* |
| * We identify the ncec_cb_t during cancellation by the address |
| * of the nce_cb_t itself, and, as a short-cut for eliminating |
| * clear mismatches, only look in the callback list of ncec's |
| * whose address is equal to the nce_cb_id. |
| */ |
| nce_cb->ncec_cb_id = ncec; /* no refs! just an address */ |
| list_insert_tail(&ncec->ncec_cb, nce_cb); |
| ip2mid = ncec; /* this is the id to be used in ip2mac_cancel */ |
| |
| return (nce_cb); |
| } |
| |
| /* |
| * Resolve an IP address to a link-layer address using the data-structures |
| * defined in PSARC 2006/482. If the current link-layer address for the |
| * IP address is not known, the state-machine for resolving the resolution |
| * will be triggered, and the callback function (*cb) will be invoked after |
| * the resolution completes. |
| */ |
| ip2mac_id_t |
| ip2mac(uint_t op, ip2mac_t *ip2m, ip2mac_callback_t *cb, void *cbarg, |
| zoneid_t zoneid) |
| { |
| ncec_t *ncec; |
| nce_t *nce = NULL; |
| boolean_t isv6; |
| ill_t *ill; |
| netstack_t *ns; |
| ip_stack_t *ipst; |
| ip2mac_id_t ip2mid = NULL; |
| sin_t *sin; |
| sin6_t *sin6; |
| int err; |
| uint64_t delta; |
| boolean_t need_resolve = B_FALSE; |
| |
| isv6 = (ip2m->ip2mac_pa.ss_family == AF_INET6); |
| |
| ns = netstack_find_by_zoneid(zoneid); |
| if (ns == NULL) { |
| ip2m->ip2mac_err = EINVAL; |
| return (NULL); |
| } |
| /* |
| * For exclusive stacks we reset the zoneid to zero |
| * since IP uses the global zoneid in the exclusive stacks. |
| */ |
| if (ns->netstack_stackid != GLOBAL_NETSTACKID) |
| zoneid = GLOBAL_ZONEID; |
| ipst = ns->netstack_ip; |
| /* |
| * find the ill from the ip2m->ip2mac_ifindex |
| */ |
| ill = ill_lookup_on_ifindex(ip2m->ip2mac_ifindex, isv6, ipst); |
| if (ill == NULL) { |
| ip2m->ip2mac_err = ENXIO; |
| netstack_rele(ns); |
| return (NULL); |
| } |
| if (isv6) { |
| sin6 = (sin6_t *)&ip2m->ip2mac_pa; |
| if (op == IP2MAC_LOOKUP) { |
| nce = nce_lookup_v6(ill, &sin6->sin6_addr); |
| } else { |
| err = nce_lookup_then_add_v6(ill, NULL, |
| ill->ill_phys_addr_length, |
| &sin6->sin6_addr, 0, ND_UNCHANGED, &nce); |
| } |
| } else { |
| sin = (sin_t *)&ip2m->ip2mac_pa; |
| if (op == IP2MAC_LOOKUP) { |
| nce = nce_lookup_v4(ill, &sin->sin_addr.s_addr); |
| } else { |
| err = nce_lookup_then_add_v4(ill, NULL, |
| ill->ill_phys_addr_length, |
| &sin->sin_addr.s_addr, 0, ND_UNCHANGED, &nce); |
| } |
| } |
| if (op == IP2MAC_LOOKUP) { |
| if (nce == NULL) { |
| ip2m->ip2mac_err = ESRCH; |
| goto done; |
| } |
| ncec = nce->nce_common; |
| delta = TICK_TO_MSEC(ddi_get_lbolt64()) - ncec->ncec_last; |
| mutex_enter(&ncec->ncec_lock); |
| if (NCE_ISREACHABLE(ncec) && |
| delta < (uint64_t)ill->ill_reachable_time) { |
| ncec_ip2mac_response(ip2m, ncec); |
| ip2m->ip2mac_err = 0; |
| } else { |
| ip2m->ip2mac_err = ESRCH; |
| } |
| mutex_exit(&ncec->ncec_lock); |
| goto done; |
| } else { |
| if (err != 0 && err != EEXIST) { |
| ip2m->ip2mac_err = err; |
| goto done; |
| } |
| } |
| ncec = nce->nce_common; |
| delta = TICK_TO_MSEC(ddi_get_lbolt64()) - ncec->ncec_last; |
| mutex_enter(&ncec->ncec_lock); |
| if (NCE_ISCONDEMNED(ncec)) { |
| ip2m->ip2mac_err = ESRCH; |
| } else { |
| if (NCE_ISREACHABLE(ncec)) { |
| if (NCE_MYADDR(ncec) || |
| delta < (uint64_t)ill->ill_reachable_time) { |
| ncec_ip2mac_response(ip2m, ncec); |
| ip2m->ip2mac_err = 0; |
| mutex_exit(&ncec->ncec_lock); |
| goto done; |
| } |
| /* |
| * Since we do not control the packet output |
| * path for ip2mac() callers, we need to verify |
| * if the existing information in the nce is |
| * very old, and retrigger resolution if necessary. |
| * We will not return the existing stale |
| * information until it is verified through a |
| * resolver request/response exchange. |
| * |
| * In the future, we may want to support extensions |
| * that do additional callbacks on link-layer updates, |
| * so that we can return the stale information but |
| * also update the caller if the lladdr changes. |
| */ |
| ncec->ncec_rcnt = ill->ill_xmit_count; |
| ncec->ncec_state = ND_PROBE; |
| need_resolve = B_TRUE; /* reachable but very old nce */ |
| } else if (ncec->ncec_state == ND_INITIAL) { |
| need_resolve = B_TRUE; /* ND_INITIAL nce */ |
| ncec->ncec_state = ND_INCOMPLETE; |
| } |
| /* |
| * NCE not known to be reachable in the recent past. We must |
| * reconfirm the information before returning it to the caller |
| */ |
| if (ncec->ncec_rcnt > 0) { |
| /* |
| * Still resolving this ncec, so we can queue the |
| * callback information in ncec->ncec_cb |
| */ |
| ip2mid = ncec_add_cb(ncec, cb, cbarg); |
| ip2m->ip2mac_err = EINPROGRESS; |
| } else { |
| /* |
| * No more retransmits allowed -- resolution failed. |
| */ |
| ip2m->ip2mac_err = ESRCH; |
| } |
| } |
| mutex_exit(&ncec->ncec_lock); |
| done: |
| /* |
| * if NCE_ISREACHABLE(ncec) but very old, or if it is ND_INITIAL, |
| * trigger resolve. |
| */ |
| if (need_resolve) |
| ip_ndp_resolve(ncec); |
| if (nce != NULL) |
| nce_refrele(nce); |
| netstack_rele(ns); |
| ill_refrele(ill); |
| return (ip2mid); |
| } |
| |
| /* |
| * data passed to ncec_walk for canceling outstanding callbacks. |
| */ |
| typedef struct ip2mac_cancel_data_s { |
| ip2mac_id_t ip2m_cancel_id; |
| int ip2m_cancel_err; |
| } ip2mac_cancel_data_t; |
| |
| /* |
| * callback invoked for each active ncec. If the ip2mac_id_t corresponds |
| * to an active nce_cb_t in the ncec's callback list, we want to remove |
| * the callback (if there are no walkers) or return EBUSY to the caller |
| */ |
| static void |
| ip2mac_cancel_callback(ncec_t *ncec, void *arg) |
| { |
| ip2mac_cancel_data_t *ip2m_wdata = arg; |
| ncec_cb_t *ip2m_nce_cb = ip2m_wdata->ip2m_cancel_id; |
| ncec_cb_t *ncec_cb; |
| |
| if (ip2m_nce_cb->ncec_cb_id != ncec) |
| return; |
| |
| mutex_enter(&ncec->ncec_lock); |
| if (list_is_empty(&ncec->ncec_cb)) { |
| mutex_exit(&ncec->ncec_lock); |
| return; |
| } |
| /* |
| * IP does not hold internal locks like nce_lock across calls to |
| * other subsystems for fear of recursive lock entry and lock |
| * hierarchy violation. The caller may be holding locks across |
| * the call to IP. (It would be ideal if no subsystem holds locks |
| * across calls into another subsystem, especially if calls can |
| * happen in either direction). |
| */ |
| ncec_cb = list_head(&ncec->ncec_cb); |
| for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) { |
| if (ncec_cb != ip2m_nce_cb) |
| continue; |
| /* |
| * If there are no walkers we can remove the nce_cb. |
| * Otherwise the exiting walker will clean up. |
| */ |
| if (ncec->ncec_cb_walker_cnt == 0) { |
| list_remove(&ncec->ncec_cb, ncec_cb); |
| } else { |
| ip2m_wdata->ip2m_cancel_err = EBUSY; |
| } |
| break; |
| } |
| mutex_exit(&ncec->ncec_lock); |
| } |
| |
| /* |
| * cancel an outstanding timeout set up via ip2mac |
| */ |
| int |
| ip2mac_cancel(ip2mac_id_t ip2mid, zoneid_t zoneid) |
| { |
| netstack_t *ns; |
| ip_stack_t *ipst; |
| ip2mac_cancel_data_t ip2m_wdata; |
| |
| ns = netstack_find_by_zoneid(zoneid); |
| if (ns == NULL) { |
| ip2m_wdata.ip2m_cancel_err = EINVAL; |
| return (ip2m_wdata.ip2m_cancel_err); |
| } |
| /* |
| * For exclusive stacks we reset the zoneid to zero |
| * since IP uses the global zoneid in the exclusive stacks. |
| */ |
| if (ns->netstack_stackid != GLOBAL_NETSTACKID) |
| zoneid = GLOBAL_ZONEID; |
| ipst = ns->netstack_ip; |
| |
| ip2m_wdata.ip2m_cancel_id = ip2mid; |
| ip2m_wdata.ip2m_cancel_err = 0; |
| ncec_walk(NULL, ip2mac_cancel_callback, &ip2m_wdata, ipst); |
| /* |
| * We may return EBUSY if a walk to dispatch callbacks is |
| * in progress, in which case the caller needs to synchronize |
| * with the registered callback function to make sure the |
| * module does not exit when there is a callback pending. |
| */ |
| netstack_rele(ns); |
| return (ip2m_wdata.ip2m_cancel_err); |
| } |