| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| */ |
| /* Copyright (c) 1990 Mentat Inc. */ |
| |
| /* AR - Address Resolution Protocol */ |
| |
| #include <sys/types.h> |
| #include <sys/stream.h> |
| #include <sys/stropts.h> |
| #include <sys/errno.h> |
| #include <sys/strlog.h> |
| #include <sys/dlpi.h> |
| #include <sys/sockio.h> |
| #define _SUN_TPI_VERSION 2 |
| #include <sys/tihdr.h> |
| #include <sys/socket.h> |
| #include <sys/ddi.h> |
| #include <sys/sunddi.h> |
| #include <sys/cmn_err.h> |
| #include <sys/sdt.h> |
| #include <sys/vtrace.h> |
| #include <sys/strsun.h> |
| #include <sys/policy.h> |
| #include <sys/zone.h> |
| #include <sys/ethernet.h> |
| #include <sys/random.h> |
| #include <sys/hook_event.h> |
| |
| #include <inet/common.h> |
| #include <inet/optcom.h> |
| #include <inet/mi.h> |
| #include <inet/nd.h> |
| #include <inet/snmpcom.h> |
| #include <net/if.h> |
| #include <inet/arp.h> |
| #include <netinet/ip6.h> |
| #include <netinet/arp.h> |
| #include <inet/ip.h> |
| #include <inet/ip_ire.h> |
| #include <inet/ip_ndp.h> |
| #include <inet/mib2.h> |
| #include <inet/arp_impl.h> |
| |
| /* |
| * ARP entry life time and design notes |
| * ------------------------------------ |
| * |
| * ARP entries (ACEs) must last at least as long as IP knows about a given |
| * MAC-IP translation (i.e., as long as the IRE cache entry exists). It's ok |
| * if the ARP entry lasts longer, but not ok if it is removed before the IP |
| * entry. The reason for this is that if ARP doesn't have an entry, we will be |
| * unable to detect the difference between an ARP broadcast that represents no |
| * change (same, known address of sender) and one that represents a change (new |
| * address for an existing entry). In the former case, we must not notify |
| * IP, or we can suffer a hurricane attack. In the latter case, we must |
| * notify IP, or IP will drift out of sync with the network. |
| * |
| * Note that IP controls the lifetime of entries, not ARP. |
| * |
| * We don't attempt to reconfirm aging entries. If the system is no longer |
| * talking to a given peer, then it doesn't matter if we have the right mapping |
| * for that peer. It would be possible to send queries on aging entries that |
| * are active, but this isn't done. |
| * |
| * IPMP Notes |
| * ---------- |
| * |
| * ARP is aware of IPMP. In particular, IP notifies ARP about all "active" |
| * (able to transmit data packets) interfaces in a given group via |
| * AR_IPMP_ACTIVATE and AR_IPMP_DEACTIVATE messages. These messages, combined |
| * with the "IPMP arl_t" that ARP creates over the IPMP DLPI stub driver, |
| * enable ARP to track all the arl_t's that are in the same group and thus |
| * ensure that ACEs are shared across each group and the arl_t that ARP |
| * chooses to transmit on for a given ACE is optimal. |
| * |
| * ARP relies on IP for hardware address updates. In particular, if the |
| * hardware address of an interface changes (DL_NOTE_PHYS_ADDR), then IP will |
| * bring the interface down and back up -- and as part of bringing it back |
| * up, will send messages to ARP that allow it to update the affected arl's |
| * with new hardware addresses. |
| * |
| * N.B.: One side-effect of this approach is that when an interface fails and |
| * then starts to repair, it will temporarily populate the ARP cache with |
| * addresses that are owned by it rather than the group's arl_t. To address |
| * this, we could add more messages (e.g., AR_IPMP_JOIN and AR_IPMP_LEAVE), |
| * but as the issue appears to be only cosmetic (redundant entries in the ARP |
| * cache during interface repair), we've kept things simple for now. |
| */ |
| |
| /* |
| * This is used when scanning for "old" (least recently broadcast) ACEs. We |
| * don't want to have to walk the list for every single one, so we gather up |
| * batches at a time. |
| */ |
| #define ACE_RESCHED_LIST_LEN 8 |
| |
| typedef struct { |
| arl_t *art_arl; |
| uint_t art_naces; |
| ace_t *art_aces[ACE_RESCHED_LIST_LEN]; |
| } ace_resched_t; |
| |
| #define ACE_RESOLVED(ace) ((ace)->ace_flags & ACE_F_RESOLVED) |
| #define ACE_NONPERM(ace) \ |
| (((ace)->ace_flags & (ACE_F_RESOLVED | ACE_F_PERMANENT)) == \ |
| ACE_F_RESOLVED) |
| |
| #define AR_DEF_XMIT_INTERVAL 500 /* time in milliseconds */ |
| #define AR_LL_HDR_SLACK 32 /* Leave the lower layer some room */ |
| |
| #define AR_SNMP_MSG T_OPTMGMT_ACK |
| #define AR_DRAINING (void *)0x11 |
| |
| /* |
| * The IPv4 Link Local address space is special; we do extra duplicate checking |
| * there, as the entire assignment mechanism rests on random numbers. |
| */ |
| #define IS_IPV4_LL_SPACE(ptr) (((uchar_t *)ptr)[0] == 169 && \ |
| ((uchar_t *)ptr)[1] == 254) |
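| |
| /* |
| * For example, IS_IPV4_LL_SPACE() is true for the address 169.254.1.1 |
| * (bytes { 169, 254, 1, 1 }); ar_ce_create() below uses this to |
| * withhold ACE_F_FAST from entries in that range. |
| */ |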
| |
| /* |
| * Check if the command needs to be enqueued by seeing if there are other |
| * commands ahead of us or if some DLPI response is being awaited. Usually |
| * there would be an enqueued command in the latter case, however if the |
| * stream that originated the command has closed, the close would have |
| * cleaned up the enqueued command. AR_DRAINING signifies that the command |
| * at the head of the arl_queue has been internally dequeued on completion |
| * of the previous command, and is being dispatched from ar_dlpi_done. |
| */ |
| #define CMD_NEEDS_QUEUEING(mp, arl) \ |
| (mp->b_prev != AR_DRAINING && (arl->arl_queue != NULL || \ |
| arl->arl_dlpi_pending != DL_PRIM_INVAL)) |
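| |
| /* |
| * For example (a sketch of the flow): an AR_ENTRY_ADD arriving while |
| * an AR_INTERFACE_UP is still awaiting its DLPI ack finds |
| * arl_dlpi_pending set and is tail-queued; ar_dlpi_done later restarts |
| * it with b_prev set to AR_DRAINING, so this macro then lets it pass. |
| */ |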
| |
| #define ARH_FIXED_LEN 8 |
| |
| /* |
| * Macro used when creating ACEs to determine the arl that should own it. |
| */ |
| #define OWNING_ARL(arl) \ |
| ((arl)->arl_ipmp_arl != NULL ? (arl)->arl_ipmp_arl : arl) |
| |
| /* |
| * MAC-specific intelligence. Shouldn't be needed, but the DL_INFO_ACK |
| * doesn't quite do it for us. |
| */ |
| typedef struct ar_m_s { |
| t_uscalar_t ar_mac_type; |
| uint32_t ar_mac_arp_hw_type; |
| t_scalar_t ar_mac_sap_length; |
| uint32_t ar_mac_hw_addr_length; |
| } ar_m_t; |
| |
| typedef struct msg2_args { |
| mblk_t *m2a_mpdata; |
| mblk_t *m2a_mptail; |
| } msg2_args_t; |
| |
| static mblk_t *ar_alloc(uint32_t cmd, int); |
| static int ar_ce_create(arl_t *arl, uint32_t proto, uchar_t *hw_addr, |
| uint32_t hw_addr_len, uchar_t *proto_addr, |
| uint32_t proto_addr_len, uchar_t *proto_mask, |
| uchar_t *proto_extract_mask, uint32_t hw_extract_start, |
| uchar_t *sender_addr, uint32_t flags); |
| static void ar_ce_delete(ace_t *ace); |
| static void ar_ce_delete_per_arl(ace_t *ace, void *arg); |
| static ace_t **ar_ce_hash(arp_stack_t *as, uint32_t proto, |
| const uchar_t *proto_addr, uint32_t proto_addr_length); |
| static ace_t *ar_ce_lookup(arl_t *arl, uint32_t proto, |
| const uchar_t *proto_addr, uint32_t proto_addr_length); |
| static ace_t *ar_ce_lookup_entry(arl_t *arl, uint32_t proto, |
| const uchar_t *proto_addr, uint32_t proto_addr_length); |
| static ace_t *ar_ce_lookup_from_area(arp_stack_t *as, mblk_t *mp, |
| ace_t *matchfn()); |
| static ace_t *ar_ce_lookup_mapping(arl_t *arl, uint32_t proto, |
| const uchar_t *proto_addr, uint32_t proto_addr_length); |
| static ace_t *ar_ce_lookup_permanent(arp_stack_t *as, uint32_t proto, |
| uchar_t *proto_addr, uint32_t proto_addr_length); |
| static boolean_t ar_ce_resolve(ace_t *ace, const uchar_t *hw_addr, |
| uint32_t hw_addr_length); |
| static void ar_ce_walk(arp_stack_t *as, void (*pfi)(ace_t *, void *), |
| void *arg1); |
| |
| static void ar_client_notify(const arl_t *arl, mblk_t *mp, int code); |
| static int ar_close(queue_t *q); |
| static int ar_cmd_dispatch(queue_t *q, mblk_t *mp, boolean_t from_wput); |
| static void ar_cmd_done(arl_t *arl); |
| static mblk_t *ar_dlpi_comm(t_uscalar_t prim, size_t size); |
| static void ar_dlpi_send(arl_t *, mblk_t *); |
| static void ar_dlpi_done(arl_t *, t_uscalar_t); |
| static int ar_entry_add(queue_t *q, mblk_t *mp); |
| static int ar_entry_delete(queue_t *q, mblk_t *mp); |
| static int ar_entry_query(queue_t *q, mblk_t *mp); |
| static int ar_entry_squery(queue_t *q, mblk_t *mp); |
| static int ar_interface_up(queue_t *q, mblk_t *mp); |
| static int ar_interface_down(queue_t *q, mblk_t *mp); |
| static int ar_interface_on(queue_t *q, mblk_t *mp); |
| static int ar_interface_off(queue_t *q, mblk_t *mp); |
| static int ar_ipmp_activate(queue_t *q, mblk_t *mp); |
| static int ar_ipmp_deactivate(queue_t *q, mblk_t *mp); |
| static void ar_ll_cleanup_arl_queue(queue_t *q); |
| static void ar_ll_down(arl_t *arl); |
| static arl_t *ar_ll_lookup_by_name(arp_stack_t *as, const char *name); |
| static arl_t *ar_ll_lookup_from_mp(arp_stack_t *as, mblk_t *mp); |
| static void ar_ll_init(arp_stack_t *, ar_t *, mblk_t *mp); |
| static void ar_ll_set_defaults(arl_t *, mblk_t *mp); |
| static void ar_ll_clear_defaults(arl_t *); |
| static int ar_ll_up(arl_t *arl); |
| static int ar_mapping_add(queue_t *q, mblk_t *mp); |
| static boolean_t ar_mask_all_ones(uchar_t *mask, uint32_t mask_len); |
| static ar_m_t *ar_m_lookup(t_uscalar_t mac_type); |
| static int ar_nd_ioctl(queue_t *q, mblk_t *mp); |
| static int ar_open(queue_t *q, dev_t *devp, int flag, int sflag, |
| cred_t *credp); |
| static int ar_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); |
| static boolean_t ar_param_register(IDP *ndp, arpparam_t *arppa, int cnt); |
| static int ar_param_set(queue_t *q, mblk_t *mp, char *value, |
| caddr_t cp, cred_t *cr); |
| static void ar_query_delete(ace_t *ace, void *ar); |
| static void ar_query_reply(ace_t *ace, int ret_val, |
| uchar_t *proto_addr, uint32_t proto_addr_len); |
| static clock_t ar_query_xmit(arp_stack_t *as, ace_t *ace); |
| static void ar_rput(queue_t *q, mblk_t *mp_orig); |
| static void ar_rput_dlpi(queue_t *q, mblk_t *mp); |
| static void ar_set_address(ace_t *ace, uchar_t *addrpos, |
| uchar_t *proto_addr, uint32_t proto_addr_len); |
| static int ar_slifname(queue_t *q, mblk_t *mp); |
| static int ar_set_ppa(queue_t *q, mblk_t *mp); |
| static int ar_snmp_msg(queue_t *q, mblk_t *mp_orig); |
| static void ar_snmp_msg2(ace_t *, void *); |
| static void ar_wput(queue_t *q, mblk_t *mp); |
| static void ar_wsrv(queue_t *q); |
| static void ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, |
| uint32_t plen, const uchar_t *haddr1, const uchar_t *paddr1, |
| const uchar_t *haddr2, const uchar_t *paddr2, const uchar_t *dstaddr, |
| arp_stack_t *as); |
| static void ar_cmd_enqueue(arl_t *arl, mblk_t *mp, queue_t *q, |
| ushort_t cmd, boolean_t); |
| static mblk_t *ar_cmd_dequeue(arl_t *arl); |
| |
| static void *arp_stack_init(netstackid_t stackid, netstack_t *ns); |
| static void arp_stack_fini(netstackid_t stackid, void *arg); |
| static void arp_stack_shutdown(netstackid_t stackid, void *arg); |
| /* |
| * All of these are alterable at run time, within the min/max values |
| * given. arp_publish_interval and arp_publish_count default to 2 |
| * seconds and 5 respectively; this helps ensure that ARP packets are |
| * not lost during FAILOVER/FAILBACK. It is assumed that this does not |
| * affect normal operation. |
| */ |
| static arpparam_t arp_param_arr[] = { |
| /* min max value name */ |
| { 30000, 3600000, 300000, "arp_cleanup_interval"}, |
| { 1000, 20000, 2000, "arp_publish_interval"}, |
| { 1, 20, 5, "arp_publish_count"}, |
| { 0, 20000, 1000, "arp_probe_delay"}, |
| { 10, 20000, 1500, "arp_probe_interval"}, |
| { 0, 20, 3, "arp_probe_count"}, |
| { 0, 20000, 100, "arp_fastprobe_delay"}, |
| { 10, 20000, 150, "arp_fastprobe_interval"}, |
| { 0, 20, 3, "arp_fastprobe_count"}, |
| { 0, 3600000, 300000, "arp_defend_interval"}, |
| { 0, 20000, 100, "arp_defend_rate"}, |
| { 0, 3600000, 15000, "arp_broadcast_interval"}, |
| { 5, 86400, 3600, "arp_defend_period"} |
| }; |
| #define as_cleanup_interval as_param_arr[0].arp_param_value |
| #define as_publish_interval as_param_arr[1].arp_param_value |
| #define as_publish_count as_param_arr[2].arp_param_value |
| #define as_probe_delay as_param_arr[3].arp_param_value |
| #define as_probe_interval as_param_arr[4].arp_param_value |
| #define as_probe_count as_param_arr[5].arp_param_value |
| #define as_fastprobe_delay as_param_arr[6].arp_param_value |
| #define as_fastprobe_interval as_param_arr[7].arp_param_value |
| #define as_fastprobe_count as_param_arr[8].arp_param_value |
| #define as_defend_interval as_param_arr[9].arp_param_value |
| #define as_defend_rate as_param_arr[10].arp_param_value |
| #define as_broadcast_interval as_param_arr[11].arp_param_value |
| #define as_defend_period as_param_arr[12].arp_param_value |
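| |
| /* |
| * For illustration: these tunables are registered by |
| * ar_param_register() below, so at run time they can be read or set |
| * with ndd(1M) against /dev/arp, e.g. |
| * |
| * # ndd /dev/arp arp_publish_count |
| * # ndd -set /dev/arp arp_publish_interval 2000 |
| * |
| * subject to the min/max bounds in arp_param_arr above. |
| */ |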
| |
| static struct module_info arp_mod_info = { |
| 0, "arp", 0, INFPSZ, 512, 128 |
| }; |
| |
| static struct qinit arprinit = { |
| (pfi_t)ar_rput, NULL, ar_open, ar_close, NULL, &arp_mod_info |
| }; |
| |
| static struct qinit arpwinit = { |
| (pfi_t)ar_wput, (pfi_t)ar_wsrv, ar_open, ar_close, NULL, &arp_mod_info |
| }; |
| |
| struct streamtab arpinfo = { |
| &arprinit, &arpwinit |
| }; |
| |
| /* |
| * TODO: we need a better mechanism to set the ARP hardware type since |
| * the DLPI mac type does not include enough predefined values. |
| */ |
| static ar_m_t ar_m_tbl[] = { |
| { DL_CSMACD, ARPHRD_ETHER, -2, 6}, /* 802.3 */ |
| { DL_TPB, ARPHRD_IEEE802, -2, 6}, /* 802.4 */ |
| { DL_TPR, ARPHRD_IEEE802, -2, 6}, /* 802.5 */ |
| { DL_METRO, ARPHRD_IEEE802, -2, 6}, /* 802.6 */ |
| { DL_ETHER, ARPHRD_ETHER, -2, 6}, /* Ethernet */ |
| { DL_FDDI, ARPHRD_ETHER, -2, 6}, /* FDDI */ |
| { DL_IB, ARPHRD_IB, -2, 20}, /* Infiniband */ |
| { DL_OTHER, ARPHRD_ETHER, -2, 6}, /* unknown */ |
| }; |
| |
| /* |
| * Note that all routines which need to queue a message for later |
| * processing must be ioctl-aware, so that the complete message can be |
| * queued. The following are the command entry flags in arct_flags: |
| */ |
| #define ARF_IOCTL_AWARE 0x1 /* Arp command can come down as M_IOCTL */ |
| #define ARF_ONLY_CMD 0x2 /* Command is exclusive to ARP */ |
| #define ARF_WPUT_OK 0x4 /* Command is allowed from ar_wput */ |
| |
| /* ARP Cmd Table entry */ |
| typedef struct arct_s { |
| int (*arct_pfi)(queue_t *, mblk_t *); |
| uint32_t arct_cmd; |
| int arct_min_len; |
| uint32_t arct_flags; |
| int arct_priv_req; /* Privilege required for this cmd */ |
| const char *arct_txt; |
| } arct_t; |
| |
| /* |
| * AR_ENTRY_ADD, QUERY and SQUERY are used by sdp, hence they need to |
| * have ARF_WPUT_OK set. |
| */ |
| static arct_t ar_cmd_tbl[] = { |
| { ar_entry_add, AR_ENTRY_ADD, sizeof (area_t), |
| ARF_IOCTL_AWARE | ARF_ONLY_CMD | ARF_WPUT_OK, OP_CONFIG, |
| "AR_ENTRY_ADD" }, |
| { ar_entry_delete, AR_ENTRY_DELETE, sizeof (ared_t), |
| ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_ENTRY_DELETE" }, |
| { ar_entry_query, AR_ENTRY_QUERY, sizeof (areq_t), |
| ARF_IOCTL_AWARE | ARF_ONLY_CMD | ARF_WPUT_OK, OP_NP, |
| "AR_ENTRY_QUERY" }, |
| { ar_entry_squery, AR_ENTRY_SQUERY, sizeof (area_t), |
| ARF_IOCTL_AWARE | ARF_ONLY_CMD | ARF_WPUT_OK, OP_NP, |
| "AR_ENTRY_SQUERY" }, |
| { ar_mapping_add, AR_MAPPING_ADD, sizeof (arma_t), |
| ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_MAPPING_ADD" }, |
| { ar_interface_up, AR_INTERFACE_UP, sizeof (arc_t), |
| ARF_ONLY_CMD, OP_CONFIG, "AR_INTERFACE_UP" }, |
| { ar_interface_down, AR_INTERFACE_DOWN, sizeof (arc_t), |
| ARF_ONLY_CMD, OP_CONFIG, "AR_INTERFACE_DOWN" }, |
| { ar_interface_on, AR_INTERFACE_ON, sizeof (arc_t), |
| ARF_ONLY_CMD, OP_CONFIG, "AR_INTERFACE_ON" }, |
| { ar_interface_off, AR_INTERFACE_OFF, sizeof (arc_t), |
| ARF_ONLY_CMD, OP_CONFIG, "AR_INTERFACE_OFF" }, |
| { ar_ipmp_activate, AR_IPMP_ACTIVATE, sizeof (arie_t), |
| ARF_ONLY_CMD, OP_CONFIG, "AR_IPMP_ACTIVATE" }, |
| { ar_ipmp_deactivate, AR_IPMP_DEACTIVATE, sizeof (arie_t), |
| ARF_ONLY_CMD, OP_CONFIG, "AR_IPMP_DEACTIVATE" }, |
| { ar_set_ppa, (uint32_t)IF_UNITSEL, sizeof (int), |
| ARF_IOCTL_AWARE | ARF_WPUT_OK, OP_CONFIG, "IF_UNITSEL" }, |
| { ar_nd_ioctl, ND_GET, 1, |
| ARF_IOCTL_AWARE | ARF_WPUT_OK, OP_NP, "ND_GET" }, |
| { ar_nd_ioctl, ND_SET, 1, |
| ARF_IOCTL_AWARE | ARF_WPUT_OK, OP_CONFIG, "ND_SET" }, |
| { ar_snmp_msg, AR_SNMP_MSG, sizeof (struct T_optmgmt_ack), |
| ARF_IOCTL_AWARE | ARF_WPUT_OK | ARF_ONLY_CMD, OP_NP, |
| "AR_SNMP_MSG" }, |
| { ar_slifname, (uint32_t)SIOCSLIFNAME, sizeof (struct lifreq), |
| ARF_IOCTL_AWARE | ARF_WPUT_OK, OP_CONFIG, "SIOCSLIFNAME" } |
| }; |
| |
| /* |
| * Lookup and return an arl appropriate for sending packets with either source |
| * hardware address `hw_addr' or source protocol address `ip_addr', in that |
| * order. If neither was specified or neither matches, return any arl in the |
| * same group as `arl'. |
| */ |
| static arl_t * |
| ar_ipmp_lookup_xmit_arl(arl_t *arl, uchar_t *hw_addr, uint_t hw_addrlen, |
| uchar_t *ip_addr) |
| { |
| arlphy_t *ap; |
| ace_t *src_ace; |
| arl_t *xmit_arl = NULL; |
| arp_stack_t *as = ARL_TO_ARPSTACK(arl); |
| |
| ASSERT(arl->arl_flags & ARL_F_IPMP); |
| |
| if (hw_addr != NULL && hw_addrlen != 0) { |
| xmit_arl = as->as_arl_head; |
| for (; xmit_arl != NULL; xmit_arl = xmit_arl->arl_next) { |
| /* |
| * There may be arls with the same HW address that are |
| * not in our IPMP group; we don't want those. |
| */ |
| if (xmit_arl->arl_ipmp_arl != arl) |
| continue; |
| |
| ap = xmit_arl->arl_phy; |
| if (ap != NULL && ap->ap_hw_addrlen == hw_addrlen && |
| bcmp(ap->ap_hw_addr, hw_addr, hw_addrlen) == 0) |
| break; |
| } |
| |
| DTRACE_PROBE4(xmit_arl_hwsrc, arl_t *, arl, arl_t *, |
| xmit_arl, uchar_t *, hw_addr, uint_t, hw_addrlen); |
| } |
| |
| if (xmit_arl == NULL && ip_addr != NULL) { |
| src_ace = ar_ce_lookup_permanent(as, IP_ARP_PROTO_TYPE, ip_addr, |
| IP_ADDR_LEN); |
| if (src_ace != NULL) |
| xmit_arl = src_ace->ace_xmit_arl; |
| |
| DTRACE_PROBE4(xmit_arl_ipsrc, arl_t *, arl, arl_t *, |
| xmit_arl, uchar_t *, ip_addr, uint_t, IP_ADDR_LEN); |
| } |
| |
| if (xmit_arl == NULL) { |
| xmit_arl = as->as_arl_head; |
| for (; xmit_arl != NULL; xmit_arl = xmit_arl->arl_next) |
| if (xmit_arl->arl_ipmp_arl == arl && xmit_arl != arl) |
| break; |
| |
| DTRACE_PROBE2(xmit_arl_any, arl_t *, arl, arl_t *, xmit_arl); |
| } |
| |
| return (xmit_arl); |
| } |
| |
| /* |
| * ARP Cache Entry creation routine. |
| * Cache entries are allocated within timer messages and inserted into |
| * the global hash list based on protocol and protocol address. |
| */ |
| static int |
| ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len, |
| uchar_t *proto_addr, uint_t proto_addr_len, uchar_t *proto_mask, |
| uchar_t *proto_extract_mask, uint_t hw_extract_start, uchar_t *sender_addr, |
| uint_t flags) |
| { |
| static ace_t ace_null; |
| ace_t *ace; |
| ace_t **acep; |
| uchar_t *dst; |
| mblk_t *mp; |
| arp_stack_t *as = ARL_TO_ARPSTACK(arl); |
| arl_t *xmit_arl; |
| arlphy_t *ap; |
| |
| if ((flags & ~ACE_EXTERNAL_FLAGS_MASK) || arl == NULL) |
| return (EINVAL); |
| |
| if (proto_addr == NULL || proto_addr_len == 0 || |
| (proto == IP_ARP_PROTO_TYPE && proto_addr_len != IP_ADDR_LEN)) |
| return (EINVAL); |
| |
| if (flags & ACE_F_MYADDR) |
| flags |= ACE_F_PUBLISH | ACE_F_AUTHORITY; |
| |
| /* |
| * Latch a transmit arl for this ace. |
| */ |
| if (arl->arl_flags & ARL_F_IPMP) { |
| ASSERT(proto == IP_ARP_PROTO_TYPE); |
| xmit_arl = ar_ipmp_lookup_xmit_arl(arl, hw_addr, hw_addr_len, |
| sender_addr); |
| } else { |
| xmit_arl = arl; |
| } |
| |
| if (xmit_arl == NULL || xmit_arl->arl_phy == NULL) |
| return (EINVAL); |
| |
| ap = xmit_arl->arl_phy; |
| |
| if (!hw_addr && hw_addr_len == 0) { |
| if (flags == ACE_F_PERMANENT) { /* Not publish */ |
| /* 224.0.0.0 to zero length address */ |
| flags |= ACE_F_RESOLVED; |
| } else { /* local address and unresolved case */ |
| hw_addr = ap->ap_hw_addr; |
| hw_addr_len = ap->ap_hw_addrlen; |
| if (flags & ACE_F_PUBLISH) |
| flags |= ACE_F_RESOLVED; |
| } |
| } else { |
| flags |= ACE_F_RESOLVED; |
| } |
| |
| /* Handle hw_addr_len == 0 for DL_ENABMULTI_REQ etc. */ |
| if (hw_addr_len != 0 && hw_addr == NULL) |
| return (EINVAL); |
| if (hw_addr_len < ap->ap_hw_addrlen && hw_addr_len != 0) |
| return (EINVAL); |
| if (!proto_extract_mask && (flags & ACE_F_MAPPING)) |
| return (EINVAL); |
| |
| /* |
| * If the underlying link doesn't have reliable up/down notification or |
| * if we're working with the IPv4 169.254.0.0/16 Link Local Address |
| * space, then don't use the fast timers. Otherwise, use them. |
| */ |
| if (ap->ap_notifies && |
| !(proto == IP_ARP_PROTO_TYPE && IS_IPV4_LL_SPACE(proto_addr))) { |
| flags |= ACE_F_FAST; |
| } |
| |
| /* |
| * Allocate the timer block to hold the ace. |
| * (ace + proto_addr + proto_addr_mask + proto_extract_mask + hw_addr) |
| */ |
| mp = mi_timer_alloc(sizeof (ace_t) + proto_addr_len + proto_addr_len + |
| proto_addr_len + hw_addr_len); |
| if (!mp) |
| return (ENOMEM); |
| ace = (ace_t *)mp->b_rptr; |
| *ace = ace_null; |
| ace->ace_proto = proto; |
| ace->ace_mp = mp; |
| ace->ace_arl = arl; |
| ace->ace_xmit_arl = xmit_arl; |
| |
| dst = (uchar_t *)&ace[1]; |
| |
| ace->ace_proto_addr = dst; |
| ace->ace_proto_addr_length = proto_addr_len; |
| bcopy(proto_addr, dst, proto_addr_len); |
| dst += proto_addr_len; |
| /* |
| * The proto_mask allows us to add entries which will let us respond |
| * to requests for a group of addresses. This makes it easy to provide |
| * proxy ARP service for machines that don't know about the local |
| * subnet structure, if, for example, there are BSD4.2 systems lurking. |
| */ |
| ace->ace_proto_mask = dst; |
| if (proto_mask != NULL) { |
| bcopy(proto_mask, dst, proto_addr_len); |
| dst += proto_addr_len; |
| } else { |
| while (proto_addr_len-- > 0) |
| *dst++ = (uchar_t)~0; |
| } |
| |
| if (proto_extract_mask != NULL) { |
| ace->ace_proto_extract_mask = dst; |
| bcopy(proto_extract_mask, dst, ace->ace_proto_addr_length); |
| dst += ace->ace_proto_addr_length; |
| } else { |
| ace->ace_proto_extract_mask = NULL; |
| } |
| ace->ace_hw_extract_start = hw_extract_start; |
| ace->ace_hw_addr_length = hw_addr_len; |
| ace->ace_hw_addr = dst; |
| if (hw_addr != NULL) { |
| bcopy(hw_addr, dst, hw_addr_len); |
| dst += hw_addr_len; |
| } |
| |
| ace->ace_flags = flags; |
| if (ar_mask_all_ones(ace->ace_proto_mask, |
| ace->ace_proto_addr_length)) { |
| acep = ar_ce_hash(as, ace->ace_proto, ace->ace_proto_addr, |
| ace->ace_proto_addr_length); |
| } else { |
| acep = &as->as_ce_mask_entries; |
| } |
| if ((ace->ace_next = *acep) != NULL) |
| ace->ace_next->ace_ptpn = &ace->ace_next; |
| *acep = ace; |
| ace->ace_ptpn = acep; |
| return (0); |
| } |
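| |
| /* |
| * For a typical IPv4-over-Ethernet entry, the timer block above is |
| * thus sizeof (ace_t) + 4 (proto_addr) + 4 (proto_mask) + 4 (extract- |
| * mask slot, unused when there is no mapping) + 6 (hw_addr) bytes, |
| * with the ace_t pointer fields aimed at successive regions of the |
| * trailing space. |
| */ |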
| |
| /* Delete a cache entry. */ |
| static void |
| ar_ce_delete(ace_t *ace) |
| { |
| ace_t **acep; |
| |
| /* Get out of the hash list. */ |
| acep = ace->ace_ptpn; |
| if (ace->ace_next) |
| ace->ace_next->ace_ptpn = acep; |
| acep[0] = ace->ace_next; |
| /* Mark it dying in case we have a timer about to fire. */ |
| ace->ace_flags |= ACE_F_DYING; |
| /* Complete any outstanding queries immediately. */ |
| ar_query_reply(ace, ENXIO, NULL, (uint32_t)0); |
| /* Free the timer, immediately, or when it fires. */ |
| mi_timer_free(ace->ace_mp); |
| } |
| |
| /* |
| * ar_ce_walk routine. Delete the ace if it is associated with the arl |
| * that is going away. |
| */ |
| static void |
| ar_ce_delete_per_arl(ace_t *ace, void *arl) |
| { |
| if (ace->ace_arl == arl || ace->ace_xmit_arl == arl) { |
| ace->ace_flags &= ~ACE_F_PERMANENT; |
| ar_ce_delete(ace); |
| } |
| } |
| |
| /* |
| * ar_ce_walk routine used when deactivating an `arl' in a group. Deletes |
| * `ace' if it was using `arl_arg' as its output interface. |
| */ |
| static void |
| ar_ce_ipmp_deactivate(ace_t *ace, void *arl_arg) |
| { |
| arl_t *arl = arl_arg; |
| |
| ASSERT(!(arl->arl_flags & ARL_F_IPMP)); |
| |
| if (ace->ace_arl == arl) { |
| ASSERT(ace->ace_xmit_arl == arl); |
| /* |
| * This ACE is tied to the arl leaving the group (e.g., an |
| * ACE_F_PERMANENT for a test address) and is not used by the |
| * group, so we can leave it be. |
| */ |
| return; |
| } |
| |
| if (ace->ace_xmit_arl != arl) |
| return; |
| |
| ASSERT(ace->ace_arl == arl->arl_ipmp_arl); |
| |
| /* |
| * IP should've already sent us messages asking us to move any |
| * ACE_F_MYADDR entries to another arl, but there are two exceptions: |
| * |
| * 1. The group was misconfigured with interfaces that have duplicate |
| * hardware addresses, but in.mpathd was unable to offline those |
| * duplicate interfaces. |
| * |
| * 2. The messages from IP were lost or never created (e.g. due to |
| * memory pressure). |
| * |
| * We handle the first case by just quietly deleting the ACE. Since |
| * the second case cannot be distinguished from a more serious bug in |
| * the IPMP framework, we ASSERT() that this can't happen on DEBUG |
| * systems, but quietly delete the ACE on production systems (the |
| * deleted ACE will render the IP address unreachable). |
| */ |
| if (ace->ace_flags & ACE_F_MYADDR) { |
| arlphy_t *ap = arl->arl_phy; |
| uint_t hw_addrlen = ap->ap_hw_addrlen; |
| |
| ASSERT(hw_addrlen == ace->ace_hw_addr_length && |
| bcmp(ap->ap_hw_addr, ace->ace_hw_addr, hw_addrlen) == 0); |
| } |
| |
| /* |
| * NOTE: it's possible this arl got selected as the ace_xmit_arl when |
| * creating an ACE_F_PERMANENT ACE on behalf of an SIOCS*ARP ioctl for |
| * an IPMP IP interface. But it's still OK for us to delete such an |
| * ACE since ipmp_illgrp_refresh_arpent() will ask us to recreate it |
| * and we'll pick another arl then. |
| */ |
| ar_ce_delete(ace); |
| } |
| |
| /* Cache entry hash routine, based on protocol and protocol address. */ |
| static ace_t ** |
| ar_ce_hash(arp_stack_t *as, uint32_t proto, const uchar_t *proto_addr, |
| uint32_t proto_addr_length) |
| { |
| const uchar_t *up = proto_addr; |
| unsigned int hval = proto; |
| int len = proto_addr_length; |
| |
| while (--len >= 0) |
| hval ^= *up++; |
| return (&as->as_ce_hash_tbl[hval % ARP_HASH_SIZE]); |
| } |
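| |
| /* |
| * For example, the IPv4 address 10.1.2.3 under protocol 0x0800 |
| * (IP_ARP_PROTO_TYPE) hashes as 0x800 ^ 10 ^ 1 ^ 2 ^ 3 == 0x80a, |
| * which then selects bucket 0x80a % ARP_HASH_SIZE. |
| */ |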
| |
| /* Cache entry lookup. Try to find an ace matching the parameters passed. */ |
| ace_t * |
| ar_ce_lookup(arl_t *arl, uint32_t proto, const uchar_t *proto_addr, |
| uint32_t proto_addr_length) |
| { |
| ace_t *ace; |
| |
| ace = ar_ce_lookup_entry(arl, proto, proto_addr, proto_addr_length); |
| if (!ace) |
| ace = ar_ce_lookup_mapping(arl, proto, proto_addr, |
| proto_addr_length); |
| return (ace); |
| } |
| |
| /* |
| * Cache entry lookup. Try to find an ace matching the parameters passed. |
| * Look only for exact entries (no mappings). |
| */ |
| static ace_t * |
| ar_ce_lookup_entry(arl_t *arl, uint32_t proto, const uchar_t *proto_addr, |
| uint32_t proto_addr_length) |
| { |
| ace_t *ace; |
| arp_stack_t *as = ARL_TO_ARPSTACK(arl); |
| |
| if (!proto_addr) |
| return (NULL); |
| ace = *ar_ce_hash(as, proto, proto_addr, proto_addr_length); |
| for (; ace; ace = ace->ace_next) { |
| if ((ace->ace_arl == arl || |
| ace->ace_arl == arl->arl_ipmp_arl) && |
| ace->ace_proto_addr_length == proto_addr_length && |
| ace->ace_proto == proto) { |
| int i1 = proto_addr_length; |
| uchar_t *ace_addr = ace->ace_proto_addr; |
| uchar_t *mask = ace->ace_proto_mask; |
| /* |
| * Note that the ace_proto_mask is applied to the |
| * proto_addr before comparing to the ace_addr. |
| */ |
| do { |
| if (--i1 < 0) |
| return (ace); |
| } while ((proto_addr[i1] & mask[i1]) == ace_addr[i1]); |
| } |
| } |
| return (ace); |
| } |
| |
| /* |
| * Extract cache entry lookup parameters from an external command message, then |
| * call the supplied match function. |
| */ |
| static ace_t * |
| ar_ce_lookup_from_area(arp_stack_t *as, mblk_t *mp, ace_t *matchfn()) |
| { |
| uchar_t *proto_addr; |
| area_t *area = (area_t *)mp->b_rptr; |
| |
| proto_addr = mi_offset_paramc(mp, area->area_proto_addr_offset, |
| area->area_proto_addr_length); |
| if (!proto_addr) |
| return (NULL); |
| return ((*matchfn)(ar_ll_lookup_from_mp(as, mp), area->area_proto, |
| proto_addr, area->area_proto_addr_length)); |
| } |
| |
| /* |
| * Cache entry lookup. Try to find an ace matching the parameters passed. |
| * Look only for mappings. |
| */ |
| static ace_t * |
| ar_ce_lookup_mapping(arl_t *arl, uint32_t proto, const uchar_t *proto_addr, |
| uint32_t proto_addr_length) |
| { |
| ace_t *ace; |
| arp_stack_t *as = ARL_TO_ARPSTACK(arl); |
| |
| if (!proto_addr) |
| return (NULL); |
| ace = as->as_ce_mask_entries; |
| for (; ace; ace = ace->ace_next) { |
| if (ace->ace_arl == arl && |
| ace->ace_proto_addr_length == proto_addr_length && |
| ace->ace_proto == proto) { |
| int i1 = proto_addr_length; |
| uchar_t *ace_addr = ace->ace_proto_addr; |
| uchar_t *mask = ace->ace_proto_mask; |
| /* |
| * Note that the ace_proto_mask is applied to the |
| * proto_addr before comparing to the ace_addr. |
| */ |
| do { |
| if (--i1 < 0) |
| return (ace); |
| } while ((proto_addr[i1] & mask[i1]) == ace_addr[i1]); |
| } |
| } |
| return (ace); |
| } |
| |
| /* |
| * Look for a permanent entry for proto_addr across all interfaces. |
| */ |
| static ace_t * |
| ar_ce_lookup_permanent(arp_stack_t *as, uint32_t proto, uchar_t *proto_addr, |
| uint32_t proto_addr_length) |
| { |
| ace_t *ace; |
| |
| ace = *ar_ce_hash(as, proto, proto_addr, proto_addr_length); |
| for (; ace != NULL; ace = ace->ace_next) { |
| if (!(ace->ace_flags & ACE_F_PERMANENT)) |
| continue; |
| if (ace->ace_proto_addr_length == proto_addr_length && |
| ace->ace_proto == proto) { |
| int i1 = proto_addr_length; |
| uchar_t *ace_addr = ace->ace_proto_addr; |
| uchar_t *mask = ace->ace_proto_mask; |
| |
| /* |
| * Note that the ace_proto_mask is applied to the |
| * proto_addr before comparing to the ace_addr. |
| */ |
| do { |
| if (--i1 < 0) |
| return (ace); |
| } while ((proto_addr[i1] & mask[i1]) == ace_addr[i1]); |
| } |
| } |
| return (ace); |
| } |
| |
| /* |
| * ar_ce_resolve is called when a response comes in to an outstanding request. |
| * Returns 'true' if the address has changed and we need to tell the client. |
| * (We don't need to tell the client if there's still an outstanding query.) |
| */ |
| static boolean_t |
| ar_ce_resolve(ace_t *ace, const uchar_t *hw_addr, uint32_t hw_addr_length) |
| { |
| boolean_t hwchanged; |
| |
| if (hw_addr_length == ace->ace_hw_addr_length) { |
| ASSERT(ace->ace_hw_addr != NULL); |
| hwchanged = bcmp(hw_addr, ace->ace_hw_addr, |
| hw_addr_length) != 0; |
| if (hwchanged) |
| bcopy(hw_addr, ace->ace_hw_addr, hw_addr_length); |
| /* |
| * No need to bother with ar_query_reply if no queries are |
| * waiting. |
| */ |
| ace->ace_flags |= ACE_F_RESOLVED; |
| if (ace->ace_query_mp != NULL) |
| ar_query_reply(ace, 0, NULL, (uint32_t)0); |
| if (hwchanged) |
| return (B_TRUE); |
| } |
| return (B_FALSE); |
| } |
| |
| /* |
| * This function performs two tasks: |
| * 1. Resolution of unresolved entries and update of resolved entries. |
| * 2. Detection of nodes claiming our own IP address (duplicates). |
| * |
| * If the resolving ARL is in the same group as a matching ACE's ARL, then |
| * update the ACE. Otherwise, make no updates. |
| * |
| * For all entries, we first check to see if this is a duplicate (probable |
| * loopback) message. If so, then just ignore it. |
| * |
| * Next, check to see if the entry has completed DAD. If not, then we've |
| * failed, because someone is already using the address. Notify IP of the DAD |
| * failure and remove the broken ace. |
| * |
| * Next, we check if we're the authority for this address. If so, then it's |
| * time to defend it, because the other node is a duplicate. Report it as a |
| * 'bogon' and let IP decide how to defend. |
| * |
| * Finally, if it's unresolved or if the arls match, we just update the MAC |
| * address. This allows a published 'static' entry to be updated by an ARP |
| * request from the node for which we're a proxy ARP server. |
| * |
| * Note that this logic does not update published ARP entries for mismatched |
| * arls, as for example when we proxy arp across 2 subnets with differing |
| * subnet masks. |
| * |
| * Return Values below |
| */ |
| |
| #define AR_NOTFOUND 1 /* No matching ace found in cache */ |
| #define AR_MERGED 2 /* Matching ace updated (RFC 826 Merge_flag) */ |
| #define AR_LOOPBACK 3 /* Our own arp packet was received */ |
| #define AR_BOGON 4 /* Another host has our IP addr. */ |
| #define AR_FAILED 5 /* Duplicate Address Detection has failed */ |
| #define AR_CHANGED 6 /* Address has changed; tell IP (and merged) */ |
| |
| static int |
| ar_ce_resolve_all(arl_t *arl, uint32_t proto, const uchar_t *src_haddr, |
| uint32_t hlen, const uchar_t *src_paddr, uint32_t plen, arl_t **ace_arlp) |
| { |
| ace_t *ace; |
| ace_t *ace_next; |
| int i1; |
| const uchar_t *paddr; |
| uchar_t *ace_addr; |
| uchar_t *mask; |
| int retv = AR_NOTFOUND; |
| arp_stack_t *as = ARL_TO_ARPSTACK(arl); |
| |
| ace = *ar_ce_hash(as, proto, src_paddr, plen); |
| for (; ace != NULL; ace = ace_next) { |
| |
| /* ar_ce_resolve may delete the ace; fetch next pointer now */ |
| ace_next = ace->ace_next; |
| |
| if (ace->ace_proto_addr_length != plen || |
| ace->ace_proto != proto) { |
| continue; |
| } |
| |
| /* |
| * Note that the ace_proto_mask is applied to the proto_addr |
| * before comparing to the ace_addr. |
| */ |
| paddr = src_paddr; |
| i1 = plen; |
| ace_addr = ace->ace_proto_addr; |
| mask = ace->ace_proto_mask; |
| while (--i1 >= 0) { |
| if ((*paddr++ & *mask++) != *ace_addr++) |
| break; |
| } |
| if (i1 >= 0) |
| continue; |
| |
| *ace_arlp = ace->ace_arl; |
| |
| /* |
| * If the IP address is ours, and the hardware address matches |
| * one of our own arls, then this is a broadcast packet |
| * emitted by one of our interfaces, reflected by the switch |
| * and received on another interface. We return AR_LOOPBACK. |
| */ |
| if (ace->ace_flags & ACE_F_MYADDR) { |
| arl_t *hw_arl = as->as_arl_head; |
| arlphy_t *ap; |
| |
| for (; hw_arl != NULL; hw_arl = hw_arl->arl_next) { |
| ap = hw_arl->arl_phy; |
| if (ap != NULL && ap->ap_hw_addrlen == hlen && |
| bcmp(ap->ap_hw_addr, src_haddr, hlen) == 0) |
| return (AR_LOOPBACK); |
| } |
| } |
| |
| /* |
| * If the entry is unverified, then we've just verified that |
| * someone else already owns this address, because this is a |
| * message with the same protocol address but different |
| * hardware address. NOTE: the ace_xmit_arl check ensures we |
| * don't send duplicate AR_FAILEDs if arl is in an IPMP group. |
| */ |
| if ((ace->ace_flags & ACE_F_UNVERIFIED) && |
| arl == ace->ace_xmit_arl) { |
| ar_ce_delete(ace); |
| return (AR_FAILED); |
| } |
| |
| /* |
| * If the IP address matches ours and we're authoritative for |
| * this entry, then some other node is using our IP addr, so |
| * return AR_BOGON. Also reset the transmit count to zero so |
| * that, if we're currently in initial announcement mode, we |
| * switch back to the lazier defense mode. Knowing that |
| * there's at least one duplicate out there, we ought not |
| * blindly announce. NOTE: the ace_xmit_arl check ensures we |
| * don't send duplicate AR_BOGONs if arl is in an IPMP group. |
| */ |
| if ((ace->ace_flags & ACE_F_AUTHORITY) && |
| arl == ace->ace_xmit_arl) { |
| ace->ace_xmit_count = 0; |
| return (AR_BOGON); |
| } |
| |
| /* |
| * Only update this ACE if it's on the same network -- i.e., |
| * it's for our ARL or another ARL in the same IPMP group. |
| */ |
| if (ace->ace_arl == arl || ace->ace_arl == arl->arl_ipmp_arl) { |
| if (ar_ce_resolve(ace, src_haddr, hlen)) |
| retv = AR_CHANGED; |
| else if (retv == AR_NOTFOUND) |
| retv = AR_MERGED; |
| } |
| } |
| |
| if (retv == AR_NOTFOUND) |
| *ace_arlp = NULL; |
| return (retv); |
| } |
| |
| /* Pass arg1 to the pfi supplied, along with each ace in existence. */ |
| static void |
| ar_ce_walk(arp_stack_t *as, void (*pfi)(ace_t *, void *), void *arg1) |
| { |
| ace_t *ace; |
| ace_t *ace1; |
| int i; |
| |
| for (i = 0; i < ARP_HASH_SIZE; i++) { |
| /* |
| * We walk the hash chain in a way that allows the current |
| * ace to get blown off by the called routine. |
| */ |
| for (ace = as->as_ce_hash_tbl[i]; ace; ace = ace1) { |
| ace1 = ace->ace_next; |
| (*pfi)(ace, arg1); |
| } |
| } |
| for (ace = as->as_ce_mask_entries; ace; ace = ace1) { |
| ace1 = ace->ace_next; |
| (*pfi)(ace, arg1); |
| } |
| } |
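| |
| /* |
| * For example, ar_close() below uses this walker to flush every entry |
| * tied to a departing arl: |
| * |
| * ar_ce_walk(as, ar_ce_delete_per_arl, arl); |
| */ |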
| |
| /* |
| * Send a copy of interesting packets to the corresponding IP instance. |
| * The corresponding IP instance is the ARP-IP-DEV instance for this |
| * DEV (i.e. ARL). |
| */ |
| static void |
| ar_client_notify(const arl_t *arl, mblk_t *mp, int code) |
| { |
| ar_t *ar = ((ar_t *)arl->arl_rq->q_ptr)->ar_arl_ip_assoc; |
| arcn_t *arcn; |
| mblk_t *mp1; |
| int arl_namelen = strlen(arl->arl_name) + 1; |
| |
| /* Looks like the association disappeared */ |
| if (ar == NULL) { |
| freemsg(mp); |
| return; |
| } |
| |
| /* ar is the corresponding ARP-IP instance for this ARL */ |
| ASSERT(ar->ar_arl == NULL && ar->ar_wq->q_next != NULL); |
| |
| mp1 = allocb(sizeof (arcn_t) + arl_namelen, BPRI_MED); |
| if (mp1 == NULL) { |
| freemsg(mp); |
| return; |
| } |
| DB_TYPE(mp1) = M_CTL; |
| mp1->b_cont = mp; |
| arcn = (arcn_t *)mp1->b_rptr; |
| mp1->b_wptr = (uchar_t *)&arcn[1] + arl_namelen; |
| arcn->arcn_cmd = AR_CLIENT_NOTIFY; |
| arcn->arcn_name_offset = sizeof (arcn_t); |
| arcn->arcn_name_length = arl_namelen; |
| arcn->arcn_code = code; |
| bcopy(arl->arl_name, &arcn[1], arl_namelen); |
| |
| putnext(ar->ar_wq, mp1); |
| } |
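| |
| /* |
| * The message built above is an M_CTL of the form |
| * |
| * [ arcn_t, arl_name ] --b_cont--> original packet |
| * |
| * which IP decodes via the AR_CLIENT_NOTIFY command and arcn_code. |
| */ |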
| |
| /* |
| * Send a delete-notify message down to IP. We've determined that IP doesn't |
| * have a cache entry for the IP address itself, but it may have other cache |
| * entries with the same hardware address, and we don't want to see those grow |
| * stale. (The alternative is sending down updates for every ARP message we |
| * get that doesn't match an existing ace. That's much more expensive than an |
| * occasional delete and reload.) |
| */ |
| static void |
| ar_delete_notify(const ace_t *ace) |
| { |
| const arl_t *arl = ace->ace_arl; |
| const arlphy_t *ap = ace->ace_xmit_arl->arl_phy; |
| mblk_t *mp; |
| size_t len; |
| arh_t *arh; |
| |
| len = sizeof (*arh) + 2 * ace->ace_proto_addr_length; |
| mp = allocb(len, BPRI_MED); |
| if (mp == NULL) |
| return; |
| arh = (arh_t *)mp->b_rptr; |
| mp->b_wptr = (uchar_t *)arh + len; |
| U16_TO_BE16(ap->ap_arp_hw_type, arh->arh_hardware); |
| U16_TO_BE16(ace->ace_proto, arh->arh_proto); |
| arh->arh_hlen = 0; |
| arh->arh_plen = ace->ace_proto_addr_length; |
| U16_TO_BE16(ARP_RESPONSE, arh->arh_operation); |
| bcopy(ace->ace_proto_addr, arh + 1, ace->ace_proto_addr_length); |
| bcopy(ace->ace_proto_addr, (uchar_t *)(arh + 1) + |
| ace->ace_proto_addr_length, ace->ace_proto_addr_length); |
| ar_client_notify(arl, mp, AR_CN_ANNOUNCE); |
| } |
| |
| /* ARP module close routine. */ |
| static int |
| ar_close(queue_t *q) |
| { |
| ar_t *ar = (ar_t *)q->q_ptr; |
| char name[LIFNAMSIZ]; |
| arl_t *arl, *xarl; |
| arl_t **arlp; |
| cred_t *cr; |
| arc_t *arc; |
| mblk_t *mp1; |
| int index; |
| arp_stack_t *as = ar->ar_as; |
| |
| TRACE_1(TR_FAC_ARP, TR_ARP_CLOSE, |
| "arp_close: q %p", q); |
| |
| arl = ar->ar_arl; |
| if (arl == NULL) { |
| index = 0; |
| /* |
| * If this is the <ARP-IP-Driver> stream, send down |
| * a closing message to IP and wait for IP to send |
| * an ack. This helps to make sure that messages |
| * that are currently being sent up by IP are not lost. |
| */ |
| if (ar->ar_on_ill_stream) { |
| mp1 = allocb(sizeof (arc_t), BPRI_MED); |
| if (mp1 != NULL) { |
| DB_TYPE(mp1) = M_CTL; |
| arc = (arc_t *)mp1->b_rptr; |
| mp1->b_wptr = mp1->b_rptr + sizeof (arc_t); |
| arc->arc_cmd = AR_ARP_CLOSING; |
| putnext(WR(q), mp1); |
| while (!ar->ar_ip_acked_close) |
| /* If we are interrupted break out */ |
| if (qwait_sig(q) == 0) |
| break; |
| } |
| } |
| /* Delete all our pending queries, 'arl' is not dereferenced */ |
| ar_ce_walk(as, ar_query_delete, ar); |
| /* |
| * The request could be pending on some arl_queue also. This |
| * happens if the arl is not yet bound, and bind is pending. |
| */ |
| ar_ll_cleanup_arl_queue(q); |
| } else { |
| index = arl->arl_index; |
| (void) strcpy(name, arl->arl_name); |
| arl->arl_closing = 1; |
| while (arl->arl_queue != NULL) |
| qwait(arl->arl_rq); |
| |
| if (arl->arl_state == ARL_S_UP) |
| ar_ll_down(arl); |
| |
| while (arl->arl_state != ARL_S_DOWN) |
| qwait(arl->arl_rq); |
| |
| if (arl->arl_flags & ARL_F_IPMP) { |
| /* |
| * Though rude, someone could force the IPMP arl |
| * closed without removing the underlying interfaces. |
| * In that case, force the ARLs out of the group. |
| */ |
| xarl = as->as_arl_head; |
| for (; xarl != NULL; xarl = xarl->arl_next) { |
| if (xarl->arl_ipmp_arl != arl || xarl == arl) |
| continue; |
| ar_ce_walk(as, ar_ce_ipmp_deactivate, xarl); |
| xarl->arl_ipmp_arl = NULL; |
| } |
| } |
| |
| ar_ll_clear_defaults(arl); |
| /* |
| * If this is the control stream for an arl, delete anything |
| * hanging off our arl. |
| */ |
| ar_ce_walk(as, ar_ce_delete_per_arl, arl); |
| /* Free any messages waiting for a bind_ack */ |
| /* Get the arl out of the chain. */ |
| rw_enter(&as->as_arl_lock, RW_WRITER); |
| for (arlp = &as->as_arl_head; *arlp; |
| arlp = &(*arlp)->arl_next) { |
| if (*arlp == arl) { |
| *arlp = arl->arl_next; |
| break; |
| } |
| } |
| |
| ASSERT(arl->arl_dlpi_deferred == NULL); |
| ar->ar_arl = NULL; |
| rw_exit(&as->as_arl_lock); |
| |
| mi_free((char *)arl); |
| } |
| /* Let's break the association between an ARL and IP instance */ |
| if (ar->ar_arl_ip_assoc != NULL) { |
| ASSERT(ar->ar_arl_ip_assoc->ar_arl_ip_assoc != NULL && |
| ar->ar_arl_ip_assoc->ar_arl_ip_assoc == ar); |
| ar->ar_arl_ip_assoc->ar_arl_ip_assoc = NULL; |
| ar->ar_arl_ip_assoc = NULL; |
| } |
| cr = ar->ar_credp; |
| /* mi_close_comm frees the instance data. */ |
| (void) mi_close_comm(&as->as_head, q); |
| qprocsoff(q); |
| crfree(cr); |
| |
| if (index != 0) { |
| hook_nic_event_t info; |
| |
| info.hne_nic = index; |
| info.hne_lif = 0; |
| info.hne_event = NE_UNPLUMB; |
| info.hne_data = name; |
| info.hne_datalen = strlen(name); |
| (void) hook_run(as->as_net_data->netd_hooks, |
| as->as_arpnicevents, (hook_data_t)&info); |
| } |
| netstack_rele(as->as_netstack); |
| return (0); |
| } |
| |
| /* |
| * Dispatch routine for ARP commands. This routine can be called out of |
| * either ar_wput or ar_rput, in response to IOCTLs or M_PROTO messages. |
| */ |
| /* TODO: error reporting for M_PROTO case */ |
| static int |
| ar_cmd_dispatch(queue_t *q, mblk_t *mp_orig, boolean_t from_wput) |
| { |
| arct_t *arct; |
| uint32_t cmd; |
| ssize_t len; |
| mblk_t *mp = mp_orig; |
| cred_t *cr = NULL; |
| |
| if (!mp) |
| return (ENOENT); |
| |
| /* We get both M_PROTO and M_IOCTL messages, so watch out! */ |
| if (DB_TYPE(mp) == M_IOCTL) { |
| struct iocblk *ioc; |
| ioc = (struct iocblk *)mp->b_rptr; |
| cmd = ioc->ioc_cmd; |
| cr = ioc->ioc_cr; |
| mp = mp->b_cont; |
| if (!mp) |
| return (ENOENT); |
| } |
| len = MBLKL(mp); |
| if (len < sizeof (uint32_t) || !OK_32PTR(mp->b_rptr)) |
| return (ENOENT); |
| if (mp_orig == mp) |
| cmd = *(uint32_t *)mp->b_rptr; |
| for (arct = ar_cmd_tbl; ; arct++) { |
| if (arct >= A_END(ar_cmd_tbl)) |
| return (ENOENT); |
| if (arct->arct_cmd == cmd) |
| break; |
| } |
| if (len < arct->arct_min_len) { |
| /* |
| * If the command is exclusive to ARP, we return EINVAL, |
| * else we need to pass the command downstream, so return |
| * ENOENT |
| */ |
| return ((arct->arct_flags & ARF_ONLY_CMD) ? EINVAL : ENOENT); |
| } |
| if (arct->arct_priv_req != OP_NP) { |
| int error; |
| |
| if (cr == NULL) |
| cr = DB_CREDDEF(mp_orig, ((ar_t *)q->q_ptr)->ar_credp); |
| |
| if ((error = secpolicy_ip(cr, arct->arct_priv_req, |
| B_FALSE)) != 0) |
| return (error); |
| } |
| /* Commands lacking ARF_WPUT_OK are allowed only from ar_rput, i.e. from IP */ |
| if (from_wput && !(arct->arct_flags & ARF_WPUT_OK)) { |
| return (EINVAL); |
| } |
| |
| if (arct->arct_flags & ARF_IOCTL_AWARE) |
| mp = mp_orig; |
| |
| DTRACE_PROBE3(cmd_dispatch, queue_t *, q, mblk_t *, mp, |
| arct_t *, arct); |
| return (*arct->arct_pfi)(q, mp); |
| } |
| |
| /* Allocate and do common initializations for DLPI messages. */ |
| static mblk_t * |
| ar_dlpi_comm(t_uscalar_t prim, size_t size) |
| { |
| mblk_t *mp; |
| |
| if ((mp = allocb(size, BPRI_HI)) == NULL) |
| return (NULL); |
| |
| /* |
| * DLPIv2 says that DL_INFO_REQ and DL_TOKEN_REQ (the latter |
| * of which we don't seem to use) are sent with M_PCPROTO, and |
| * that other DLPI messages are M_PROTO. |
| */ |
| DB_TYPE(mp) = (prim == DL_INFO_REQ) ? M_PCPROTO : M_PROTO; |
| |
| mp->b_wptr = mp->b_rptr + size; |
| bzero(mp->b_rptr, size); |
| ((union DL_primitives *)mp->b_rptr)->dl_primitive = prim; |
| |
| return (mp); |
| } |
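| |
| /* |
| * A typical caller pairs this with ar_dlpi_send(); e.g. (sketch): |
| * |
| * mblk_t *mp = ar_dlpi_comm(DL_UNBIND_REQ, sizeof (dl_unbind_req_t)); |
| * if (mp != NULL) |
| * ar_dlpi_send(arl, mp); |
| */ |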
| |
| /* |
| * The following two functions serialize DLPI messages to the driver, much |
| * along the lines of ill_dlpi_send and ill_dlpi_done in IP. Basically, |
| * we wait for a DLPI message, sent downstream, to be acked before sending |
| * the next. If a previously sent DLPI message is still awaiting its ack, |
| * queue this message (mp); otherwise send it downstream. |
| */ |
| static void |
| ar_dlpi_send(arl_t *arl, mblk_t *mp) |
| { |
| ASSERT(arl != NULL); |
| ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); |
| |
| if (arl->arl_dlpi_pending != DL_PRIM_INVAL) { |
| mblk_t **mpp; |
| |
| /* Must queue message. Tail insertion */ |
| mpp = &arl->arl_dlpi_deferred; |
| while (*mpp != NULL) |
| mpp = &((*mpp)->b_next); |
| *mpp = mp; |
| |
| DTRACE_PROBE2(dlpi_defer, arl_t *, arl, mblk_t *, mp); |
| return; |
| } |
| |
| arl->arl_dlpi_pending = |
| ((union DL_primitives *)mp->b_rptr)->dl_primitive; |
| DTRACE_PROBE2(dlpi_send, arl_t *, arl, mblk_t *, mp); |
| putnext(arl->arl_wq, mp); |
| } |
| |
| /* |
| * Called when a DLPI control message has been acked; send down the next |
| * queued message (if any). The DLPI messages of interest are bind, |
| * attach, unbind and detach, since these are the only ones sent by ARP |
| * via ar_dlpi_send. |
| */ |
| static void |
| ar_dlpi_done(arl_t *arl, t_uscalar_t prim) |
| { |
| mblk_t *mp; |
| |
| if (arl->arl_dlpi_pending != prim) { |
| DTRACE_PROBE2(dlpi_done_unexpected, arl_t *, arl, |
| t_uscalar_t, prim); |
| return; |
| } |
| |
| if ((mp = arl->arl_dlpi_deferred) == NULL) { |
| DTRACE_PROBE2(dlpi_done_idle, arl_t *, arl, t_uscalar_t, prim); |
| arl->arl_dlpi_pending = DL_PRIM_INVAL; |
| ar_cmd_done(arl); |
| return; |
| } |
| |
| arl->arl_dlpi_deferred = mp->b_next; |
| mp->b_next = NULL; |
| |
| ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); |
| |
| arl->arl_dlpi_pending = |
| ((union DL_primitives *)mp->b_rptr)->dl_primitive; |
| DTRACE_PROBE2(dlpi_done_next, arl_t *, arl, mblk_t *, mp); |
| putnext(arl->arl_wq, mp); |
| } |
| |
| static void |
| ar_cmd_done(arl_t *arl) |
| { |
| mblk_t *mp; |
| int cmd; |
| int err; |
| mblk_t *mp1; |
| mblk_t *dlpi_op_done_mp = NULL; |
| queue_t *dlpi_op_done_q; |
| ar_t *ar_arl; |
| ar_t *ar_ip; |
| queue_t *q; |
| |
| ASSERT(arl->arl_state == ARL_S_UP || arl->arl_state == ARL_S_DOWN); |
| |
| /* |
| * If the current operation was initiated by IP, there must be an op |
| * enqueued in arl_queue. But if ar_close has sent down a detach/unbind, |
| * there is no command enqueued. Likewise, if the IP-ARP stream has |
| * closed, the cleanup will already have been done and there won't be |
| * any mp. |
| */ |
| if ((mp = arl->arl_queue) == NULL) |
| return; |
| |
| if ((cmd = (uintptr_t)mp->b_prev) & CMD_IN_PROGRESS) { |
| mp1 = ar_cmd_dequeue(arl); |
| ASSERT(mp == mp1); |
| |
| cmd &= ~CMD_IN_PROGRESS; |
| if (cmd == AR_INTERFACE_UP) { |
| /* |
| * There is an ioctl waiting for us... |
| */ |
| if (arl->arl_state == ARL_S_UP) |
| err = 0; |
| else |
| err = EINVAL; |
| |
| dlpi_op_done_mp = ar_alloc(AR_DLPIOP_DONE, err); |
| if (dlpi_op_done_mp != NULL) { |
| /* |
| * Better performance if we send the response |
| * after the potential AR_MAPPING_ADD commands |
| * that are likely to follow. (Do it below the |
| * while loop, instead of putnext right now.) |
| */ |
| dlpi_op_done_q = WR(mp->b_queue); |
| } |
| |
| if (err == 0) { |
| /* |
| * Now that we have the ARL instance |
| * corresponding to the IP instance let's make |
| * the association here. |
| */ |
| ar_ip = (ar_t *)mp->b_queue->q_ptr; |
| ar_arl = (ar_t *)arl->arl_rq->q_ptr; |
| ar_arl->ar_arl_ip_assoc = ar_ip; |
| ar_ip->ar_arl_ip_assoc = ar_arl; |
| } |
| } |
| inet_freemsg(mp); |
| } |
| |
| /* |
| * Run the commands that have been enqueued while we were waiting |
| * for the last command (AR_INTERFACE_UP or AR_INTERFACE_DOWN) |
| * to complete. |
| */ |
| while ((mp = ar_cmd_dequeue(arl)) != NULL) { |
| mp->b_prev = AR_DRAINING; |
| q = mp->b_queue; |
| mp->b_queue = NULL; |
| |
| /* |
| * Don't call put(q, mp) since it can lead to reordering of |
| * messages by sending the current message to the end of |
| * ARP's syncq. |
| */ |
| if (q->q_flag & QREADR) |
| ar_rput(q, mp); |
| else |
| ar_wput(q, mp); |
| |
| if ((mp = arl->arl_queue) == NULL) |
| goto done; /* no work to do */ |
| |
| if ((cmd = (uintptr_t)mp->b_prev) & CMD_IN_PROGRESS) { |
| /* |
| * The current command is an AR_INTERFACE_UP or |
| * AR_INTERFACE_DOWN and is waiting for a DLPI ack |
| * from the driver. Return. We can't make progress now. |
| */ |
| goto done; |
| } |
| } |
| |
| done: |
| if (dlpi_op_done_mp != NULL) { |
| DTRACE_PROBE3(cmd_done_next, arl_t *, arl, |
| queue_t *, dlpi_op_done_q, mblk_t *, dlpi_op_done_mp); |
| putnext(dlpi_op_done_q, dlpi_op_done_mp); |
| } |
| } |
| |
| /* |
| * Queue all ARP commands coming from clients. Typically these commands |
| * come from IP, but could also come from other clients. The commands |
| * are serviced in FIFO order. Some commands, typically AR_INTERFACE_UP |
| * and AR_INTERFACE_DOWN, need to wait and restart after the DLPI |
| * response from the driver is received. ar_dlpi_done restarts the |
| * command and then drains the queue at arl_queue, calling ar_rput or |
| * ar_wput for each enqueued command. AR_DRAINING signifies that the |
| * command is being executed through a drain from ar_dlpi_done. |
| * Functions handling the individual commands, such as ar_entry_add, |
| * check for this flag in b_prev to determine whether the command must |
| * be enqueued for later processing or can be processed now. |
| * |
| * b_next is used to thread the enqueued command mblks. |
| * b_queue is used to identify the queue of the originating request |
| * (client). |
| * b_prev is used to store the command itself for easy parsing. |
| */ |
| static void |
| ar_cmd_enqueue(arl_t *arl, mblk_t *mp, queue_t *q, ushort_t cmd, |
| boolean_t tail_insert) |
| { |
| mp->b_queue = q; |
| if (arl->arl_queue == NULL) { |
| ASSERT(arl->arl_queue_tail == NULL); |
| mp->b_prev = (void *)((uintptr_t)(cmd | CMD_IN_PROGRESS)); |
| mp->b_next = NULL; |
| arl->arl_queue = mp; |
| arl->arl_queue_tail = mp; |
| } else if (tail_insert) { |
| mp->b_prev = (void *)((uintptr_t)cmd); |
| mp->b_next = NULL; |
| arl->arl_queue_tail->b_next = mp; |
| arl->arl_queue_tail = mp; |
| } else { |
| /* head insert */ |
| mp->b_prev = (void *)((uintptr_t)cmd | CMD_IN_PROGRESS); |
| mp->b_next = arl->arl_queue; |
| arl->arl_queue = mp; |
| } |
| } |
| |
| static mblk_t * |
| ar_cmd_dequeue(arl_t *arl) |
| { |
| mblk_t *mp; |
| |
| if (arl->arl_queue == NULL) { |
| ASSERT(arl->arl_queue_tail == NULL); |
| return (NULL); |
| } |
| mp = arl->arl_queue; |
| arl->arl_queue = mp->b_next; |
| if (arl->arl_queue == NULL) |
| arl->arl_queue_tail = NULL; |
| mp->b_next = NULL; |
| return (mp); |
| } |
| |
| /* |
| * Standard ACE timer handling: compute 'fuzz' around a central value or from 0 |
| * up to a value, and then set the timer. The randomization is necessary to |
| * prevent groups of systems from falling into synchronization on the network |
| * and producing ARP packet storms. |
| */ |
| static void |
| ace_set_timer(ace_t *ace, boolean_t initial_time) |
| { |
| clock_t intv, rnd, frac; |
| |
| (void) random_get_pseudo_bytes((uint8_t *)&rnd, sizeof (rnd)); |
| /* Note that clock_t is signed; must chop off bits */ |
| rnd &= (1ul << (NBBY * sizeof (rnd) - 1)) - 1; |
| intv = ace->ace_xmit_interval; |
| if (initial_time) { |
| /* Set intv to be anywhere in the [1 .. intv] range */ |
| if (intv <= 0) |
| intv = 1; |
| else |
| intv = (rnd % intv) + 1; |
| } else { |
| /* Compute 'frac' as 20% of the configured interval */ |
| if ((frac = intv / 5) <= 1) |
| frac = 2; |
| /* Set intv randomly in the range [intv-frac .. intv+frac] */ |
| if ((intv = intv - frac + rnd % (2 * frac + 1)) <= 0) |
| intv = 1; |
| } |
| mi_timer(ace->ace_arl->arl_wq, ace->ace_mp, intv); |
| } |
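| |
| /* |
| * For example, with ace_xmit_interval at 1500ms (the default |
| * arp_probe_interval), frac is 300, so the timer fires uniformly in |
| * the [1200 .. 1800] ms range; with initial_time set, it instead |
| * fires anywhere in [1 .. 1500] ms. |
| */ |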
| |
| /* |
| * Process entry add requests from external messages. |
| * This routine is also called by ip_rput_dlpi_writer() through |
| * ipif_resolver_up() to change the hardware address when an |
| * asynchronous hardware address change notification arrives |
| * from the driver. |
| */ |
| static int |
| ar_entry_add(queue_t *q, mblk_t *mp_orig) |
| { |
| area_t *area; |
| ace_t *ace; |
| uchar_t *hw_addr; |
| uint32_t hw_addr_len; |
| uchar_t *proto_addr; |
| uint32_t proto_addr_len; |
| uchar_t *proto_mask; |
| arl_t *arl; |
| mblk_t *mp = mp_orig; |
| int err; |
| uint_t aflags; |
| boolean_t unverified; |
| arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; |
| |
| /* We handle both M_IOCTL and M_PROTO messages. */ |
| if (DB_TYPE(mp) == M_IOCTL) |
| mp = mp->b_cont; |
| arl = ar_ll_lookup_from_mp(as, mp); |
| if (arl == NULL) |
| return (EINVAL); |
| /* |
| * Newly received commands from clients go to the tail of the queue. |
| */ |
| if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { |
| DTRACE_PROBE3(eadd_enqueued, queue_t *, q, mblk_t *, mp_orig, |
| arl_t *, arl); |
| ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_ADD, B_TRUE); |
| return (EINPROGRESS); |
| } |
| mp_orig->b_prev = NULL; |
| |
| area = (area_t *)mp->b_rptr; |
| aflags = area->area_flags; |
| |
| /* |
| * If the previous entry wasn't published and we are now going |
| * to publish, then we need to do address verification. The previous |
| * entry may have been a local unpublished address or even an external |
| * address. If the entry we find was in an unverified state we retain |
| * this. |
| * If it's a new published entry, then we're obligated to do |
| * duplicate address detection now. |
| */ |
| ace = ar_ce_lookup_from_area(as, mp, ar_ce_lookup_entry); |
| if (ace != NULL) { |
| unverified = !(ace->ace_flags & ACE_F_PUBLISH) && |
| (aflags & ACE_F_PUBLISH); |
| if (ace->ace_flags & ACE_F_UNVERIFIED) |
| unverified = B_TRUE; |
| ar_ce_delete(ace); |
| } else { |
| unverified = (aflags & ACE_F_PUBLISH) != 0; |
| } |
| |
| /* Allow client to request DAD restart */ |
| if (aflags & ACE_F_UNVERIFIED) |
| unverified = B_TRUE; |
| |
| /* Extract parameters from the message. */ |
| hw_addr_len = area->area_hw_addr_length; |
| hw_addr = mi_offset_paramc(mp, area->area_hw_addr_offset, hw_addr_len); |
| proto_addr_len = area->area_proto_addr_length; |
| proto_addr = mi_offset_paramc(mp, area->area_proto_addr_offset, |
| proto_addr_len); |
| proto_mask = mi_offset_paramc(mp, area->area_proto_mask_offset, |
| proto_addr_len); |
| if (proto_mask == NULL) { |
| DTRACE_PROBE2(eadd_bad_mask, arl_t *, arl, area_t *, area); |
| return (EINVAL); |
| } |
| err = ar_ce_create( |
| arl, |
| area->area_proto, |
| hw_addr, |
| hw_addr_len, |
| proto_addr, |
| proto_addr_len, |
| proto_mask, |
| NULL, |
| (uint32_t)0, |
| NULL, |
| aflags & ~ACE_F_MAPPING & ~ACE_F_UNVERIFIED & ~ACE_F_DEFEND); |
| if (err != 0) { |
| DTRACE_PROBE3(eadd_create_failed, arl_t *, arl, area_t *, area, |
| int, err); |
| return (err); |
| } |
| |
| if (aflags & ACE_F_PUBLISH) { |
| arlphy_t *ap; |
| |
| ace = ar_ce_lookup(arl, area->area_proto, proto_addr, |
| proto_addr_len); |
| ASSERT(ace != NULL); |
| |
| ap = ace->ace_xmit_arl->arl_phy; |
| |
| if (hw_addr == NULL || hw_addr_len == 0) { |
| hw_addr = ap->ap_hw_addr; |
| } else if (aflags & ACE_F_MYADDR) { |
| /* |
| * If the hardware address changes, then make sure |
| * that the hardware address and hardware |
| * address length fields in arlphy_t get updated |
| * too. Otherwise, they will continue carrying |
| * the old hardware address information. |
| */ |
| ASSERT((hw_addr != NULL) && (hw_addr_len != 0)); |
| bcopy(hw_addr, ap->ap_hw_addr, hw_addr_len); |
| ap->ap_hw_addrlen = hw_addr_len; |
| } |
| |
| if (ace->ace_flags & ACE_F_FAST) { |
| ace->ace_xmit_count = as->as_fastprobe_count; |
| ace->ace_xmit_interval = as->as_fastprobe_delay; |
| } else { |
| ace->ace_xmit_count = as->as_probe_count; |
| ace->ace_xmit_interval = as->as_probe_delay; |
| } |
| |
| /* |
| * If the user has disabled duplicate address detection for |
| * this kind of interface (fast or slow) by setting the probe |
| * count to zero, then pretend as if we've verified the |
| * address, and go right to address defense mode. |
| */ |
| if (ace->ace_xmit_count == 0) |
| unverified = B_FALSE; |
| |
| /* |
| * If we need to do duplicate address detection, then kick that |
| * off. Otherwise, send out a gratuitous ARP message in order |
| * to update everyone's caches with the new hardware address. |
| */ |
| if (unverified) { |
| ace->ace_flags |= ACE_F_UNVERIFIED; |
| if (ace->ace_xmit_interval == 0) { |
| /* |
| * User has configured us to send the first |
| * probe right away. Do so, and set up for |
| * the subsequent probes. |
| */ |
| DTRACE_PROBE2(eadd_probe, ace_t *, ace, |
| area_t *, area); |
| ar_xmit(ace->ace_xmit_arl, ARP_REQUEST, |
| area->area_proto, proto_addr_len, |
| hw_addr, NULL, NULL, proto_addr, NULL, as); |
| ace->ace_xmit_count--; |
| ace->ace_xmit_interval = |
| (ace->ace_flags & ACE_F_FAST) ? |
| as->as_fastprobe_interval : |
| as->as_probe_interval; |
| ace_set_timer(ace, B_FALSE); |
| } else { |
| DTRACE_PROBE2(eadd_delay, ace_t *, ace, |
| area_t *, area); |
| /* Regular delay before initial probe */ |
| ace_set_timer(ace, B_TRUE); |
| } |
| } else { |
| DTRACE_PROBE2(eadd_announce, ace_t *, ace, |
| area_t *, area); |
| ar_xmit(ace->ace_xmit_arl, ARP_REQUEST, |
| area->area_proto, proto_addr_len, hw_addr, |
| proto_addr, ap->ap_arp_addr, proto_addr, NULL, as); |
| ace->ace_last_bcast = ddi_get_lbolt(); |
| |
| /* |
| * If AUTHORITY is set, it is not just a proxy arp |
| * entry; we believe we're the authority for this |
| * entry. In that case, and if we're not just doing |
| * one-off defense of the address, we send more than |
| * one copy, so we'll still have a good chance of |
| * updating everyone even when there's a packet loss |
| * or two. |
| */ |
| if ((aflags & ACE_F_AUTHORITY) && |
| !(aflags & ACE_F_DEFEND) && |
| as->as_publish_count > 0) { |
| /* Account for the xmit we just did */ |
| ace->ace_xmit_count = as->as_publish_count - 1; |
| ace->ace_xmit_interval = |
| as->as_publish_interval; |
| if (ace->ace_xmit_count > 0) |
| ace_set_timer(ace, B_FALSE); |
| } |
| } |
| } |
| return (0); |
| } |
| |
| /* Process entry delete requests from external messages. */ |
| static int |
| ar_entry_delete(queue_t *q, mblk_t *mp_orig) |
| { |
| ace_t *ace; |
| arl_t *arl; |
| mblk_t *mp = mp_orig; |
| arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; |
| |
| /* We handle both M_IOCTL and M_PROTO messages. */ |
| if (DB_TYPE(mp) == M_IOCTL) |
| mp = mp->b_cont; |
| arl = ar_ll_lookup_from_mp(as, mp); |
| if (arl == NULL) |
| return (EINVAL); |
| /* |
| * Newly received commands from clients go to the tail of the queue. |
| */ |
| if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { |
| DTRACE_PROBE3(edel_enqueued, queue_t *, q, mblk_t *, mp_orig, |
| arl_t *, arl); |
| ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_DELETE, B_TRUE); |
| return (EINPROGRESS); |
| } |
| mp_orig->b_prev = NULL; |
| |
| /* |
| * Need to know if it is a mapping or an exact match. Check exact |
| * match first. |
| */ |
| ace = ar_ce_lookup_from_area(as, mp, ar_ce_lookup); |
| if (ace != NULL) { |
| ared_t *ared = (ared_t *)mp->b_rptr; |
| |
| /* |
| * If it's a permanent entry, then the client is the one who |
| * told us to delete it, so there's no reason to notify. |
| */ |
| if (ACE_NONPERM(ace)) |
| ar_delete_notify(ace); |
| /* |
| * Only delete the ARP entry if it is non-permanent, or |
| 		 * the ARED_F_PRESERVE_PERM flag is not set. |
| */ |
| if (ACE_NONPERM(ace) || |
| !(ared->ared_flags & ARED_F_PRESERVE_PERM)) { |
| ar_ce_delete(ace); |
| } |
| return (0); |
| } |
| return (ENXIO); |
| } |
| |
| /* |
| * Process entry query requests from external messages. |
| * Bump ire_stats_freed for all errors except EINPROGRESS, |
| * which means the packet has been queued. For all other |
| * errors the packet will be freed, so if it is an M_PROTO |
| * message we account for the IRE being freed. |
| */ |
| static int |
| ar_entry_query(queue_t *q, mblk_t *mp_orig) |
| { |
| ace_t *ace; |
| areq_t *areq; |
| arl_t *arl; |
| int err; |
| mblk_t *mp = mp_orig; |
| uchar_t *proto_addr; |
| uchar_t *sender_addr; |
| uint32_t proto_addr_len; |
| clock_t ms; |
| boolean_t is_mproto = B_TRUE; |
| arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; |
| |
| /* We handle both M_IOCTL and M_PROTO messages. */ |
| if (DB_TYPE(mp) == M_IOCTL) { |
| is_mproto = B_FALSE; |
| mp = mp->b_cont; |
| } |
| arl = ar_ll_lookup_from_mp(as, mp); |
| if (arl == NULL) { |
| DTRACE_PROBE2(query_no_arl, queue_t *, q, mblk_t *, mp); |
| err = EINVAL; |
| goto err_ret; |
| } |
| /* |
| * Newly received commands from clients go to the tail of the queue. |
| */ |
| if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { |
| DTRACE_PROBE3(query_enqueued, queue_t *, q, mblk_t *, mp_orig, |
| arl_t *, arl); |
| ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_QUERY, B_TRUE); |
| return (EINPROGRESS); |
| } |
| mp_orig->b_prev = NULL; |
| |
| areq = (areq_t *)mp->b_rptr; |
| proto_addr_len = areq->areq_target_addr_length; |
| proto_addr = mi_offset_paramc(mp, areq->areq_target_addr_offset, |
| proto_addr_len); |
| if (proto_addr == NULL) { |
| DTRACE_PROBE1(query_illegal_address, areq_t *, areq); |
| err = EINVAL; |
| goto err_ret; |
| } |
| /* Stash the reply queue pointer for later use. */ |
| mp->b_prev = (mblk_t *)OTHERQ(q); |
| mp->b_next = NULL; |
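| 	/* |
| 	 * While the query is parked on the ace_query_mp list, b_prev |
| 	 * holds the client's reply queue for use when the query is |
| 	 * answered, and b_next links the queued queries together. |
| 	 */ |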
| if (areq->areq_xmit_interval == 0) |
| areq->areq_xmit_interval = AR_DEF_XMIT_INTERVAL; |
| ace = ar_ce_lookup(arl, areq->areq_proto, proto_addr, proto_addr_len); |
| if (ace != NULL && (ace->ace_flags & ACE_F_OLD)) { |
| /* |
| * This is a potentially stale entry that IP's asking about. |
| * Since IP is asking, it must not have an answer anymore, |
| * either due to periodic ARP flush or due to SO_DONTROUTE. |
| * Rather than go forward with what we've got, restart |
| * resolution. |
| */ |
| DTRACE_PROBE2(query_stale_ace, ace_t *, ace, areq_t *, areq); |
| ar_ce_delete(ace); |
| ace = NULL; |
| } |
| if (ace != NULL) { |
| mblk_t **mpp; |
| uint32_t count = 0; |
| |
| /* |
| * There is already a cache entry. This means there is either |
| * a permanent entry, or address resolution is in progress. |
| * If the latter, there should be one or more queries queued |
| * up. We link the current one in at the end, if there aren't |
| * too many outstanding. |
| */ |
| for (mpp = &ace->ace_query_mp; mpp[0]; mpp = &mpp[0]->b_next) { |
| if (++count > areq->areq_max_buffered) { |
| DTRACE_PROBE2(query_overflow, ace_t *, ace, |
| areq_t *, areq); |
| mp->b_prev = NULL; |
| err = EALREADY; |
| goto err_ret; |
| } |
| } |
| /* Put us on the list. */ |
| mpp[0] = mp; |
| if (count != 0) { |
| /* |
| * If a query was already queued up, then we must not |
| * have an answer yet. |
| */ |
| DTRACE_PROBE2(query_in_progress, ace_t *, ace, |
| areq_t *, areq); |
| return (EINPROGRESS); |
| } |
| if (ACE_RESOLVED(ace)) { |
| /* |
| * We have an answer already. |
| * Keep a dup of mp since proto_addr points to it |
| * and mp has been placed on the ace_query_mp list. |
| */ |
| mblk_t *mp1; |
| |
| DTRACE_PROBE2(query_resolved, ace_t *, ace, |
| areq_t *, areq); |
| mp1 = dupmsg(mp); |
| ar_query_reply(ace, 0, proto_addr, proto_addr_len); |
| freemsg(mp1); |
| return (EINPROGRESS); |
| } |
| if (ace->ace_flags & ACE_F_MAPPING) { |
| /* Should never happen */ |
| DTRACE_PROBE2(query_unresolved_mapping, ace_t *, ace, |
| areq_t *, areq); |
| mpp[0] = mp->b_next; |
| err = ENXIO; |
| goto err_ret; |
| } |
| 	DTRACE_PROBE2(query_unresolved, ace_t *, ace, areq_t *, areq); |
| } else { |
| /* No ace yet. Make one now. (This is the common case.) */ |
| if (areq->areq_xmit_count == 0) { |
| DTRACE_PROBE2(query_template, arl_t *, arl, |
| areq_t *, areq); |
| mp->b_prev = NULL; |
| err = ENXIO; |
| goto err_ret; |
| } |
| /* |
| 		 * Check for the sender address being NULL before we create |
| 		 * the ace, so that there is nothing to clean up later. |
| */ |
| sender_addr = mi_offset_paramc(mp, |
| areq->areq_sender_addr_offset, |
| areq->areq_sender_addr_length); |
| if (sender_addr == NULL) { |
| DTRACE_PROBE2(query_no_sender, arl_t *, arl, |
| areq_t *, areq); |
| mp->b_prev = NULL; |
| err = EINVAL; |
| goto err_ret; |
| } |
| err = ar_ce_create(OWNING_ARL(arl), areq->areq_proto, NULL, 0, |
| proto_addr, proto_addr_len, NULL, |
| NULL, (uint32_t)0, sender_addr, |
| areq->areq_flags); |
| if (err != 0) { |
| DTRACE_PROBE3(query_create_failed, arl_t *, arl, |
| areq_t *, areq, int, err); |
| mp->b_prev = NULL; |
| goto err_ret; |
| } |
| ace = ar_ce_lookup(arl, areq->areq_proto, proto_addr, |
| proto_addr_len); |
| if (ace == NULL || ace->ace_query_mp != NULL) { |
| /* Shouldn't happen! */ |
| DTRACE_PROBE3(query_lookup_failed, arl_t *, arl, |
| areq_t *, areq, ace_t *, ace); |
| mp->b_prev = NULL; |
| err = ENXIO; |
| goto err_ret; |
| } |
| ace->ace_query_mp = mp; |
| } |
| ms = ar_query_xmit(as, ace); |
| if (ms == 0) { |
| /* Immediate reply requested. */ |
| ar_query_reply(ace, ENXIO, NULL, (uint32_t)0); |
| } else { |
| mi_timer(ace->ace_arl->arl_wq, ace->ace_mp, ms); |
| } |
| return (EINPROGRESS); |
| err_ret: |
| if (is_mproto) { |
| ip_stack_t *ipst = as->as_netstack->netstack_ip; |
| |
| BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_freed); |
| } |
| return (err); |
| } |
| |
| /* Handle simple query requests. */ |
| static int |
| ar_entry_squery(queue_t *q, mblk_t *mp_orig) |
| { |
| ace_t *ace; |
| area_t *area; |
| arl_t *arl; |
| uchar_t *hw_addr; |
| uint32_t hw_addr_len; |
| mblk_t *mp = mp_orig; |
| uchar_t *proto_addr; |
| int proto_addr_len; |
| arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; |
| |
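| 	/* We handle both M_IOCTL and M_PROTO messages. */ |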
| if (DB_TYPE(mp) == M_IOCTL) |
| mp = mp->b_cont; |
| arl = ar_ll_lookup_from_mp(as, mp); |
| if (arl == NULL) |
| return (EINVAL); |
| /* |
| * Newly received commands from clients go to the tail of the queue. |
| */ |
| if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { |
| DTRACE_PROBE3(squery_enqueued, queue_t *, q, mblk_t *, mp_orig, |
| arl_t *, arl); |
| ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_SQUERY, B_TRUE); |
| return (EINPROGRESS); |
| } |
| mp_orig->b_prev = NULL; |
| |
| /* Extract parameters from the request message. */ |
| area = (area_t *)mp->b_rptr; |
| proto_addr_len = area->area_proto_addr_length; |
| proto_addr = mi_offset_paramc(mp, area->area_proto_addr_offset, |
| proto_addr_len); |
| hw_addr_len = area->area_hw_addr_length; |
| hw_addr = mi_offset_paramc(mp, area->area_hw_addr_offset, hw_addr_len); |
| if (proto_addr == NULL || hw_addr == NULL) { |
| DTRACE_PROBE1(squery_illegal_address, area_t *, area); |
| return (EINVAL); |
| } |
| ace = ar_ce_lookup(arl, area->area_proto, proto_addr, proto_addr_len); |
| if (ace == NULL) { |
| return (ENXIO); |
| } |
| if (hw_addr_len < ace->ace_hw_addr_length) { |
| return (EINVAL); |
| } |
| if (ACE_RESOLVED(ace)) { |
| /* Got it, prepare the response. */ |
| ASSERT(area->area_hw_addr_length == ace->ace_hw_addr_length); |
| ar_set_address(ace, hw_addr, proto_addr, proto_addr_len); |
| } else { |
| /* |
| * We have an incomplete entry. Set the length to zero and |
| 		 * just return the flags. |
| */ |
| area->area_hw_addr_length = 0; |
| } |
| area->area_flags = ace->ace_flags; |
| if (mp == mp_orig) { |
| /* Non-ioctl case */ |
| /* TODO: change message type? */ |
| DB_TYPE(mp) = M_CTL; /* Caught by ip_wput */ |
| DTRACE_PROBE3(squery_reply, queue_t *, q, mblk_t *, mp, |
| arl_t *, arl); |
| qreply(q, mp); |
| return (EINPROGRESS); |
| } |
| return (0); |
| } |
| |
| /* Process an interface down causing us to detach and unbind. */ |
| /* ARGSUSED */ |
| static int |
| ar_interface_down(queue_t *q, mblk_t *mp) |
| { |
| arl_t *arl; |
| arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; |
| |
| arl = ar_ll_lookup_from_mp(as, mp); |
| if (arl == NULL || arl->arl_closing) { |
| DTRACE_PROBE2(down_no_arl, queue_t *, q, mblk_t *, mp); |
| return (EINVAL); |
| } |
| |
| /* |
| * Newly received commands from clients go to the tail of the queue. |
| */ |
| if (CMD_NEEDS_QUEUEING(mp, arl)) { |
| DTRACE_PROBE3(down_enqueued, queue_t *, q, mblk_t *, mp, |
| arl_t *, arl); |
| ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_DOWN, B_TRUE); |
| return (EINPROGRESS); |
| } |
| mp->b_prev = NULL; |
| /* |
| * The arl is already down, no work to do. |
| */ |
| if (arl->arl_state == ARL_S_DOWN) { |
| /* ar_rput frees the mp */ |
| return (0); |
| } |
| |
| /* |
| 	 * This command cannot complete immediately; it has to be |
| 	 * restarted after the ack is received from the driver. So we |
| 	 * need to enqueue the command (at the head of the queue). |
| */ |
| ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_DOWN, B_FALSE); |
| |
| ASSERT(arl->arl_state == ARL_S_UP); |
| |
| /* Free all arp entries for this interface */ |
| ar_ce_walk(as, ar_ce_delete_per_arl, arl); |
| |
| ar_ll_down(arl); |
| /* Return EINPROGRESS so that ar_rput does not free the 'mp' */ |
| return (EINPROGRESS); |
| } |
| |
| /* Process an interface up causing the info req sequence to start. */ |
| /* ARGSUSED */ |
| static int |
| ar_interface_up(queue_t *q, mblk_t *mp) |
| { |
| arl_t *arl; |
| int err; |
| mblk_t *mp1; |
| arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; |
| |
| arl = ar_ll_lookup_from_mp(as, mp); |
| if (arl == NULL || arl->arl_closing) { |
| DTRACE_PROBE2(up_no_arl, queue_t *, q, mblk_t *, mp); |
| err = EINVAL; |
| goto done; |
| } |
| |
| /* |
| * Newly received commands from clients go to the tail of the queue. |
| */ |
| if (CMD_NEEDS_QUEUEING(mp, arl)) { |
| DTRACE_PROBE3(up_enqueued, queue_t *, q, mblk_t *, mp, |
| arl_t *, arl); |
| ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_UP, B_TRUE); |
| return (EINPROGRESS); |
| } |
| mp->b_prev = NULL; |
| |
| /* |
| * The arl is already up. No work to do. |
| */ |
| if (arl->arl_state == ARL_S_UP) { |
| err = 0; |
| goto done; |
| } |
| |
| /* |
| 	 * This command cannot complete immediately; it has to be |
| 	 * restarted after the ack is received from the driver. So we |
| 	 * need to enqueue the command (at the head of the queue). |
| */ |
| ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_UP, B_FALSE); |
| |
| err = ar_ll_up(arl); |
| |
| /* Return EINPROGRESS so that ar_rput does not free the 'mp' */ |
| return (EINPROGRESS); |
| |
| done: |
| /* caller frees 'mp' */ |
| |
| mp1 = ar_alloc(AR_DLPIOP_DONE, err); |
| if (mp1 != NULL) { |
| q = WR(q); |
| DTRACE_PROBE3(up_send_err, queue_t *, q, mblk_t *, mp1, |
| int, err); |
| putnext(q, mp1); |
| } |
| return (err); |
| } |
| |
| /* |
| * Given an arie_t `mp', find the arl_t's that it names and return them |
| * in `*arlp' and `*ipmp_arlp'. If they cannot be found, return B_FALSE. |
| */ |
| static boolean_t |
| ar_ipmp_lookup(arp_stack_t *as, mblk_t *mp, arl_t **arlp, arl_t **ipmp_arlp) |
| { |
| arie_t *arie = (arie_t *)mp->b_rptr; |
| |
| *arlp = ar_ll_lookup_from_mp(as, mp); |
| if (*arlp == NULL) { |
| DTRACE_PROBE1(ipmp_lookup_no_arl, mblk_t *, mp); |
| return (B_FALSE); |
| } |
| |
| arie->arie_grifname[LIFNAMSIZ - 1] = '\0'; |
| *ipmp_arlp = ar_ll_lookup_by_name(as, arie->arie_grifname); |
| if (*ipmp_arlp == NULL) { |
| DTRACE_PROBE1(ipmp_lookup_no_ipmp_arl, mblk_t *, mp); |
| return (B_FALSE); |
| } |
| |
| DTRACE_PROBE2(ipmp_lookup, arl_t *, *arlp, arl_t *, *ipmp_arlp); |
| return (B_TRUE); |
| } |
| |
| /* |
| * Bind an arl_t to an IPMP group arl_t. |
| */ |
| static int |
| ar_ipmp_activate(queue_t *q, mblk_t *mp) |
| { |
| arl_t *arl, *ipmp_arl; |
| arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; |
| |
| if (!ar_ipmp_lookup(as, mp, &arl, &ipmp_arl)) |
| return (EINVAL); |
| |
| if (arl->arl_ipmp_arl != NULL) { |
| DTRACE_PROBE1(ipmp_activated_already, arl_t *, arl); |
| return (EALREADY); |
| } |
| |
| DTRACE_PROBE2(ipmp_activate, arl_t *, arl, arl_t *, ipmp_arl); |
| arl->arl_ipmp_arl = ipmp_arl; |
| return (0); |
| } |
| |
| /* |
| * Unbind an arl_t from an IPMP group arl_t and update the ace_t's so |
| * that they are no longer part of the group. |
| */ |
| static int |
| ar_ipmp_deactivate(queue_t *q, mblk_t *mp) |
| { |
| arl_t *arl, *ipmp_arl; |
| arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; |
| |
| if (!ar_ipmp_lookup(as, mp, &arl, &ipmp_arl)) |
| return (EINVAL); |
| |
| if (ipmp_arl != arl->arl_ipmp_arl) { |
| DTRACE_PROBE2(ipmp_deactivate_notactive, arl_t *, arl, arl_t *, |
| ipmp_arl); |
| return (EINVAL); |
| } |
| |
| DTRACE_PROBE2(ipmp_deactivate, arl_t *, arl, arl_t *, |
| arl->arl_ipmp_arl); |
| ar_ce_walk(as, ar_ce_ipmp_deactivate, arl); |
| arl->arl_ipmp_arl = NULL; |
| return (0); |
| } |
| |
| /* |
| * Enable an interface to process ARP_REQUEST and ARP_RESPONSE messages. |
| */ |
| /* ARGSUSED */ |
| static int |
| ar_interface_on(queue_t *q, mblk_t *mp) |
| { |
| arl_t *arl; |
| arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; |
| |
| arl = ar_ll_lookup_from_mp(as, mp); |
| if (arl == NULL) { |
| DTRACE_PROBE2(on_no_arl, queue_t *, q, mblk_t *, mp); |
| return (EINVAL); |
| } |
| |
| DTRACE_PROBE3(on_intf, queue_t *, q, mblk_t *, mp, arl_t *, arl); |
| arl->arl_flags &= ~ARL_F_NOARP; |
| return (0); |
| } |
| |
| /* |
| * Disable an interface from processing |
| * ARP_REQUEST and ARP_RESPONSE messages. |
| */ |
| /* ARGSUSED */ |
| static int |
| ar_interface_off(queue_t *q, mblk_t *mp) |
| { |
| arl_t *arl; |
| arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; |
| |
| arl = ar_ll_lookup_from_mp(as, mp); |
| if (arl == NULL) { |
| DTRACE_PROBE2(off_no_arl, queue_t *, q, mblk_t *, mp); |
| return (EINVAL); |
| } |
| |
| DTRACE_PROBE3(off_intf, queue_t *, q, mblk_t *, mp, arl_t *, arl); |
| arl->arl_flags |= ARL_F_NOARP; |
| return (0); |
| } |
| |
| /* |
| * The queue 'q' is closing. Walk all the arl's and free any message |
| * pending in the arl_queue if it originated from the closing q. |
| * Also clean up the ip_pending_queue if the arp-IP stream is closing. |
| */ |
| static void |
| ar_ll_cleanup_arl_queue(queue_t *q) |
| { |
| arl_t *arl; |
| mblk_t *mp; |
| mblk_t *mpnext; |
| mblk_t *prev; |
| arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; |
| ip_stack_t *ipst = as->as_netstack->netstack_ip; |
| |
| for (arl = as->as_arl_head; arl != NULL; arl = arl->arl_next) { |
| for (prev = NULL, mp = arl->arl_queue; mp != NULL; |
| mp = mpnext) { |
| mpnext = mp->b_next; |
| if ((void *)mp->b_queue == (void *)q || |
| (void *)mp->b_queue == (void *)OTHERQ(q)) { |
| if (prev == NULL) |
| arl->arl_queue = mp->b_next; |
| else |
| prev->b_next = mp->b_next; |
| if (arl->arl_queue_tail == mp) |
| arl->arl_queue_tail = prev; |
| if (DB_TYPE(mp) == M_PROTO && |
| *(uint32_t *)mp->b_rptr == AR_ENTRY_QUERY) { |
| BUMP_IRE_STATS(ipst->ips_ire_stats_v4, |
| ire_stats_freed); |
| } |
| inet_freemsg(mp); |
| } else { |
| prev = mp; |
| } |
| } |
| } |
| } |
| |
| /* |
| * Look up a lower level tap by name. |
| */ |
| static arl_t * |
| ar_ll_lookup_by_name(arp_stack_t *as, const char *name) |
| { |
| arl_t *arl; |
| |
| for (arl = as->as_arl_head; arl; arl = arl->arl_next) { |
| if (strcmp(arl->arl_name, name) == 0) { |
| return (arl); |
| } |
| } |
| return (NULL); |
| } |
| |
| /* |
| * Look up a lower level tap using parameters extracted from the common |
| * portion of the ARP command. |
| */ |
| static arl_t * |
| ar_ll_lookup_from_mp(arp_stack_t *as, mblk_t *mp) |
| { |
| arc_t *arc = (arc_t *)mp->b_rptr; |
| uint8_t *name; |
| size_t namelen = arc->arc_name_length; |
| |
| name = mi_offset_param(mp, arc->arc_name_offset, namelen); |
| if (name == NULL || name[namelen - 1] != '\0') |
| return (NULL); |
| return (ar_ll_lookup_by_name(as, (char *)name)); |
| } |
| |
| static void |
| ar_ll_init(arp_stack_t *as, ar_t *ar, mblk_t *mp) |
| { |
| arl_t *arl; |
| dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr; |
| |
| ASSERT(ar->ar_arl == NULL); |
| |
| if ((arl = (arl_t *)mi_zalloc(sizeof (arl_t))) == NULL) |
| return; |
| |
| if (dlia->dl_mac_type == SUNW_DL_IPMP) { |
| arl->arl_flags |= ARL_F_IPMP; |
| arl->arl_ipmp_arl = arl; |
| } |
| |
| arl->arl_provider_style = dlia->dl_provider_style; |
| arl->arl_rq = ar->ar_rq; |
| arl->arl_wq = ar->ar_wq; |
| |
| arl->arl_dlpi_pending = DL_PRIM_INVAL; |
| |
| ar->ar_arl = arl; |
| |
| /* |
| * If/when ARP gets pushed into the IP module then this code to make |
| * a number uniquely identify an ARP instance can be removed and the |
| * ifindex from IP used. Rather than try and reinvent or copy the |
| * code used by IP for the purpose of allocating an index number |
| * (and trying to keep the number small), just allocate it in an |
| * ever increasing manner. This index number isn't ever exposed to |
| * users directly, its only use is for providing the pfhooks interface |
| * with a number it can use to uniquely identify an interface in time. |
| * |
| * Using a 32-bit counter, more than 136 plumbs per second (2^32 |
| * divided by the 31,536,000 seconds in a non-leap year), sustained |
| * for an entire year, would be needed before the counter wrapped |
| * and the for() loop below kicked in as a performance concern. |
| */ |
| 	if (as->as_arp_counter_wrapped) { |
| 		arl_t *arl1; |
| |
| 		/* |
| 		 * The counter has wrapped, so advance it past any index |
| 		 * that is still in use before handing it out. |
| 		 */ |
| 		do { |
| 			for (arl1 = as->as_arl_head; arl1 != NULL; |
| 			    arl1 = arl1->arl_next) |
| 				if (arl1->arl_index == |
| 				    as->as_arp_index_counter) { |
| 					as->as_arp_index_counter++; |
| 					if (as->as_arp_index_counter == 0) { |
| 						as->as_arp_counter_wrapped++; |
| 						as->as_arp_index_counter = 1; |
| 					} |
| 					break; |
| 				} |
| 		} while (arl1 != NULL); |
| 	} |
| 	arl->arl_index = as->as_arp_index_counter; |
| as->as_arp_index_counter++; |
| if (as->as_arp_index_counter == 0) { |
| as->as_arp_counter_wrapped++; |
| as->as_arp_index_counter = 1; |
| } |
| } |
| |
| /* |
| * This routine is called during module initialization when the DL_INFO_ACK |
| * comes back from the device. We set up defaults for all the |
| * device-dependent doo-dads we are going to need. This will leave us |
| * ready to roll if we are attempting auto-configuration. Alternatively, |
| * these defaults can be overridden by initialization procedures |
| * possessing higher intelligence. |
| */ |
| static void |
| ar_ll_set_defaults(arl_t *arl, mblk_t *mp) |
| { |
| ar_m_t *arm; |
| dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr; |
| dl_unitdata_req_t *dlur; |
| uchar_t *up; |
| arlphy_t *ap; |
| |
| ASSERT(arl != NULL); |
| |
| /* |
| * Clear any stale defaults that might exist. |
| */ |
| ar_ll_clear_defaults(arl); |
| |
| if (arl->arl_flags & ARL_F_IPMP) { |
| /* |
| * If this is an IPMP arl_t, we have nothing to do, |
| * since we will never transmit or receive. |
| */ |
| return; |
| } |
| |
| ap = kmem_zalloc(sizeof (arlphy_t), KM_NOSLEEP); |
| if (ap == NULL) |
| goto bad; |
| arl->arl_phy = ap; |
| |
| if ((arm = ar_m_lookup(dlia->dl_mac_type)) == NULL) |
| arm = ar_m_lookup(DL_OTHER); |
| ASSERT(arm != NULL); |
| |
| /* |
| * We initialize based on parameters in the (currently) not too |
| * exhaustive ar_m_tbl. |
| */ |
| if (dlia->dl_version == DL_VERSION_2) { |
| /* XXX DLPI spec allows dl_sap_length of 0 before binding. */ |
| ap->ap_saplen = dlia->dl_sap_length; |
| ap->ap_hw_addrlen = dlia->dl_brdcst_addr_length; |
| } else { |
| ap->ap_saplen = arm->ar_mac_sap_length; |
| ap->ap_hw_addrlen = arm->ar_mac_hw_addr_length; |
| } |
| ap->ap_arp_hw_type = arm->ar_mac_arp_hw_type; |
| |
| /* |
| * Allocate the hardware and ARP addresses; note that the hardware |
| * address cannot be filled in until we see the DL_BIND_ACK. |
| */ |
| ap->ap_hw_addr = kmem_zalloc(ap->ap_hw_addrlen, KM_NOSLEEP); |
| ap->ap_arp_addr = kmem_alloc(ap->ap_hw_addrlen, KM_NOSLEEP); |
| if (ap->ap_hw_addr == NULL || ap->ap_arp_addr == NULL) |
| goto bad; |
| |
| if (dlia->dl_version == DL_VERSION_2) { |
| if ((up = mi_offset_param(mp, dlia->dl_brdcst_addr_offset, |
| ap->ap_hw_addrlen)) == NULL) |
| goto bad; |
| bcopy(up, ap->ap_arp_addr, ap->ap_hw_addrlen); |
| } else { |
| /* |
| * No choice but to assume a broadcast address of all ones, |
| * known to work on some popular networks. |
| */ |
| (void) memset(ap->ap_arp_addr, ~0, ap->ap_hw_addrlen); |
| } |
| |
| /* |
| * Make us a template DL_UNITDATA_REQ message which we will use for |
| * broadcasting resolution requests, and which we will clone to hand |
| * back as responses to the protocols. |
| */ |
| ap->ap_xmit_mp = ar_dlpi_comm(DL_UNITDATA_REQ, ap->ap_hw_addrlen + |
| ABS(ap->ap_saplen) + sizeof (dl_unitdata_req_t)); |
| if (ap->ap_xmit_mp == NULL) |
| goto bad; |
| |
| dlur = (dl_unitdata_req_t *)ap->ap_xmit_mp->b_rptr; |
| dlur->dl_priority.dl_min = 0; |
| dlur->dl_priority.dl_max = 0; |
| dlur->dl_dest_addr_length = ap->ap_hw_addrlen + ABS(ap->ap_saplen); |
| dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); |
| |
| /* NOTE: the destination address and sap offsets are permanently set */ |
| ap->ap_xmit_sapoff = dlur->dl_dest_addr_offset; |
| ap->ap_xmit_addroff = dlur->dl_dest_addr_offset; |
| if (ap->ap_saplen < 0) |
| ap->ap_xmit_sapoff += ap->ap_hw_addrlen; /* sap last */ |
| else |
| ap->ap_xmit_addroff += ap->ap_saplen; /* addr last */ |
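| 	/* |
| 	 * For example, Ethernet providers typically report a dl_sap_length |
| 	 * of -2 (negative meaning the SAP follows the address), so with a |
| 	 * 6-byte hardware address the destination address sits at |
| 	 * dl_dest_addr_offset and the 2-byte SAP at dl_dest_addr_offset + 6. |
| 	 */ |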
| |
| *(uint16_t *)((caddr_t)dlur + ap->ap_xmit_sapoff) = ETHERTYPE_ARP; |
| return; |
| bad: |
| ar_ll_clear_defaults(arl); |
| } |
| |
| static void |
| ar_ll_clear_defaults(arl_t *arl) |
| { |
| arlphy_t *ap = arl->arl_phy; |
| |
| if (ap != NULL) { |
| arl->arl_phy = NULL; |
| if (ap->ap_hw_addr != NULL) |
| kmem_free(ap->ap_hw_addr, ap->ap_hw_addrlen); |
| if (ap->ap_arp_addr != NULL) |
| kmem_free(ap->ap_arp_addr, ap->ap_hw_addrlen); |
| freemsg(ap->ap_xmit_mp); |
| kmem_free(ap, sizeof (arlphy_t)); |
| } |
| } |
| |
| static void |
| ar_ll_down(arl_t *arl) |
| { |
| mblk_t *mp; |
| ar_t *ar; |
| |
| ASSERT(arl->arl_state == ARL_S_UP); |
| |
| 	/* Break the association between the ARL and the IP instance. */ |
| ar = (ar_t *)arl->arl_rq->q_ptr; |
| if (ar->ar_arl_ip_assoc != NULL) { |
| ASSERT(ar->ar_arl_ip_assoc->ar_arl_ip_assoc != NULL && |
| ar->ar_arl_ip_assoc->ar_arl_ip_assoc == ar); |
| ar->ar_arl_ip_assoc->ar_arl_ip_assoc = NULL; |
| ar->ar_arl_ip_assoc = NULL; |
| } |
| |
| arl->arl_state = ARL_S_PENDING; |
| |
| mp = arl->arl_unbind_mp; |
| ASSERT(mp != NULL); |
| ar_dlpi_send(arl, mp); |
| arl->arl_unbind_mp = NULL; |
| |
| if (arl->arl_provider_style == DL_STYLE2) { |
| mp = arl->arl_detach_mp; |
| ASSERT(mp != NULL); |
| ar_dlpi_send(arl, mp); |
| arl->arl_detach_mp = NULL; |
| } |
| } |
| |
| static int |
| ar_ll_up(arl_t *arl) |
| { |
| mblk_t *attach_mp = NULL; |
| mblk_t *bind_mp = NULL; |
| mblk_t *detach_mp = NULL; |
| mblk_t *unbind_mp = NULL; |
| mblk_t *info_mp = NULL; |
| mblk_t *notify_mp = NULL; |
| |
| ASSERT(arl->arl_state == ARL_S_DOWN); |
| |
| if (arl->arl_provider_style == DL_STYLE2) { |
| attach_mp = |
| ar_dlpi_comm(DL_ATTACH_REQ, sizeof (dl_attach_req_t)); |
| if (attach_mp == NULL) |
| goto bad; |
| ((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = |
| arl->arl_ppa; |
| |
| detach_mp = |
| ar_dlpi_comm(DL_DETACH_REQ, sizeof (dl_detach_req_t)); |
| if (detach_mp == NULL) |
| goto bad; |
| } |
| |
| info_mp = ar_dlpi_comm(DL_INFO_REQ, sizeof (dl_info_req_t)); |
| if (info_mp == NULL) |
| goto bad; |
| |
| /* Allocate and initialize a bind message. */ |
| bind_mp = ar_dlpi_comm(DL_BIND_REQ, sizeof (dl_bind_req_t)); |
| if (bind_mp == NULL) |
| goto bad; |
| ((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ETHERTYPE_ARP; |
| ((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS; |
| |
| unbind_mp = ar_dlpi_comm(DL_UNBIND_REQ, sizeof (dl_unbind_req_t)); |
| if (unbind_mp == NULL) |
| goto bad; |
| |
| notify_mp = ar_dlpi_comm(DL_NOTIFY_REQ, sizeof (dl_notify_req_t)); |
| if (notify_mp == NULL) |
| goto bad; |
| ((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications = |
| DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN; |
| |
| arl->arl_state = ARL_S_PENDING; |
| if (arl->arl_provider_style == DL_STYLE2) { |
| ar_dlpi_send(arl, attach_mp); |
| ASSERT(detach_mp |