| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. |
| */ |
| |
| /* |
| * MAC Services Module - misc utilities |
| */ |
| |
| #include <sys/types.h> |
| #include <sys/mac.h> |
| #include <sys/mac_impl.h> |
| #include <sys/mac_client_priv.h> |
| #include <sys/mac_client_impl.h> |
| #include <sys/mac_soft_ring.h> |
| #include <sys/strsubr.h> |
| #include <sys/strsun.h> |
| #include <sys/vlan.h> |
| #include <sys/pattr.h> |
| #include <sys/pci_tools.h> |
| #include <inet/ip.h> |
| #include <inet/ip_impl.h> |
| #include <inet/ip6.h> |
| #include <sys/vtrace.h> |
| #include <sys/dlpi.h> |
| #include <sys/sunndi.h> |
| #include <inet/ipsec_impl.h> |
| #include <inet/sadb.h> |
| #include <inet/ipsecesp.h> |
| #include <inet/ipsecah.h> |
| |
| /* |
| * Copy an mblk, preserving its hardware checksum flags. |
| */ |
| static mblk_t * |
| mac_copymsg_cksum(mblk_t *mp) |
| { |
| mblk_t *mp1; |
| uint32_t start, stuff, end, value, flags; |
| |
| mp1 = copymsg(mp); |
| if (mp1 == NULL) |
| return (NULL); |
| |
| hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); |
| (void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value, |
| flags, KM_NOSLEEP); |
| |
| return (mp1); |
| } |
| |
| /* |
| * Copy an mblk chain, preserving the hardware checksum flags of the |
| * individual mblks. |
| */ |
| mblk_t * |
| mac_copymsgchain_cksum(mblk_t *mp) |
| { |
| mblk_t *nmp = NULL; |
| mblk_t **nmpp = &nmp; |
| |
| for (; mp != NULL; mp = mp->b_next) { |
| if ((*nmpp = mac_copymsg_cksum(mp)) == NULL) { |
| freemsgchain(nmp); |
| return (NULL); |
| } |
| |
| nmpp = &((*nmpp)->b_next); |
| } |
| |
| return (nmp); |
| } |
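| |
| /* |
| * A minimal usage sketch (hypothetical caller, not part of this module): |
| * a client looping a chain back to a peer would duplicate it first so |
| * that each path owns its own copy: |
| * |
| *     mblk_t *cmp; |
| * |
| *     if ((cmp = mac_copymsgchain_cksum(mp_chain)) == NULL) |
| *         return;                 (copy failed; original chain untouched) |
| *     deliver_loopback(cmp);      (deliver_loopback() is hypothetical) |
| * |
| * On failure the partially built copy is freed internally and NULL is |
| * returned, so the caller still owns the original chain. |
| */ |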
| |
| /* |
| * Process the specified mblk chain for proper handling of hardware |
| * checksum offload. This routine is invoked for loopback traffic |
| * between MAC clients. |
| * The function handles a NULL mblk chain passed as an argument. |
| */ |
| mblk_t * |
| mac_fix_cksum(mblk_t *mp_chain) |
| { |
| mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1; |
| uint32_t flags, start, stuff, end, value; |
| |
| for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) { |
| uint16_t len; |
| uint32_t offset; |
| struct ether_header *ehp; |
| uint16_t sap; |
| |
| hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, |
| &flags); |
| if (flags == 0) |
| continue; |
| |
| /* |
| * Since the processing of checksum offload for loopback |
| * traffic requires modification of the packet contents, |
| * ensure that we are always modifying our own copy. |
| */ |
| if (DB_REF(mp) > 1) { |
| mp1 = copymsg(mp); |
| if (mp1 == NULL) |
| continue; |
| mp1->b_next = mp->b_next; |
| mp->b_next = NULL; |
| freemsg(mp); |
| if (prev != NULL) |
| prev->b_next = mp1; |
| else |
| new_chain = mp1; |
| mp = mp1; |
| } |
| |
| /* |
| * Ethernet, and optionally VLAN header. |
| */ |
| /* LINTED: improper alignment cast */ |
| ehp = (struct ether_header *)mp->b_rptr; |
| if (ntohs(ehp->ether_type) == VLAN_TPID) { |
| struct ether_vlan_header *evhp; |
| |
| ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); |
| /* LINTED: improper alignment cast */ |
| evhp = (struct ether_vlan_header *)mp->b_rptr; |
| sap = ntohs(evhp->ether_type); |
| offset = sizeof (struct ether_vlan_header); |
| } else { |
| sap = ntohs(ehp->ether_type); |
| offset = sizeof (struct ether_header); |
| } |
| |
| if (MBLKL(mp) <= offset) { |
| offset -= MBLKL(mp); |
| if (mp->b_cont == NULL) { |
| /* corrupted packet, skip it */ |
| if (prev != NULL) |
| prev->b_next = mp->b_next; |
| else |
| new_chain = mp->b_next; |
| mp1 = mp->b_next; |
| mp->b_next = NULL; |
| freemsg(mp); |
| mp = mp1; |
| continue; |
| } |
| mp = mp->b_cont; |
| } |
| |
| if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) { |
| ipha_t *ipha = NULL; |
| |
| /* |
| * In order to compute the full and header |
| * checksums, we need to find and parse |
| * the IP and/or ULP headers. |
| */ |
| |
| sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; |
| |
| /* |
| * IP header. |
| */ |
| if (sap != ETHERTYPE_IP) |
| continue; |
| |
| ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t)); |
| /* LINTED: improper alignment cast */ |
| ipha = (ipha_t *)(mp->b_rptr + offset); |
| |
| if (flags & HCK_FULLCKSUM) { |
| ipaddr_t src, dst; |
| uint32_t cksum; |
| uint16_t *up; |
| uint8_t proto; |
| |
| /* |
| * Pointer to checksum field in ULP header. |
| */ |
| proto = ipha->ipha_protocol; |
| ASSERT(ipha->ipha_version_and_hdr_length == |
| IP_SIMPLE_HDR_VERSION); |
| |
| switch (proto) { |
| case IPPROTO_TCP: |
| /* LINTED: improper alignment cast */ |
| up = IPH_TCPH_CHECKSUMP(ipha, |
| IP_SIMPLE_HDR_LENGTH); |
| break; |
| |
| case IPPROTO_UDP: |
| /* LINTED: improper alignment cast */ |
| up = IPH_UDPH_CHECKSUMP(ipha, |
| IP_SIMPLE_HDR_LENGTH); |
| break; |
| |
| default: |
| cmn_err(CE_WARN, "mac_fix_cksum: " |
| "unexpected protocol: %d", proto); |
| continue; |
| } |
| |
| /* |
| * Pseudo-header checksum. |
| */ |
| src = ipha->ipha_src; |
| dst = ipha->ipha_dst; |
| len = ntohs(ipha->ipha_length) - |
| IP_SIMPLE_HDR_LENGTH; |
| |
| cksum = (dst >> 16) + (dst & 0xFFFF) + |
| (src >> 16) + (src & 0xFFFF); |
| cksum += htons(len); |
| |
| /* |
| * The checksum value stored in the packet needs |
| * to be correct. Compute it here. |
| */ |
| *up = 0; |
| cksum += (((proto) == IPPROTO_UDP) ? |
| IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP); |
| cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + |
| offset, cksum); |
| *(up) = (uint16_t)(cksum ? cksum : ~cksum); |
| |
| /* |
| * Flag the packet so that it appears |
| * that the checksum has already been |
| * verified by the hardware. |
| */ |
| flags &= ~HCK_FULLCKSUM; |
| flags |= HCK_FULLCKSUM_OK; |
| value = 0; |
| } |
| |
| if (flags & HCK_IPV4_HDRCKSUM) { |
| ASSERT(ipha != NULL); |
| ipha->ipha_hdr_checksum = |
| (uint16_t)ip_csum_hdr(ipha); |
| flags &= ~HCK_IPV4_HDRCKSUM; |
| flags |= HCK_IPV4_HDRCKSUM_OK; |
| |
| } |
| } |
| |
| if (flags & HCK_PARTIALCKSUM) { |
| uint16_t *up, partial, cksum; |
| uchar_t *ipp; /* ptr to beginning of IP header */ |
| |
| if (mp->b_cont != NULL) { |
| mblk_t *mp1; |
| |
| mp1 = msgpullup(mp, offset + end); |
| if (mp1 == NULL) |
| continue; |
| mp1->b_next = mp->b_next; |
| mp->b_next = NULL; |
| freemsg(mp); |
| if (prev != NULL) |
| prev->b_next = mp1; |
| else |
| new_chain = mp1; |
| mp = mp1; |
| } |
| |
| ipp = mp->b_rptr + offset; |
| /* LINTED: cast may result in improper alignment */ |
| up = (uint16_t *)((uchar_t *)ipp + stuff); |
| partial = *up; |
| *up = 0; |
| |
| cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start, |
| end - start, partial); |
| cksum = ~cksum; |
| *up = cksum ? cksum : ~cksum; |
| |
| /* |
| * Since we already computed the whole checksum, |
| * indicate to the stack that it has already |
| * been verified by the hardware. |
| */ |
| flags &= ~HCK_PARTIALCKSUM; |
| flags |= HCK_FULLCKSUM_OK; |
| value = 0; |
| } |
| |
| (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, |
| value, flags, KM_NOSLEEP); |
| } |
| |
| return (new_chain); |
| } |
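| |
| /* |
| * For reference, a summary of the HCK_FULLCKSUM arithmetic above (no |
| * additional behavior is implied): the ones-complement sum is seeded |
| * with the TCP/UDP pseudo-header and then folded over the ULP header |
| * and payload by IP_CSUM(): |
| * |
| *     pseudo = src + dst + ULP length + protocol constant |
| *     cksum  = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + offset, pseudo) |
| * |
| * A resulting value of zero is stored as its complement (0xFFFF) so |
| * that a computed checksum of zero is never written into the packet. |
| */ |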
| |
| /* |
| * Add VLAN tag to the specified mblk. |
| */ |
| mblk_t * |
| mac_add_vlan_tag(mblk_t *mp, uint_t pri, uint16_t vid) |
| { |
| mblk_t *hmp; |
| struct ether_vlan_header *evhp; |
| struct ether_header *ehp; |
| uint32_t start, stuff, end, value, flags; |
| |
| ASSERT(pri != 0 || vid != 0); |
| |
| /* |
| * Allocate an mblk for the new tagged ethernet header, |
| * and copy the MAC addresses and ethertype from the |
| * original header. |
| */ |
| |
| hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); |
| if (hmp == NULL) { |
| freemsg(mp); |
| return (NULL); |
| } |
| |
| evhp = (struct ether_vlan_header *)hmp->b_rptr; |
| ehp = (struct ether_header *)mp->b_rptr; |
| |
| bcopy(ehp, evhp, (ETHERADDRL * 2)); |
| evhp->ether_type = ehp->ether_type; |
| evhp->ether_tpid = htons(ETHERTYPE_VLAN); |
| |
| hmp->b_wptr += sizeof (struct ether_vlan_header); |
| mp->b_rptr += sizeof (struct ether_header); |
| |
| /* |
| * Free the original message if it's now empty. Link the |
| * rest of the messages to the header message. |
| */ |
| hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); |
| (void) hcksum_assoc(hmp, NULL, NULL, start, stuff, end, value, flags, |
| KM_NOSLEEP); |
| if (MBLKL(mp) == 0) { |
| hmp->b_cont = mp->b_cont; |
| freeb(mp); |
| } else { |
| hmp->b_cont = mp; |
| } |
| ASSERT(MBLKL(hmp) >= sizeof (struct ether_vlan_header)); |
| |
| /* |
| * Initialize the new TCI (Tag Control Information). |
| */ |
| evhp->ether_tci = htons(VLAN_TCI(pri, 0, vid)); |
| |
| return (hmp); |
| } |
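| |
| /* |
| * Worked example (hypothetical values, assuming the standard 802.1Q TCI |
| * layout produced by VLAN_TCI()): for pri = 3 and vid = 100 the TCI |
| * written above is (3 << 13) | 100 = 0x6064, and the tagged frame begins |
| * with |
| * |
| *     dst[6] src[6] tpid=0x8100 tci=0x6064 <original ether_type> ... |
| */ |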
| |
| /* |
| * Adds a VLAN tag with the specified VID and priority to each mblk of |
| * the specified chain. |
| */ |
| mblk_t * |
| mac_add_vlan_tag_chain(mblk_t *mp_chain, uint_t pri, uint16_t vid) |
| { |
| mblk_t *next_mp, **prev, *mp; |
| |
| mp = mp_chain; |
| prev = &mp_chain; |
| |
| while (mp != NULL) { |
| next_mp = mp->b_next; |
| mp->b_next = NULL; |
| if ((mp = mac_add_vlan_tag(mp, pri, vid)) == NULL) { |
| freemsgchain(next_mp); |
| break; |
| } |
| *prev = mp; |
| prev = &mp->b_next; |
| mp = mp->b_next = next_mp; |
| } |
| |
| return (mp_chain); |
| } |
| |
| /* |
| * Strip VLAN tag |
| */ |
| mblk_t * |
| mac_strip_vlan_tag(mblk_t *mp) |
| { |
| mblk_t *newmp; |
| struct ether_vlan_header *evhp; |
| |
| evhp = (struct ether_vlan_header *)mp->b_rptr; |
| if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) { |
| ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); |
| |
| if (DB_REF(mp) > 1) { |
| newmp = copymsg(mp); |
| if (newmp == NULL) |
| return (NULL); |
| freemsg(mp); |
| mp = newmp; |
| } |
| |
| evhp = (struct ether_vlan_header *)mp->b_rptr; |
| |
| ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL); |
| mp->b_rptr += VLAN_TAGSZ; |
| } |
| return (mp); |
| } |
| |
| /* |
| * Strip VLAN tag from each mblk of the chain. |
| */ |
| mblk_t * |
| mac_strip_vlan_tag_chain(mblk_t *mp_chain) |
| { |
| mblk_t *mp, *next_mp, **prev; |
| |
| mp = mp_chain; |
| prev = &mp_chain; |
| |
| while (mp != NULL) { |
| next_mp = mp->b_next; |
| mp->b_next = NULL; |
| if ((mp = mac_strip_vlan_tag(mp)) == NULL) { |
| freemsgchain(next_mp); |
| break; |
| } |
| *prev = mp; |
| prev = &mp->b_next; |
| mp = mp->b_next = next_mp; |
| } |
| |
| return (mp_chain); |
| } |
| |
| /* |
| * Default callback function. Used when the datapath is not yet initialized. |
| */ |
| /* ARGSUSED */ |
| void |
| mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp, |
| boolean_t loopback) |
| { |
| mblk_t *mp1 = mp; |
| |
| while (mp1 != NULL) { |
| mp1->b_prev = NULL; |
| mp1->b_queue = NULL; |
| mp1 = mp1->b_next; |
| } |
| freemsgchain(mp); |
| } |
| |
| /* |
| * Determines the IPv6 header length, accounting for all the optional IPv6 |
| * headers (hop-by-hop, destination, routing and fragment). The header length |
| * and the next header value (a transport header) are captured. |
| * |
| * Returns B_FALSE if the IP headers are not all contained in the same mblk; |
| * otherwise returns B_TRUE. |
| */ |
| boolean_t |
| mac_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr, uint16_t *hdr_length, |
| uint8_t *next_hdr, ip6_frag_t **fragp) |
| { |
| uint16_t length; |
| uint_t ehdrlen; |
| uint8_t *whereptr; |
| uint8_t *nexthdrp; |
| ip6_dest_t *desthdr; |
| ip6_rthdr_t *rthdr; |
| ip6_frag_t *fraghdr; |
| |
| if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr) |
| return (B_FALSE); |
| ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); |
| length = IPV6_HDR_LEN; |
| whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ |
| |
| if (fragp != NULL) |
| *fragp = NULL; |
| |
| nexthdrp = &ip6h->ip6_nxt; |
| while (whereptr < endptr) { |
| /* Is there enough left for len + nexthdr? */ |
| if (whereptr + MIN_EHDR_LEN > endptr) |
| break; |
| |
| switch (*nexthdrp) { |
| case IPPROTO_HOPOPTS: |
| case IPPROTO_DSTOPTS: |
| /* Assumes the headers are identical for hbh and dst */ |
| desthdr = (ip6_dest_t *)whereptr; |
| ehdrlen = 8 * (desthdr->ip6d_len + 1); |
| if ((uchar_t *)desthdr + ehdrlen > endptr) |
| return (B_FALSE); |
| nexthdrp = &desthdr->ip6d_nxt; |
| break; |
| case IPPROTO_ROUTING: |
| rthdr = (ip6_rthdr_t *)whereptr; |
| ehdrlen = 8 * (rthdr->ip6r_len + 1); |
| if ((uchar_t *)rthdr + ehdrlen > endptr) |
| return (B_FALSE); |
| nexthdrp = &rthdr->ip6r_nxt; |
| break; |
| case IPPROTO_FRAGMENT: |
| fraghdr = (ip6_frag_t *)whereptr; |
| ehdrlen = sizeof (ip6_frag_t); |
| if ((uchar_t *)&fraghdr[1] > endptr) |
| return (B_FALSE); |
| nexthdrp = &fraghdr->ip6f_nxt; |
| if (fragp != NULL) |
| *fragp = fraghdr; |
| break; |
| case IPPROTO_NONE: |
| /* No next header means we're finished */ |
| default: |
| *hdr_length = length; |
| *next_hdr = *nexthdrp; |
| return (B_TRUE); |
| } |
| length += ehdrlen; |
| whereptr += ehdrlen; |
| *hdr_length = length; |
| *next_hdr = *nexthdrp; |
| } |
| switch (*nexthdrp) { |
| case IPPROTO_HOPOPTS: |
| case IPPROTO_DSTOPTS: |
| case IPPROTO_ROUTING: |
| case IPPROTO_FRAGMENT: |
| /* |
| * If any known extension headers are still to be processed, the |
| * packet is malformed (or at least the IP headers are not all in |
| * the same mblk, and that should never happen). |
| */ |
| return (B_FALSE); |
| |
| default: |
| /* |
| * If we get here, we know that all of the IP headers were in |
| * the same mblk, even if the ULP header is in the next mblk. |
| */ |
| *hdr_length = length; |
| *next_hdr = *nexthdrp; |
| return (B_TRUE); |
| } |
| } |
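| |
| /* |
| * A minimal usage sketch (mirroring the caller in mac_pkt_hash() below): |
| * given an ip6_t known to start in the current mblk, the extension |
| * header chain can be walked with |
| * |
| *     uint16_t hlen; |
| *     uint8_t ulp; |
| *     ip6_frag_t *frag = NULL; |
| * |
| *     if (!mac_ip_hdr_length_v6(ip6h, mp->b_wptr, &hlen, &ulp, &frag)) |
| *         return;            (headers span mblks, give up) |
| * |
| * On success the ULP header starts hlen bytes past ip6h, ulp holds its |
| * protocol number, and frag is non-NULL for fragmented packets. |
| */ |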
| |
| /* |
| * The following set of routines takes care of interrupt re-targeting |
| * for legacy (fixed) interrupts. Some older versions of popular NICs, |
| * such as e1000g, do not support MSI-X interrupts and instead reserve |
| * fixed interrupts for RX/TX rings. To re-target these interrupts, |
| * PCITOOL ioctls need to be used. |
| */ |
| typedef struct mac_dladm_intr { |
| int ino; |
| int cpu_id; |
| char driver_path[MAXPATHLEN]; |
| char nexus_path[MAXPATHLEN]; |
| } mac_dladm_intr_t; |
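| |
| /* |
| * In outline, the retargeting sequence implemented below is (this is a |
| * summary of the code that follows, not additional behavior): |
| * |
| *     1. ddi_pathname_minor() and mac_get_nexus_node() locate the nexus |
| *        that exports the ":intr" PCITOOL minor node for the NIC. |
| *     2. ldi_open_by_name() opens that node from within the kernel. |
| *     3. PCITOOL_SYSTEM_INTR_INFO and PCITOOL_DEVICE_GET_INTR walk the |
| *        (cpu, ino) space until an interrupt owned by our device is found. |
| *     4. PCITOOL_DEVICE_SET_INTR rebinds that ino to the requested CPU. |
| */ |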
| |
| /* Bind the interrupt to cpu_num */ |
| static int |
| mac_set_intr(ldi_handle_t lh, processorid_t cpu_num, int oldcpuid, int ino) |
| { |
| pcitool_intr_set_t iset; |
| int err; |
| |
| iset.old_cpu = oldcpuid; |
| iset.ino = ino; |
| iset.cpu_id = cpu_num; |
| iset.user_version = PCITOOL_VERSION; |
| err = ldi_ioctl(lh, PCITOOL_DEVICE_SET_INTR, (intptr_t)&iset, FKIOCTL, |
| kcred, NULL); |
| |
| return (err); |
| } |
| |
| /* |
| * Search the interrupt information returned in iget_p for the device |
| * described by dln. |
| */ |
| static boolean_t |
| mac_search_intrinfo(pcitool_intr_get_t *iget_p, mac_dladm_intr_t *dln) |
| { |
| int i; |
| char driver_path[2 * MAXPATHLEN]; |
| |
| for (i = 0; i < iget_p->num_devs; i++) { |
| (void) strlcpy(driver_path, iget_p->dev[i].path, MAXPATHLEN); |
| (void) snprintf(&driver_path[strlen(driver_path)], MAXPATHLEN, |
| ":%s%d", iget_p->dev[i].driver_name, |
| iget_p->dev[i].dev_inst); |
| /* Match against the device path of the device we are looking for */ |
| if (strcmp(driver_path, dln->driver_path) == 0) { |
| dln->ino = iget_p->ino; |
| dln->cpu_id = iget_p->cpu_id; |
| return (B_TRUE); |
| } |
| } |
| return (B_FALSE); |
| } |
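| |
| /* |
| * The string compared above has the form "<devfs path>:<driver><instance>", |
| * for example (hypothetical device) "/pci@0,0/pci8086,10d3@19:e1000g0". |
| * It is matched against the dln->driver_path built via ddi_pathname_minor() |
| * in mac_check_interrupt_binding(). |
| */ |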
| |
| /* |
| * Get information about ino, i.e. whether this is the interrupt for our |
| * device and which CPU it is currently bound to. |
| */ |
| static boolean_t |
| mac_get_single_intr(ldi_handle_t lh, int oldcpuid, int ino, |
| mac_dladm_intr_t *dln) |
| { |
| pcitool_intr_get_t *iget_p; |
| int ipsz; |
| int nipsz; |
| int err; |
| uint8_t inum; |
| |
| /* |
| * Check whether KM_SLEEP is OK here, i.e. whether we could get here in |
| * response to a fanout change triggered by a callback from the driver, |
| * say a link speed change. |
| */ |
| ipsz = PCITOOL_IGET_SIZE(0); |
| iget_p = kmem_zalloc(ipsz, KM_SLEEP); |
| |
| iget_p->num_devs_ret = 0; |
| iget_p->user_version = PCITOOL_VERSION; |
| iget_p->cpu_id = oldcpuid; |
| iget_p->ino = ino; |
| |
| err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p, |
| FKIOCTL, kcred, NULL); |
| if (err != 0) { |
| kmem_free(iget_p, ipsz); |
| return (B_FALSE); |
| } |
| if (iget_p->num_devs == 0) { |
| kmem_free(iget_p, ipsz); |
| return (B_FALSE); |
| } |
| inum = iget_p->num_devs; |
| if (iget_p->num_devs_ret < iget_p->num_devs) { |
| /* Reallocate */ |
| nipsz = PCITOOL_IGET_SIZE(iget_p->num_devs); |
| |
| kmem_free(iget_p, ipsz); |
| ipsz = nipsz; |
| iget_p = kmem_zalloc(ipsz, KM_SLEEP); |
| |
| iget_p->num_devs_ret = inum; |
| iget_p->cpu_id = oldcpuid; |
| iget_p->ino = ino; |
| iget_p->user_version = PCITOOL_VERSION; |
| err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p, |
| FKIOCTL, kcred, NULL); |
| if (err != 0) { |
| kmem_free(iget_p, ipsz); |
| return (B_FALSE); |
| } |
| /* defensive */ |
| if (iget_p->num_devs != iget_p->num_devs_ret) { |
| kmem_free(iget_p, ipsz); |
| return (B_FALSE); |
| } |
| } |
| |
| if (mac_search_intrinfo(iget_p, dln)) { |
| kmem_free(iget_p, ipsz); |
| return (B_TRUE); |
| } |
| kmem_free(iget_p, ipsz); |
| return (B_FALSE); |
| } |
| |
| /* |
| * Get the interrupts and check each one to see if it is for our device. |
| */ |
| static int |
| mac_validate_intr(ldi_handle_t lh, mac_dladm_intr_t *dln, processorid_t cpuid) |
| { |
| pcitool_intr_info_t intr_info; |
| int err; |
| int ino; |
| int oldcpuid; |
| |
| err = ldi_ioctl(lh, PCITOOL_SYSTEM_INTR_INFO, (intptr_t)&intr_info, |
| FKIOCTL, kcred, NULL); |
| if (err != 0) |
| return (-1); |
| |
| for (oldcpuid = 0; oldcpuid < intr_info.num_cpu; oldcpuid++) { |
| for (ino = 0; ino < intr_info.num_intr; ino++) { |
| if (mac_get_single_intr(lh, oldcpuid, ino, dln)) { |
| if (dln->cpu_id == cpuid) |
| return (0); |
| return (1); |
| } |
| } |
| } |
| return (-1); |
| } |
| |
| /* |
| * Obtain the nexus parent node information for mdip. |
| */ |
| static dev_info_t * |
| mac_get_nexus_node(dev_info_t *mdip, mac_dladm_intr_t *dln) |
| { |
| struct dev_info *tdip = (struct dev_info *)mdip; |
| struct ddi_minor_data *minordata; |
| int circ; |
| dev_info_t *pdip; |
| char pathname[MAXPATHLEN]; |
| |
| while (tdip != NULL) { |
| /* |
| * The netboot code could call this function while walking the |
| * device tree so we need to use ndi_devi_tryenter() here to |
| * avoid deadlock. |
| */ |
| if (ndi_devi_tryenter((dev_info_t *)tdip, &circ) == 0) |
| break; |
| |
| for (minordata = tdip->devi_minor; minordata != NULL; |
| minordata = minordata->next) { |
| if (strncmp(minordata->ddm_node_type, DDI_NT_INTRCTL, |
| strlen(DDI_NT_INTRCTL)) == 0) { |
| pdip = minordata->dip; |
| (void) ddi_pathname(pdip, pathname); |
| (void) snprintf(dln->nexus_path, MAXPATHLEN, |
| "/devices%s:intr", pathname); |
| (void) ddi_pathname_minor(minordata, pathname); |
| ndi_devi_exit((dev_info_t *)tdip, circ); |
| return (pdip); |
| } |
| } |
| ndi_devi_exit((dev_info_t *)tdip, circ); |
| tdip = tdip->devi_parent; |
| } |
| return (NULL); |
| } |
| |
| /* |
| * For a primary MAC client, if the user has set a list of CPUs or |
| * we have obtained it implicitly, we try to retarget the interrupt |
| * for that device on one of the CPUs in the list. |
| * We assign the interrupt to the same CPU as the poll thread. |
| */ |
| static boolean_t |
| mac_check_interrupt_binding(dev_info_t *mdip, int32_t cpuid) |
| { |
| ldi_handle_t lh = NULL; |
| ldi_ident_t li = NULL; |
| int err; |
| int ret; |
| mac_dladm_intr_t dln; |
| dev_info_t *dip; |
| struct ddi_minor_data *minordata; |
| |
| dln.nexus_path[0] = '\0'; |
| dln.driver_path[0] = '\0'; |
| |
| minordata = ((struct dev_info *)mdip)->devi_minor; |
| while (minordata != NULL) { |
| if (minordata->type == DDM_MINOR) |
| break; |
| minordata = minordata->next; |
| } |
| if (minordata == NULL) |
| return (B_FALSE); |
| |
| (void) ddi_pathname_minor(minordata, dln.driver_path); |
| |
| dip = mac_get_nexus_node(mdip, &dln); |
| /* defensive */ |
| if (dip == NULL) |
| return (B_FALSE); |
| |
| err = ldi_ident_from_major(ddi_driver_major(dip), &li); |
| if (err != 0) |
| return (B_FALSE); |
| |
| err = ldi_open_by_name(dln.nexus_path, FREAD|FWRITE, kcred, &lh, li); |
| if (err != 0) |
| return (B_FALSE); |
| |
| ret = mac_validate_intr(lh, &dln, cpuid); |
| if (ret < 0) { |
| (void) ldi_close(lh, FREAD|FWRITE, kcred); |
| return (B_FALSE); |
| } |
| /* cmn_note? */ |
| if (ret != 0) |
| if ((err = (mac_set_intr(lh, cpuid, dln.cpu_id, dln.ino))) |
| != 0) { |
| (void) ldi_close(lh, FREAD|FWRITE, kcred); |
| return (B_FALSE); |
| } |
| (void) ldi_close(lh, FREAD|FWRITE, kcred); |
| return (B_TRUE); |
| } |
| |
| void |
| mac_client_set_intr_cpu(void *arg, mac_client_handle_t mch, int32_t cpuid) |
| { |
| dev_info_t *mdip = (dev_info_t *)arg; |
| mac_client_impl_t *mcip = (mac_client_impl_t *)mch; |
| mac_resource_props_t *mrp; |
| mac_perim_handle_t mph; |
| flow_entry_t *flent = mcip->mci_flent; |
| mac_soft_ring_set_t *rx_srs; |
| mac_cpus_t *srs_cpu; |
| |
| if (!mac_check_interrupt_binding(mdip, cpuid)) |
| cpuid = -1; |
| mac_perim_enter_by_mh((mac_handle_t)mcip->mci_mip, &mph); |
| mrp = MCIP_RESOURCE_PROPS(mcip); |
| mrp->mrp_rx_intr_cpu = cpuid; |
| if (flent != NULL && flent->fe_rx_srs_cnt == 2) { |
| rx_srs = flent->fe_rx_srs[1]; |
| srs_cpu = &rx_srs->srs_cpu; |
| srs_cpu->mc_rx_intr_cpu = cpuid; |
| } |
| mac_perim_exit(mph); |
| } |
| |
| int32_t |
| mac_client_intr_cpu(mac_client_handle_t mch) |
| { |
| mac_client_impl_t *mcip = (mac_client_impl_t *)mch; |
| mac_cpus_t *srs_cpu; |
| mac_soft_ring_set_t *rx_srs; |
| flow_entry_t *flent = mcip->mci_flent; |
| mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); |
| mac_ring_t *ring; |
| mac_intr_t *mintr; |
| |
| /* |
| * Check if we need to retarget the interrupt. We do this only for |
| * the primary MAC client, and only when it has a single exclusive |
| * ring in the group (fe_rx_srs_cnt == 2). |
| */ |
| if (mac_is_primary_client(mcip) && flent->fe_rx_srs_cnt == 2) { |
| rx_srs = flent->fe_rx_srs[1]; |
| srs_cpu = &rx_srs->srs_cpu; |
| ring = rx_srs->srs_ring; |
| mintr = &ring->mr_info.mri_intr; |
| /* |
| * If ddi_handle is present or the poll CPU is |
| * already bound to the interrupt CPU, return -1. |
| */ |
| if (mintr->mi_ddi_handle != NULL || |
| ((mrp->mrp_ncpus != 0) && |
| (mrp->mrp_rx_intr_cpu == srs_cpu->mc_rx_pollid))) { |
| return (-1); |
| } |
| return (srs_cpu->mc_rx_pollid); |
| } |
| return (-1); |
| } |
| |
| void * |
| mac_get_devinfo(mac_handle_t mh) |
| { |
| mac_impl_t *mip = (mac_impl_t *)mh; |
| |
| return ((void *)mip->mi_dip); |
| } |
| |
| #define PKT_HASH_2BYTES(x) ((x)[0] ^ (x)[1]) |
| #define PKT_HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3]) |
| #define PKT_HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5]) |
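| |
| /* |
| * Worked example (hypothetical address): PKT_HASH_MAC() folds the six |
| * bytes of a MAC address with XOR, e.g. for 00:1b:21:aa:bb:cc |
| * |
| *     0x00 ^ 0x1b ^ 0x21 ^ 0xaa ^ 0xbb ^ 0xcc = 0xe7 |
| * |
| * PKT_HASH_2BYTES() and PKT_HASH_4BYTES() fold two and four bytes the |
| * same way; they are used below on the IPv4 ident and address fields and |
| * on the first word of the ULP header. |
| */ |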
| |
| uint64_t |
| mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) |
| { |
| struct ether_header *ehp; |
| uint64_t hash = 0; |
| uint16_t sap; |
| uint_t skip_len; |
| uint8_t proto; |
| boolean_t ip_fragmented; |
| |
| /* |
| * We may want to have one of these per MAC type plugin in the |
| * future. For now, only Ethernet is supported. |
| */ |
| if (media != DL_ETHER) |
| return (0L); |
| |
| /* for now we support only outbound packets */ |
| ASSERT(is_outbound); |
| ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); |
| ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); |
| |
| /* compute L2 hash */ |
| |
| ehp = (struct ether_header *)mp->b_rptr; |
| |
| if ((policy & MAC_PKT_HASH_L2) != 0) { |
| uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; |
| uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; |
| hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst); |
| policy &= ~MAC_PKT_HASH_L2; |
| } |
| |
| if (policy == 0) |
| goto done; |
| |
| /* skip ethernet header */ |
| |
| sap = ntohs(ehp->ether_type); |
| if (sap == ETHERTYPE_VLAN) { |
| struct ether_vlan_header *evhp; |
| mblk_t *newmp = NULL; |
| |
| skip_len = sizeof (struct ether_vlan_header); |
| if (MBLKL(mp) < skip_len) { |
| /* the vlan tag is in the payload (next mblk), pull up first */ |
| newmp = msgpullup(mp, -1); |
| if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) { |
| goto done; |
| } |
| evhp = (struct ether_vlan_header *)newmp->b_rptr; |
| } else { |
| evhp = (struct ether_vlan_header *)mp->b_rptr; |
| } |
| |
| sap = ntohs(evhp->ether_type); |
| freemsg(newmp); |
| } else { |
| skip_len = sizeof (struct ether_header); |
| } |
| |
| /* if ethernet header is in its own mblk, skip it */ |
| if (MBLKL(mp) <= skip_len) { |
| skip_len -= MBLKL(mp); |
| mp = mp->b_cont; |
| if (mp == NULL) |
| goto done; |
| } |
| |
| sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; |
| |
| /* compute IP src/dst addresses hash and skip IPv{4,6} header */ |
| |
| switch (sap) { |
| case ETHERTYPE_IP: { |
| ipha_t *iphp; |
| |
| /* |
| * If the header is not aligned or the header doesn't fit |
| * in the mblk, bail now. Note that this may cause packet |
| * reordering. |
| */ |
| iphp = (ipha_t *)(mp->b_rptr + skip_len); |
| if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) || |
| !OK_32PTR((char *)iphp)) |
| goto done; |
| |
| proto = iphp->ipha_protocol; |
| skip_len += IPH_HDR_LENGTH(iphp); |
| |
| /* Check if the packet is fragmented. */ |
| ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) & |
| IPH_OFFSET; |
| |
| /* |
| * For fragmented packets, use addresses in addition to |
| * the frag_id to generate the hash in order to get |
| * better distribution. |
| */ |
| if (ip_fragmented || (policy & MAC_PKT_HASH_L3) != 0) { |
| uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src); |
| uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst); |
| |
| hash ^= (PKT_HASH_4BYTES(ip_src) ^ |
| PKT_HASH_4BYTES(ip_dst)); |
| policy &= ~MAC_PKT_HASH_L3; |
| } |
| |
| if (ip_fragmented) { |
| uint8_t *identp = (uint8_t *)&iphp->ipha_ident; |
| hash ^= PKT_HASH_2BYTES(identp); |
| goto done; |
| } |
| break; |
| } |
| case ETHERTYPE_IPV6: { |
| ip6_t *ip6hp; |
| ip6_frag_t *frag = NULL; |
| uint16_t hdr_length; |
| |
| /* |
| * If the header is not aligned or the header doesn't fit |
| * in the mblk, bail now. Note that this may cause packet |
| * reordering. |
| */ |
| |
| ip6hp = (ip6_t *)(mp->b_rptr + skip_len); |
| if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) || |
| !OK_32PTR((char *)ip6hp)) |
| goto done; |
| |
| if (!mac_ip_hdr_length_v6(ip6hp, mp->b_wptr, &hdr_length, |
| &proto, &frag)) |
| goto done; |
| skip_len += hdr_length; |
| |
| /* |
| * For fragmented packets, use addresses in addition to |
| * the frag_id to generate the hash in order to get |
| * better distribution. |
| */ |
| if (frag != NULL || (policy & MAC_PKT_HASH_L3) != 0) { |
| uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]); |
| uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]); |
| |
| hash ^= (PKT_HASH_4BYTES(ip_src) ^ |
| PKT_HASH_4BYTES(ip_dst)); |
| policy &= ~MAC_PKT_HASH_L3; |
| } |
| |
| if (frag != NULL) { |
| uint8_t *identp = (uint8_t *)&frag->ip6f_ident; |
| hash ^= PKT_HASH_4BYTES(identp); |
| goto done; |
| } |
| break; |
| } |
| default: |
| goto done; |
| } |
| |
| if (policy == 0) |
| goto done; |
| |
| /* if ip header is in its own mblk, skip it */ |
| if (MBLKL(mp) <= skip_len) { |
| skip_len -= MBLKL(mp); |
| mp = mp->b_cont; |
| if (mp == NULL) |
| goto done; |
| } |
| |
| /* parse ULP header */ |
| again: |
| switch (proto) { |
| case IPPROTO_TCP: |
| case IPPROTO_UDP: |
| case IPPROTO_ESP: |
| case IPPROTO_SCTP: |
| /* |
| * These Internet Protocols are intentionally designed |
| * for hashing from the get-go. Port numbers are in the first |
| * word for transports, SPI is first for ESP. |
| */ |
| if (mp->b_rptr + skip_len + 4 > mp->b_wptr) |
| goto done; |
| hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len)); |
| break; |
| |
| case IPPROTO_AH: { |
| ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); |
| uint_t ah_length = AH_TOTAL_LEN(ah); |
| |
| if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr) |
| goto done; |
| |
| proto = ah->ah_nexthdr; |
| skip_len += ah_length; |
| |
| /* if AH header is in its own mblk, skip it */ |
| if (MBLKL(mp) <= skip_len) { |
| skip_len -= MBLKL(mp); |
| mp = mp->b_cont; |
| if (mp == NULL) |
| goto done; |
| } |
| |
| goto again; |
| } |
| } |
| |
| done: |
| return (hash); |
| } |
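| |
| /* |
| * A minimal usage sketch (hypothetical caller): a transmit fanout path |
| * could use the hash to spread flows across N rings while keeping each |
| * flow on a single ring: |
| * |
| *     uint64_t h = mac_pkt_hash(DL_ETHER, mp, |
| *         MAC_PKT_HASH_L2 | MAC_PKT_HASH_L3 | MAC_PKT_HASH_L4, B_TRUE); |
| *     ring = ring_tab[h % nrings];        (ring_tab/nrings are hypothetical) |
| * |
| * The policy bits are cleared as each layer is hashed, so a caller asking |
| * for L4 hashing still gets an L2/L3 based hash when the packet cannot be |
| * parsed that far. |
| */ |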