| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| |
| /* |
| * Copyright(c) 2007-2010 Intel Corporation. All rights reserved. |
| */ |
| |
| /* |
| * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. |
| * Copyright 2012 Nexenta Systems, Inc. All rights reserved. |
| * Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved. |
| * Copyright 2017 Joyent, Inc. |
| */ |
| |
| #include "ixgbe_sw.h" |
| |
| static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *, |
| uint32_t, boolean_t); |
| static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *, |
| uint32_t); |
| static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *, |
| ixgbe_tx_context_t *, size_t); |
| static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t); |
| static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *); |
| |
| static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *); |
| static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *, |
| ixgbe_tx_context_t *); |
| static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *, |
| ixgbe_tx_context_t *); |
| |
| #ifndef IXGBE_DEBUG |
| #pragma inline(ixgbe_save_desc) |
| #pragma inline(ixgbe_get_context) |
| #pragma inline(ixgbe_check_context) |
| #pragma inline(ixgbe_fill_context) |
| #endif |
| |
| /* |
| * ixgbe_ring_tx |
| * |
 * Transmit one mblk through the specified tx ring.
| * |
 * One mblk can consist of several fragments, and each fragment is
 * processed differently based on its size. Fragments smaller than
 * the bcopy threshold are processed with bcopy; otherwise, they are
 * processed with DMA binding.
| * |
 * To process the mblk, a tx control block is taken from the
 * free list. One tx control block contains one tx buffer, which
 * is used to copy mblk fragments' data, and one tx DMA handle,
 * which is used to bind an mblk fragment to DMA resources.
| * |
| * Several small mblk fragments can be copied into one tx control |
| * block's buffer, and then the buffer will be transmitted with |
| * one tx descriptor. |
| * |
| * A large fragment only binds with one tx control block's DMA |
| * handle, and it can span several tx descriptors for transmitting. |
| * |
 * So several tx control blocks may be used to transmit one
 * packet (mblk). After the processing, those tx control blocks
 * are put on the work list.
| */ |
| mblk_t * |
| ixgbe_ring_tx(void *arg, mblk_t *mp) |
| { |
| ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg; |
| ixgbe_t *ixgbe = tx_ring->ixgbe; |
| tx_type_t current_flag, next_flag; |
| uint32_t current_len, next_len; |
| uint32_t desc_total; |
| size_t mbsize; |
| int desc_num; |
| boolean_t copy_done, eop; |
| mblk_t *current_mp, *next_mp, *nmp, *pull_mp = NULL; |
| tx_control_block_t *tcb; |
| ixgbe_tx_context_t tx_context, *ctx; |
| link_list_t pending_list; |
| uint32_t len, hdr_frag_len, hdr_len; |
| uint32_t copy_thresh; |
| mblk_t *hdr_new_mp = NULL; |
| mblk_t *hdr_pre_mp = NULL; |
| mblk_t *hdr_nmp = NULL; |
| |
| ASSERT(mp->b_next == NULL); |
| |
| if ((ixgbe->ixgbe_state & IXGBE_SUSPENDED) || |
| (ixgbe->ixgbe_state & IXGBE_ERROR) || |
| (ixgbe->ixgbe_state & IXGBE_OVERTEMP) || |
| !(ixgbe->ixgbe_state & IXGBE_STARTED) || |
| ixgbe->link_state != LINK_STATE_UP) { |
| freemsg(mp); |
| return (NULL); |
| } |
| |
| copy_thresh = ixgbe->tx_copy_thresh; |
| |
| /* Get the mblk size */ |
| mbsize = 0; |
| for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) { |
| mbsize += MBLKL(nmp); |
| } |
| |
| if (ixgbe->tx_hcksum_enable) { |
| /* |
| * Retrieve checksum context information from the mblk |
| * that will be used to decide whether/how to fill the |
| * context descriptor. |
| */ |
| ctx = &tx_context; |
| if (ixgbe_get_context(mp, ctx) < 0) { |
| freemsg(mp); |
| return (NULL); |
| } |
| |
| /* |
		 * If the mblk size exceeds the maximum size ixgbe can
		 * process, discard the mblk and return NULL.
| */ |
| if ((ctx->lso_flag && |
| ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) || |
| (!ctx->lso_flag && |
| (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) { |
| freemsg(mp); |
| IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize"); |
| return (NULL); |
| } |
| } else { |
| ctx = NULL; |
| } |
| |
| /* |
| * Check and recycle tx descriptors. |
	 * The recycle threshold here should be selected carefully.
| */ |
| if (tx_ring->tbd_free < ixgbe->tx_recycle_thresh) { |
| tx_ring->tx_recycle(tx_ring); |
| } |
| |
| /* |
	 * After the recycling, if tbd_free is still less than the
	 * overload threshold, assert overload and return mp;
	 * the transmit then needs to be rescheduled.
| */ |
| if (tx_ring->tbd_free < ixgbe->tx_overload_thresh) { |
| tx_ring->reschedule = B_TRUE; |
| tx_ring->stat_overload++; |
| return (mp); |
| } |
| |
| /* |
| * The pending_list is a linked list that is used to save |
	 * the tx control blocks whose packet data has been processed
	 * but not yet placed on the tx descriptor ring.
| * It is used to reduce the lock contention of the tx_lock. |
| */ |
| LINK_LIST_INIT(&pending_list); |
| desc_num = 0; |
| desc_total = 0; |
| |
| /* |
	 * The software must guarantee that the LSO packet header
	 * (MAC+IP+TCP) fits within one descriptor. Here we reallocate
	 * and refill the header if it is not physically contiguous.
| */ |
| if ((ctx != NULL) && ctx->lso_flag) { |
| /* find the last fragment of the header */ |
| len = MBLKL(mp); |
| ASSERT(len > 0); |
| hdr_nmp = mp; |
| hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len; |
| while (len < hdr_len) { |
| hdr_pre_mp = hdr_nmp; |
| hdr_nmp = hdr_nmp->b_cont; |
| len += MBLKL(hdr_nmp); |
| } |
| /* |
| * If the header and the payload are in different mblks, |
		 * we simply force the header to be copied into the
		 * pre-allocated page-aligned buffer.
| */ |
| if (len == hdr_len) |
| goto adjust_threshold; |
| |
| hdr_frag_len = hdr_len - (len - MBLKL(hdr_nmp)); |
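		/*
		 * len is the cumulative length through hdr_nmp, so
		 * (len - MBLKL(hdr_nmp)) is the number of header bytes
		 * held in earlier mblks; hdr_frag_len is therefore the
		 * portion of the header that resides in hdr_nmp.
		 */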
| /* |
		 * There are two cases in which we need to reallocate an
		 * mblk for the last header fragment:
		 * 1. the header is in multiple mblks and the last fragment
		 *    shares the same mblk with the payload
		 * 2. the header is in a single mblk shared with the payload
		 *    and the header is not physically contiguous (fewer
		 *    than hdr_len bytes remain before the next page
		 *    boundary)
| */ |
| if ((hdr_nmp != mp) || |
| (P2NPHASE((uintptr_t)hdr_nmp->b_rptr, ixgbe->sys_page_size) |
| < hdr_len)) { |
| tx_ring->stat_lso_header_fail++; |
| /* |
			 * Reallocate the mblk for the last header fragment;
			 * it is expected to be bcopied into the
			 * pre-allocated page-aligned buffer.
| */ |
			hdr_new_mp = allocb(hdr_frag_len, 0);
			if (hdr_new_mp == NULL)
| return (mp); |
| bcopy(hdr_nmp->b_rptr, hdr_new_mp->b_rptr, |
| hdr_frag_len); |
| /* link the new header fragment with the other parts */ |
| hdr_new_mp->b_wptr = hdr_new_mp->b_rptr + hdr_frag_len; |
| hdr_new_mp->b_cont = hdr_nmp; |
| if (hdr_pre_mp) |
| hdr_pre_mp->b_cont = hdr_new_mp; |
| else |
| mp = hdr_new_mp; |
| hdr_nmp->b_rptr += hdr_frag_len; |
| } |
| adjust_threshold: |
| /* |
		 * Adjust the bcopy threshold to guarantee that the
		 * header is transmitted with bcopy.
| */ |
| if (copy_thresh < hdr_len) |
| copy_thresh = hdr_len; |
| } |
| |
| current_mp = mp; |
| current_len = MBLKL(current_mp); |
| /* |
| * Decide which method to use for the first fragment |
| */ |
| current_flag = (current_len <= copy_thresh) ? |
| USE_COPY : USE_DMA; |
| /* |
| * If the mblk includes several contiguous small fragments, |
| * they may be copied into one buffer. This flag is used to |
| * indicate whether there are pending fragments that need to |
| * be copied to the current tx buffer. |
| * |
| * If this flag is B_TRUE, it indicates that a new tx control |
| * block is needed to process the next fragment using either |
| * copy or DMA binding. |
| * |
| * Otherwise, it indicates that the next fragment will be |
| * copied to the current tx buffer that is maintained by the |
| * current tx control block. No new tx control block is needed. |
| */ |
| copy_done = B_TRUE; |
| while (current_mp) { |
| next_mp = current_mp->b_cont; |
| eop = (next_mp == NULL); /* Last fragment of the packet? */ |
| next_len = eop ? 0: MBLKL(next_mp); |
| |
| /* |
		 * When the current fragment is empty and the next fragment
		 * will still be copied to the current tx buffer, we cannot
		 * skip this fragment here, because the copy processing is
		 * still pending completion. We have to process this empty
		 * fragment in the tx_copy routine.
		 *
		 * If the copy processing is complete, or a DMA binding has
		 * just completed, we can simply skip this empty fragment.
| */ |
| if ((current_len == 0) && (copy_done)) { |
| current_mp = next_mp; |
| current_len = next_len; |
| current_flag = (current_len <= copy_thresh) ? |
| USE_COPY : USE_DMA; |
| continue; |
| } |
| |
| if (copy_done) { |
| /* |
| * Get a new tx control block from the free list |
| */ |
| tcb = ixgbe_get_free_list(tx_ring); |
| |
| if (tcb == NULL) { |
| tx_ring->stat_fail_no_tcb++; |
| goto tx_failure; |
| } |
| |
| /* |
| * Push the tx control block to the pending list |
| * to avoid using lock too early |
| */ |
| LIST_PUSH_TAIL(&pending_list, &tcb->link); |
| } |
| |
| if (current_flag == USE_COPY) { |
| /* |
| * Check whether to use bcopy or DMA binding to process |
| * the next fragment, and if using bcopy, whether we |
| * need to continue copying the next fragment into the |
| * current tx buffer. |
| */ |
| ASSERT((tcb->tx_buf.len + current_len) <= |
| tcb->tx_buf.size); |
| |
| if (eop) { |
| /* |
| * This is the last fragment of the packet, so |
| * the copy processing will be completed with |
| * this fragment. |
| */ |
| next_flag = USE_NONE; |
| copy_done = B_TRUE; |
| } else if ((tcb->tx_buf.len + current_len + next_len) > |
| tcb->tx_buf.size) { |
| /* |
| * If the next fragment is too large to be |
| * copied to the current tx buffer, we need |
| * to complete the current copy processing. |
| */ |
| next_flag = (next_len > copy_thresh) ? |
| USE_DMA: USE_COPY; |
| copy_done = B_TRUE; |
| } else if (next_len > copy_thresh) { |
| /* |
| * The next fragment needs to be processed with |
				 * DMA binding. So the copy processing will be
| * completed with the current fragment. |
| */ |
| next_flag = USE_DMA; |
| copy_done = B_TRUE; |
| } else { |
| /* |
| * Continue to copy the next fragment to the |
| * current tx buffer. |
| */ |
| next_flag = USE_COPY; |
| copy_done = B_FALSE; |
| } |
| |
| desc_num = ixgbe_tx_copy(tx_ring, tcb, current_mp, |
| current_len, copy_done); |
| } else { |
| /* |
| * Check whether to use bcopy or DMA binding to process |
| * the next fragment. |
| */ |
| next_flag = (next_len > copy_thresh) ? |
| USE_DMA: USE_COPY; |
| ASSERT(copy_done == B_TRUE); |
| |
| desc_num = ixgbe_tx_bind(tx_ring, tcb, current_mp, |
| current_len); |
| } |
| |
| if (desc_num > 0) |
| desc_total += desc_num; |
| else if (desc_num < 0) |
| goto tx_failure; |
| |
| current_mp = next_mp; |
| current_len = next_len; |
| current_flag = next_flag; |
| } |
| |
| /* |
| * Attach the mblk to the last tx control block |
| */ |
| ASSERT(tcb); |
| ASSERT(tcb->mp == NULL); |
| tcb->mp = mp; |
| |
| /* |
	 * The 82598/82599 chipsets have a limitation: no more than 32 tx
	 * descriptors can be transmitted at one time.
	 *
	 * The workaround is to pull up the mblk and then send it out
	 * with DMA binding. By doing so, no more than MAX_COOKIE (18)
	 * descriptors are needed.
| */ |
| if (desc_total + 1 > IXGBE_TX_DESC_LIMIT) { |
| tx_ring->stat_break_tbd_limit++; |
| |
| /* |
| * Discard the mblk and free the used resources |
| */ |
| tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list); |
| while (tcb) { |
| tcb->mp = NULL; |
| ixgbe_free_tcb(tcb); |
| tcb = (tx_control_block_t *) |
| LIST_GET_NEXT(&pending_list, &tcb->link); |
| } |
| |
| /* |
| * Return the tx control blocks in the pending list to |
| * the free list. |
| */ |
| ixgbe_put_free_list(tx_ring, &pending_list); |
| |
| /* |
		 * Pull up the mblk and send it out with DMA binding.
| */ |
| if ((pull_mp = msgpullup(mp, -1)) == NULL) { |
| tx_ring->reschedule = B_TRUE; |
| |
| /* |
			 * If a new mblk has been allocated for the last
			 * header fragment of an LSO packet, we should
			 * restore the modified mp.
| */ |
| if (hdr_new_mp) { |
| hdr_new_mp->b_cont = NULL; |
| freeb(hdr_new_mp); |
| hdr_nmp->b_rptr -= hdr_frag_len; |
| if (hdr_pre_mp) |
| hdr_pre_mp->b_cont = hdr_nmp; |
| else |
| mp = hdr_nmp; |
| } |
| return (mp); |
| } |
| |
| LINK_LIST_INIT(&pending_list); |
| desc_total = 0; |
| |
| /* |
		 * If the packet is an LSO packet, we simply transmit
		 * the header in one descriptor with bcopy.
| */ |
| if ((ctx != NULL) && ctx->lso_flag) { |
| hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + |
| ctx->l4_hdr_len; |
| |
| tcb = ixgbe_get_free_list(tx_ring); |
| if (tcb == NULL) { |
| tx_ring->stat_fail_no_tcb++; |
| goto tx_failure; |
| } |
| desc_num = ixgbe_tx_copy(tx_ring, tcb, pull_mp, |
| hdr_len, B_TRUE); |
| LIST_PUSH_TAIL(&pending_list, &tcb->link); |
| desc_total += desc_num; |
| |
| pull_mp->b_rptr += hdr_len; |
| } |
| |
| tcb = ixgbe_get_free_list(tx_ring); |
| if (tcb == NULL) { |
| tx_ring->stat_fail_no_tcb++; |
| goto tx_failure; |
| } |
| if ((ctx != NULL) && ctx->lso_flag) { |
| desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp, |
| mbsize - hdr_len); |
| } else { |
| desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp, |
| mbsize); |
| } |
| if (desc_num < 0) { |
| goto tx_failure; |
| } |
| LIST_PUSH_TAIL(&pending_list, &tcb->link); |
| |
| desc_total += desc_num; |
| tcb->mp = pull_mp; |
| } |
| |
| /* |
	 * Before filling the tx descriptor ring with the data, we need
	 * to ensure there are adequate free descriptors for the transmit
	 * (including one context descriptor).
	 * Do not use up all the tx descriptors; otherwise tx recycling
	 * will fail and cause a false hang.
| */ |
| if (tx_ring->tbd_free <= (desc_total + 1)) { |
| tx_ring->tx_recycle(tx_ring); |
| } |
| |
| mutex_enter(&tx_ring->tx_lock); |
| /* |
	 * If the number of free tx descriptors is not enough for the
	 * transmit, then return mp.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure correctness when multiple threads access it in
	 * parallel.
| */ |
| if (tx_ring->tbd_free <= (desc_total + 1)) { |
| tx_ring->stat_fail_no_tbd++; |
| mutex_exit(&tx_ring->tx_lock); |
| goto tx_failure; |
| } |
| |
| desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx, |
| mbsize); |
| |
| ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1))); |
| |
| tx_ring->stat_obytes += mbsize; |
	tx_ring->stat_opackets++;
| |
| mutex_exit(&tx_ring->tx_lock); |
| |
| /* |
	 * Now that the transmission has succeeded, free the original mp
	 * if we used the pulled-up mblk for the transmission.
| */ |
| if (pull_mp) { |
| freemsg(mp); |
| } |
| |
| return (NULL); |
| |
| tx_failure: |
| /* |
	 * The transmission failed; free the pulled-up mblk, if any.
| */ |
| if (pull_mp) { |
| freemsg(pull_mp); |
| } |
| |
| /* |
	 * If a new mblk has been allocated for the last header
	 * fragment of an LSO packet, we should restore the
	 * modified mp.
| */ |
| if (hdr_new_mp) { |
| hdr_new_mp->b_cont = NULL; |
| freeb(hdr_new_mp); |
| hdr_nmp->b_rptr -= hdr_frag_len; |
| if (hdr_pre_mp) |
| hdr_pre_mp->b_cont = hdr_nmp; |
| else |
| mp = hdr_nmp; |
| } |
| /* |
| * Discard the mblk and free the used resources |
| */ |
| tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list); |
| while (tcb) { |
| tcb->mp = NULL; |
| |
| ixgbe_free_tcb(tcb); |
| |
| tcb = (tx_control_block_t *) |
| LIST_GET_NEXT(&pending_list, &tcb->link); |
| } |
| |
| /* |
| * Return the tx control blocks in the pending list to the free list. |
| */ |
| ixgbe_put_free_list(tx_ring, &pending_list); |
| |
	/* Transmit failed; do not drop the mblk, reschedule the transmit */
| tx_ring->reschedule = B_TRUE; |
| |
| return (mp); |
| } |
| |
| /* |
| * ixgbe_tx_copy |
| * |
| * Copy the mblk fragment to the pre-allocated tx buffer |
| */ |
| static int |
| ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp, |
| uint32_t len, boolean_t copy_done) |
| { |
| dma_buffer_t *tx_buf; |
| uint32_t desc_num; |
| _NOTE(ARGUNUSED(tx_ring)); |
| |
| tx_buf = &tcb->tx_buf; |
| |
| /* |
| * Copy the packet data of the mblk fragment into the |
| * pre-allocated tx buffer, which is maintained by the |
| * tx control block. |
| * |
| * Several mblk fragments can be copied into one tx buffer. |
| * The destination address of the current copied fragment in |
| * the tx buffer is next to the end of the previous copied |
| * fragment. |
| */ |
| if (len > 0) { |
| bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len); |
| |
| tx_buf->len += len; |
| tcb->frag_num++; |
| } |
| |
| desc_num = 0; |
| |
| /* |
| * If it is the last fragment copied to the current tx buffer, |
| * in other words, if there's no remaining fragment or the remaining |
| * fragment requires a new tx control block to process, we need to |
| * complete the current copy processing by syncing up the current |
| * DMA buffer and saving the descriptor data. |
| */ |
| if (copy_done) { |
| /* |
| * Sync the DMA buffer of the packet data |
| */ |
| DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV); |
| |
| tcb->tx_type = USE_COPY; |
| |
| /* |
| * Save the address and length to the private data structure |
| * of the tx control block, which will be used to fill the |
| * tx descriptor ring after all the fragments are processed. |
| */ |
| ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len); |
| desc_num++; |
| } |
| |
| return (desc_num); |
| } |
| |
| /* |
| * ixgbe_tx_bind |
| * |
| * Bind the mblk fragment with DMA |
| */ |
| static int |
| ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp, |
| uint32_t len) |
| { |
| int status, i; |
| ddi_dma_cookie_t dma_cookie; |
| uint_t ncookies; |
| int desc_num; |
| |
| /* |
| * Use DMA binding to process the mblk fragment |
| */ |
| status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL, |
| (caddr_t)mp->b_rptr, len, |
| DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, |
| 0, &dma_cookie, &ncookies); |
| |
| if (status != DDI_DMA_MAPPED) { |
| tx_ring->stat_fail_dma_bind++; |
| return (-1); |
| } |
| |
| tcb->frag_num++; |
| tcb->tx_type = USE_DMA; |
| /* |
	 * Each fragment can span several cookies. Each cookie is
	 * transmitted with one tx descriptor.
| */ |
| desc_num = 0; |
| for (i = ncookies; i > 0; i--) { |
| /* |
| * Save the address and length to the private data structure |
| * of the tx control block, which will be used to fill the |
| * tx descriptor ring after all the fragments are processed. |
| */ |
| ixgbe_save_desc(tcb, |
| dma_cookie.dmac_laddress, |
| dma_cookie.dmac_size); |
| |
| desc_num++; |
| |
| if (i > 1) |
| ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie); |
| } |
| |
| return (desc_num); |
| } |
| |
| /* |
| * ixgbe_get_context |
| * |
| * Get the context information from the mblk |
| */ |
| static int |
| ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx) |
| { |
| uint32_t start; |
| uint32_t hckflags; |
| uint32_t lsoflags; |
| uint32_t mss; |
| uint32_t len; |
| uint32_t size; |
| uint32_t offset; |
| unsigned char *pos; |
| ushort_t etype; |
| uint32_t mac_hdr_len; |
| uint32_t l4_proto; |
| uint32_t l4_hdr_len; |
| |
| ASSERT(mp != NULL); |
| |
| mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags); |
| bzero(ctx, sizeof (ixgbe_tx_context_t)); |
| |
| if (hckflags == 0) { |
| return (0); |
| } |
| |
| ctx->hcksum_flags = hckflags; |
| |
| mac_lso_get(mp, &mss, &lsoflags); |
| ctx->mss = mss; |
| ctx->lso_flag = (lsoflags == HW_LSO); |
| |
| /* |
	 * LSO relies on tx h/w checksum, so drop the packet here if
	 * the h/w checksum flags are not declared.
| */ |
| if (ctx->lso_flag) { |
| if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) && |
| (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) { |
| IXGBE_DEBUGLOG_0(NULL, "ixgbe_tx: h/w " |
| "checksum flags are not specified when doing LSO"); |
| return (-1); |
| } |
| } |
| |
| etype = 0; |
| mac_hdr_len = 0; |
| l4_proto = 0; |
| |
| /* |
	 * First, get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
| */ |
| size = len = MBLKL(mp); |
| offset = offsetof(struct ether_header, ether_type); |
| while (size <= offset) { |
| mp = mp->b_cont; |
| ASSERT(mp != NULL); |
| len = MBLKL(mp); |
| size += len; |
| } |
| pos = mp->b_rptr + offset + len - size; |
| |
| etype = ntohs(*(ushort_t *)(uintptr_t)pos); |
| if (etype == ETHERTYPE_VLAN) { |
| /* |
| * Get the position of the ether_type in VLAN header |
| */ |
| offset = offsetof(struct ether_vlan_header, ether_type); |
| while (size <= offset) { |
| mp = mp->b_cont; |
| ASSERT(mp != NULL); |
| len = MBLKL(mp); |
| size += len; |
| } |
| pos = mp->b_rptr + offset + len - size; |
| |
| etype = ntohs(*(ushort_t *)(uintptr_t)pos); |
| mac_hdr_len = sizeof (struct ether_vlan_header); |
| } else { |
| mac_hdr_len = sizeof (struct ether_header); |
| } |
| |
| /* |
| * Here we don't assume the IP(V6) header is fully included in |
| * one mblk fragment. |
| */ |
| switch (etype) { |
| case ETHERTYPE_IP: |
| if (ctx->lso_flag) { |
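			/*
			 * Zero the IP total length and IP header checksum
			 * fields; for LSO the hardware recomputes them for
			 * each segment it generates.
			 */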
| offset = offsetof(ipha_t, ipha_length) + mac_hdr_len; |
| while (size <= offset) { |
| mp = mp->b_cont; |
| ASSERT(mp != NULL); |
| len = MBLKL(mp); |
| size += len; |
| } |
| pos = mp->b_rptr + offset + len - size; |
| *((uint16_t *)(uintptr_t)(pos)) = 0; |
| |
| offset = offsetof(ipha_t, ipha_hdr_checksum) + |
| mac_hdr_len; |
| while (size <= offset) { |
| mp = mp->b_cont; |
| ASSERT(mp != NULL); |
| len = MBLKL(mp); |
| size += len; |
| } |
| pos = mp->b_rptr + offset + len - size; |
| *((uint16_t *)(uintptr_t)(pos)) = 0; |
| |
| /* |
			 * To perform ixgbe LSO, the TCP checksum field of
			 * the packet also needs to be filled with the
			 * pseudo-header checksum over:
			 * (ip_source_addr, ip_destination_addr, l4_proto)
			 * The TCP/IP stack has already done this.
| */ |
| } |
| |
| offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len; |
| while (size <= offset) { |
| mp = mp->b_cont; |
| ASSERT(mp != NULL); |
| len = MBLKL(mp); |
| size += len; |
| } |
| pos = mp->b_rptr + offset + len - size; |
| |
| l4_proto = *(uint8_t *)pos; |
| break; |
| case ETHERTYPE_IPV6: |
| offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len; |
| while (size <= offset) { |
| mp = mp->b_cont; |
| ASSERT(mp != NULL); |
| len = MBLKL(mp); |
| size += len; |
| } |
| pos = mp->b_rptr + offset + len - size; |
| |
| l4_proto = *(uint8_t *)pos; |
| break; |
| default: |
| /* Unrecoverable error */ |
| IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum"); |
| return (-2); |
| } |
| |
| if (ctx->lso_flag) { |
| offset = mac_hdr_len + start; |
| while (size <= offset) { |
| mp = mp->b_cont; |
| ASSERT(mp != NULL); |
| len = MBLKL(mp); |
| size += len; |
| } |
| pos = mp->b_rptr + offset + len - size; |
| |
| l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos); |
| } else { |
| /* |
| * l4 header length is only required for LSO |
| */ |
| l4_hdr_len = 0; |
| } |
| |
| ctx->mac_hdr_len = mac_hdr_len; |
| ctx->ip_hdr_len = start; |
| ctx->l4_proto = l4_proto; |
| ctx->l4_hdr_len = l4_hdr_len; |
| |
| return (0); |
| } |
| |
| /* |
| * ixgbe_check_context |
| * |
| * Check if a new context descriptor is needed |
| */ |
| static boolean_t |
| ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx) |
| { |
| ixgbe_tx_context_t *last; |
| |
| if (ctx == NULL) |
| return (B_FALSE); |
| |
| /* |
	 * Compare the context data retrieved from the mblk with the
	 * stored data of the last context descriptor. The fields that
	 * need to be checked are:
	 *	hcksum_flags
	 *	l4_proto
	 *	mac_hdr_len
	 *	ip_hdr_len
	 *	lso_flag
	 *	mss (only checked for LSO)
	 *	l4_hdr_len (only checked for LSO)
	 * If any of the above fields changes, a new context descriptor
	 * is needed.
| */ |
| last = &tx_ring->tx_context; |
| |
| if ((ctx->hcksum_flags != last->hcksum_flags) || |
| (ctx->l4_proto != last->l4_proto) || |
| (ctx->mac_hdr_len != last->mac_hdr_len) || |
| (ctx->ip_hdr_len != last->ip_hdr_len) || |
| (ctx->lso_flag != last->lso_flag) || |
| (ctx->lso_flag && ((ctx->mss != last->mss) || |
| (ctx->l4_hdr_len != last->l4_hdr_len)))) { |
| return (B_TRUE); |
| } |
| |
| return (B_FALSE); |
| } |
| |
| /* |
| * ixgbe_fill_context |
| * |
 * Fill the context descriptor with hardware checksum information
| */ |
| static void |
| ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd, |
| ixgbe_tx_context_t *ctx) |
| { |
| /* |
| * Fill the context descriptor with the checksum |
| * context information we've got. |
| */ |
| ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len; |
| ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len << |
| IXGBE_ADVTXD_MACLEN_SHIFT; |
| |
| ctx_tbd->type_tucmd_mlhl = |
| IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; |
| |
| if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM) |
| ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; |
| |
| if (ctx->hcksum_flags & HCK_PARTIALCKSUM) { |
| switch (ctx->l4_proto) { |
| case IPPROTO_TCP: |
| ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; |
| break; |
| case IPPROTO_UDP: |
| /* |
| * We don't have to explicitly set: |
| * ctx_tbd->type_tucmd_mlhl |= |
| * IXGBE_ADVTXD_TUCMD_L4T_UDP; |
| * Because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b |
| */ |
| break; |
| default: |
| /* Unrecoverable error */ |
| IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum"); |
| break; |
| } |
| } |
| |
| ctx_tbd->seqnum_seed = 0; |
| |
| if (ctx->lso_flag) { |
| ctx_tbd->mss_l4len_idx = |
| (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) | |
| (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT); |
| } else { |
| ctx_tbd->mss_l4len_idx = 0; |
| } |
| } |
| |
| /* |
| * ixgbe_tx_fill_ring |
| * |
| * Fill the tx descriptor ring with the data |
| */ |
| static int |
| ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list, |
| ixgbe_tx_context_t *ctx, size_t mbsize) |
| { |
| struct ixgbe_hw *hw = &tx_ring->ixgbe->hw; |
| boolean_t load_context; |
| uint32_t index, tcb_index, desc_num; |
| union ixgbe_adv_tx_desc *tbd, *first_tbd; |
| tx_control_block_t *tcb, *first_tcb; |
| uint32_t hcksum_flags; |
| int i; |
| |
| ASSERT(mutex_owned(&tx_ring->tx_lock)); |
| |
| tbd = NULL; |
| first_tbd = NULL; |
| first_tcb = NULL; |
| desc_num = 0; |
| hcksum_flags = 0; |
| load_context = B_FALSE; |
| |
| /* |
| * Get the index of the first tx descriptor that will be filled, |
	 * and the index of the first work list item that will be attached
	 * to the first used tx control block in the pending list.
| * Note: the two indexes are the same. |
| */ |
| index = tx_ring->tbd_tail; |
| tcb_index = tx_ring->tbd_tail; |
| |
| if (ctx != NULL) { |
| hcksum_flags = ctx->hcksum_flags; |
| |
| /* |
| * Check if a new context descriptor is needed for this packet |
| */ |
| load_context = ixgbe_check_context(tx_ring, ctx); |
| |
| if (load_context) { |
| tbd = &tx_ring->tbd_ring[index]; |
| |
| /* |
| * Fill the context descriptor with the |
			 * hardware checksum offload information.
| */ |
| ixgbe_fill_context( |
| (struct ixgbe_adv_tx_context_desc *)tbd, ctx); |
| |
| index = NEXT_INDEX(index, 1, tx_ring->ring_size); |
| desc_num++; |
| |
| /* |
| * Store the checksum context data if |
| * a new context descriptor is added |
| */ |
| tx_ring->tx_context = *ctx; |
| } |
| } |
| |
| first_tbd = &tx_ring->tbd_ring[index]; |
| |
| /* |
| * Fill tx data descriptors with the data saved in the pending list. |
| * The tx control blocks in the pending list are added to the work list |
| * at the same time. |
| * |
	 * The work list corresponds strictly 1:1 to the descriptor ring.
| * One item of the work list corresponds to one tx descriptor. Because |
| * one tx control block can span multiple tx descriptors, the tx |
| * control block will be added to the first work list item that |
| * corresponds to the first tx descriptor generated from that tx |
| * control block. |
| */ |
| tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list); |
| first_tcb = tcb; |
| while (tcb != NULL) { |
| |
| for (i = 0; i < tcb->desc_num; i++) { |
| tbd = &tx_ring->tbd_ring[index]; |
| |
| tbd->read.buffer_addr = tcb->desc[i].address; |
| tbd->read.cmd_type_len = tcb->desc[i].length; |
| |
| tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_DEXT |
| | IXGBE_ADVTXD_DTYP_DATA; |
| |
| tbd->read.olinfo_status = 0; |
| |
| index = NEXT_INDEX(index, 1, tx_ring->ring_size); |
| desc_num++; |
| } |
| |
| /* |
| * Add the tx control block to the work list |
| */ |
| ASSERT(tx_ring->work_list[tcb_index] == NULL); |
| tx_ring->work_list[tcb_index] = tcb; |
| |
| tcb_index = index; |
| tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list); |
| } |
| |
| if (load_context) { |
| /* |
| * Count the context descriptor for |
| * the first tx control block. |
| */ |
| first_tcb->desc_num++; |
| } |
| first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size); |
| |
| /* |
| * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only |
| * valid in the first descriptor of the packet. |
	 * The paylen is set in the first_tbd for all MAC types:
	 * 82599, X540 and X550 require the packet length in the paylen
	 * field, with or without LSO, while 82598 ignores it in
	 * non-LSO mode.
| */ |
| ASSERT(first_tbd != NULL); |
| first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS; |
| |
| switch (hw->mac.type) { |
| case ixgbe_mac_82598EB: |
| if (ctx != NULL && ctx->lso_flag) { |
| first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; |
| first_tbd->read.olinfo_status |= |
| (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len |
| - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT; |
| } |
| break; |
| |
| case ixgbe_mac_82599EB: |
| case ixgbe_mac_X540: |
| case ixgbe_mac_X550: |
| case ixgbe_mac_X550EM_x: |
| if (ctx != NULL && ctx->lso_flag) { |
| first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; |
| first_tbd->read.olinfo_status |= |
| (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len |
| - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT; |
| } else { |
| first_tbd->read.olinfo_status |= |
| (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT); |
| } |
| break; |
| |
| default: |
| break; |
| } |
| |
| /* Set hardware checksum bits */ |
| if (hcksum_flags != 0) { |
| if (hcksum_flags & HCK_IPV4_HDRCKSUM) |
| first_tbd->read.olinfo_status |= |
| IXGBE_ADVTXD_POPTS_IXSM; |
| if (hcksum_flags & HCK_PARTIALCKSUM) |
| first_tbd->read.olinfo_status |= |
| IXGBE_ADVTXD_POPTS_TXSM; |
| } |
| |
| /* |
	 * The last descriptor of the packet needs the End Of Packet (EOP)
	 * and Report Status (RS) bits set.
| */ |
| ASSERT(tbd != NULL); |
| tbd->read.cmd_type_len |= |
| IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS; |
| |
| /* |
| * Sync the DMA buffer of the tx descriptor ring |
| */ |
| DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV); |
| |
| /* |
| * Update the number of the free tx descriptors. |
| * The mutual exclusion between the transmission and the recycling |
| * (for the tx descriptor ring and the work list) is implemented |
| * with the atomic operation on the number of the free tx descriptors. |
| * |
| * Note: we should always decrement the counter tbd_free before |
	 * advancing the hardware TDT pointer, to avoid the race in which
	 * the transmit of the tx descriptors completes and tbd_free is
	 * increased by the tx recycling before the counter has been
	 * decremented here.
| */ |
| i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num); |
| ASSERT(i >= 0); |
| |
| tx_ring->tbd_tail = index; |
| |
| /* |
| * Advance the hardware TDT pointer of the tx descriptor ring |
| */ |
| IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index); |
| |
| if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) != |
| DDI_FM_OK) { |
| ddi_fm_service_impact(tx_ring->ixgbe->dip, |
| DDI_SERVICE_DEGRADED); |
| atomic_or_32(&tx_ring->ixgbe->ixgbe_state, IXGBE_ERROR); |
| } |
| |
| return (desc_num); |
| } |
| |
| /* |
| * ixgbe_save_desc |
| * |
| * Save the address/length pair to the private array |
| * of the tx control block. The address/length pairs |
| * will be filled into the tx descriptor ring later. |
| */ |
| static void |
| ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length) |
| { |
| sw_desc_t *desc; |
| |
| desc = &tcb->desc[tcb->desc_num]; |
| desc->address = address; |
| desc->length = length; |
| |
| tcb->desc_num++; |
| } |
| |
| /* |
| * ixgbe_tx_recycle_legacy |
| * |
| * Recycle the tx descriptors and tx control blocks. |
| * |
| * The work list is traversed to check if the corresponding |
| * tx descriptors have been transmitted. If so, the resources |
| * bound to the tx control blocks will be freed, and those |
| * tx control blocks will be returned to the free list. |
| */ |
| uint32_t |
| ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring) |
| { |
| uint32_t index, last_index, prev_index; |
| int desc_num; |
| boolean_t desc_done; |
| tx_control_block_t *tcb; |
| link_list_t pending_list; |
| ixgbe_t *ixgbe = tx_ring->ixgbe; |
| |
| mutex_enter(&tx_ring->recycle_lock); |
| |
| ASSERT(tx_ring->tbd_free <= tx_ring->ring_size); |
| |
| if (tx_ring->tbd_free == tx_ring->ring_size) { |
| tx_ring->recycle_fail = 0; |
| tx_ring->stall_watchdog = 0; |
| if (tx_ring->reschedule) { |
| tx_ring->reschedule = B_FALSE; |
| mac_tx_ring_update(ixgbe->mac_hdl, |
| tx_ring->ring_handle); |
| } |
| mutex_exit(&tx_ring->recycle_lock); |
| return (0); |
| } |
| |
| /* |
| * Sync the DMA buffer of the tx descriptor ring |
| */ |
| DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL); |
| |
| if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) { |
| mutex_exit(&tx_ring->recycle_lock); |
| ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); |
| atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR); |
| return (0); |
| } |
| |
| LINK_LIST_INIT(&pending_list); |
| desc_num = 0; |
| index = tx_ring->tbd_head; /* Index of next tbd/tcb to recycle */ |
| |
| tcb = tx_ring->work_list[index]; |
| ASSERT(tcb != NULL); |
| |
| while (tcb != NULL) { |
| /* |
| * Get the last tx descriptor of this packet. |
| * If the last tx descriptor is done, then |
| * we can recycle all descriptors of a packet |
| * which usually includes several tx control blocks. |
		 * For 82599, LSO descriptors cannot be recycled
		 * unless the whole packet's transmission is done.
		 * That's why packet-level recycling is used here.
		 * For 82598, there is no such limit.
| */ |
| last_index = tcb->last_index; |
| /* |
		 * MAX_TX_RING_SIZE is used to judge whether
		 * the index is valid.
| */ |
| if (last_index == MAX_TX_RING_SIZE) |
| break; |
| |
| /* |
| * Check if the Descriptor Done bit is set |
| */ |
| desc_done = tx_ring->tbd_ring[last_index].wb.status & |
| IXGBE_TXD_STAT_DD; |
| if (desc_done) { |
| /* |
| * recycle all descriptors of the packet |
| */ |
| while (tcb != NULL) { |
| /* |
| * Strip off the tx control block from |
| * the work list, and add it to the |
| * pending list. |
| */ |
| tx_ring->work_list[index] = NULL; |
| LIST_PUSH_TAIL(&pending_list, &tcb->link); |
| |
| /* |
| * Count the total number of the tx |
| * descriptors recycled |
| */ |
| desc_num += tcb->desc_num; |
| |
| index = NEXT_INDEX(index, tcb->desc_num, |
| tx_ring->ring_size); |
| |
| tcb = tx_ring->work_list[index]; |
| |
| prev_index = PREV_INDEX(index, 1, |
| tx_ring->ring_size); |
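				/*
				 * When the descriptor just before the new
				 * index is the packet's last descriptor,
				 * every tcb of this packet has been
				 * collected; leave the inner loop.
				 */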
| if (prev_index == last_index) |
| break; |
| } |
| } else { |
| break; |
| } |
| } |
| |
| /* |
| * If no tx descriptors are recycled, no need to do more processing |
| */ |
| if (desc_num == 0) { |
| tx_ring->recycle_fail++; |
| mutex_exit(&tx_ring->recycle_lock); |
| return (0); |
| } |
| |
| tx_ring->recycle_fail = 0; |
| tx_ring->stall_watchdog = 0; |
| |
| /* |
| * Update the head index of the tx descriptor ring |
| */ |
| tx_ring->tbd_head = index; |
| |
| /* |
| * Update the number of the free tx descriptors with atomic operations |
| */ |
| atomic_add_32(&tx_ring->tbd_free, desc_num); |
| |
| if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) && |
| (tx_ring->reschedule)) { |
| tx_ring->reschedule = B_FALSE; |
| mac_tx_ring_update(ixgbe->mac_hdl, |
| tx_ring->ring_handle); |
| } |
| mutex_exit(&tx_ring->recycle_lock); |
| |
| /* |
| * Free the resources used by the tx control blocks |
| * in the pending list |
| */ |
| tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list); |
| while (tcb != NULL) { |
| /* |
| * Release the resources occupied by the tx control block |
| */ |
| ixgbe_free_tcb(tcb); |
| |
| tcb = (tx_control_block_t *) |
| LIST_GET_NEXT(&pending_list, &tcb->link); |
| } |
| |
| /* |
| * Add the tx control blocks in the pending list to the free list. |
| */ |
| ixgbe_put_free_list(tx_ring, &pending_list); |
| |
| return (desc_num); |
| } |
| |
| /* |
| * ixgbe_tx_recycle_head_wb |
| * |
| * Check the head write-back, and recycle all the transmitted |
| * tx descriptors and tx control blocks. |
| */ |
| uint32_t |
| ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring) |
| { |
| uint32_t index; |
| uint32_t head_wb; |
| int desc_num; |
| tx_control_block_t *tcb; |
| link_list_t pending_list; |
| ixgbe_t *ixgbe = tx_ring->ixgbe; |
| |
| mutex_enter(&tx_ring->recycle_lock); |
| |
| ASSERT(tx_ring->tbd_free <= tx_ring->ring_size); |
| |
| if (tx_ring->tbd_free == tx_ring->ring_size) { |
| tx_ring->recycle_fail = 0; |
| tx_ring->stall_watchdog = 0; |
| if (tx_ring->reschedule) { |
| tx_ring->reschedule = B_FALSE; |
| mac_tx_ring_update(ixgbe->mac_hdl, |
| tx_ring->ring_handle); |
| } |
| mutex_exit(&tx_ring->recycle_lock); |
| return (0); |
| } |
| |
| /* |
| * Sync the DMA buffer of the tx descriptor ring |
| * |
	 * Note: for head write-back mode, the tx descriptors will not
	 * be written back, but the head write-back value is stored in
	 * an extra tbd at the end of the DMA area; we still need to
	 * sync that value for the kernel. So, rather than a full
	 * ring sync:
| * |
| * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL); |
| */ |
| (void) ddi_dma_sync(tx_ring->tbd_area.dma_handle, |
| sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size, |
| sizeof (uint32_t), |
| DDI_DMA_SYNC_FORKERNEL); |
| |
| if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) { |
| mutex_exit(&tx_ring->recycle_lock); |
| ddi_fm_service_impact(ixgbe->dip, |
| DDI_SERVICE_DEGRADED); |
| atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR); |
| return (0); |
| } |
| |
| LINK_LIST_INIT(&pending_list); |
| desc_num = 0; |
| index = tx_ring->tbd_head; /* Next index to clean */ |
| |
| /* |
| * Get the value of head write-back |
| */ |
| head_wb = *tx_ring->tbd_head_wb; |
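	/*
	 * head_wb is the ring index just past the last descriptor the
	 * hardware has completed, so everything from tbd_head up to,
	 * but not including, head_wb can be recycled.
	 */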
| while (index != head_wb) { |
| tcb = tx_ring->work_list[index]; |
| ASSERT(tcb != NULL); |
| |
| if (OFFSET(index, head_wb, tx_ring->ring_size) < |
| tcb->desc_num) { |
| /* |
| * The current tx control block is not |
| * completely transmitted, stop recycling |
| */ |
| break; |
| } |
| |
| /* |
| * Strip off the tx control block from the work list, |
| * and add it to the pending list. |
| */ |
| tx_ring->work_list[index] = NULL; |
| LIST_PUSH_TAIL(&pending_list, &tcb->link); |
| |
| /* |
| * Advance the index of the tx descriptor ring |
| */ |
| index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size); |
| |
| /* |
| * Count the total number of the tx descriptors recycled |
| */ |
| desc_num += tcb->desc_num; |
| } |
| |
| /* |
| * If no tx descriptors are recycled, no need to do more processing |
| */ |
| if (desc_num == 0) { |
| tx_ring->recycle_fail++; |
| mutex_exit(&tx_ring->recycle_lock); |
| return (0); |
| } |
| |
| tx_ring->recycle_fail = 0; |
| tx_ring->stall_watchdog = 0; |
| |
| /* |
| * Update the head index of the tx descriptor ring |
| */ |
| tx_ring->tbd_head = index; |
| |
| /* |
| * Update the number of the free tx descriptors with atomic operations |
| */ |
| atomic_add_32(&tx_ring->tbd_free, desc_num); |
| |
| if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) && |
| (tx_ring->reschedule)) { |
| tx_ring->reschedule = B_FALSE; |
| mac_tx_ring_update(ixgbe->mac_hdl, |
| tx_ring->ring_handle); |
| } |
| mutex_exit(&tx_ring->recycle_lock); |
| |
| /* |
| * Free the resources used by the tx control blocks |
| * in the pending list |
| */ |
| tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list); |
| while (tcb) { |
| /* |
| * Release the resources occupied by the tx control block |
| */ |
| ixgbe_free_tcb(tcb); |
| |
| tcb = (tx_control_block_t *) |
| LIST_GET_NEXT(&pending_list, &tcb->link); |
| } |
| |
| /* |
| * Add the tx control blocks in the pending list to the free list. |
| */ |
| ixgbe_put_free_list(tx_ring, &pending_list); |
| |
| return (desc_num); |
| } |
| |
| /* |
| * ixgbe_free_tcb - free up the tx control block |
| * |
| * Free the resources of the tx control block, including |
 * unbinding the previously bound DMA handle, and resetting other
 * control fields.
| */ |
| void |
| ixgbe_free_tcb(tx_control_block_t *tcb) |
| { |
| switch (tcb->tx_type) { |
| case USE_COPY: |
| /* |
| * Reset the buffer length that is used for copy |
| */ |
| tcb->tx_buf.len = 0; |
| break; |
| case USE_DMA: |
| /* |
| * Release the DMA resource that is used for |
| * DMA binding. |
| */ |
| (void) ddi_dma_unbind_handle(tcb->tx_dma_handle); |
| break; |
| default: |
| break; |
| } |
| |
| /* |
| * Free the mblk |
| */ |
| if (tcb->mp != NULL) { |
| freemsg(tcb->mp); |
| tcb->mp = NULL; |
| } |
| |
| tcb->tx_type = USE_NONE; |
| tcb->last_index = MAX_TX_RING_SIZE; |
| tcb->frag_num = 0; |
| tcb->desc_num = 0; |
| } |
| |
| /* |
| * ixgbe_get_free_list - Get a free tx control block from the free list |
| * |
 * The atomic operation on the number of available tx control blocks
 * in the free list is used to keep this routine mutually exclusive
 * with the routine ixgbe_put_free_list.
| */ |
| static tx_control_block_t * |
| ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring) |
| { |
| tx_control_block_t *tcb; |
| |
| /* |
	 * Check and update the number of free tx control blocks
	 * in the free list.
| */ |
| if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0) |
| return (NULL); |
| |
| mutex_enter(&tx_ring->tcb_head_lock); |
| |
| tcb = tx_ring->free_list[tx_ring->tcb_head]; |
| ASSERT(tcb != NULL); |
| tx_ring->free_list[tx_ring->tcb_head] = NULL; |
| tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1, |
| tx_ring->free_list_size); |
| |
| mutex_exit(&tx_ring->tcb_head_lock); |
| |
| return (tcb); |
| } |
| |
| /* |
| * ixgbe_put_free_list |
| * |
| * Put a list of used tx control blocks back to the free list |
| * |
 * A mutex is used here to ensure serialization. The mutual exclusion
| * between ixgbe_get_free_list and ixgbe_put_free_list is implemented with |
| * the atomic operation on the counter tcb_free. |
| */ |
| void |
| ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list) |
| { |
| uint32_t index; |
| int tcb_num; |
| tx_control_block_t *tcb; |
| |
| mutex_enter(&tx_ring->tcb_tail_lock); |
| |
| index = tx_ring->tcb_tail; |
| |
| tcb_num = 0; |
| tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list); |
| while (tcb != NULL) { |
| ASSERT(tx_ring->free_list[index] == NULL); |
| tx_ring->free_list[index] = tcb; |
| |
| tcb_num++; |
| |
| index = NEXT_INDEX(index, 1, tx_ring->free_list_size); |
| |
| tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list); |
| } |
| |
| tx_ring->tcb_tail = index; |
| |
| /* |
	 * Update the number of free tx control blocks
	 * in the free list. This operation must be placed
	 * under the protection of the lock.
| */ |
| atomic_add_32(&tx_ring->tcb_free, tcb_num); |
| |
| mutex_exit(&tx_ring->tcb_tail_lock); |
| } |