blob: e4a5ef5c5b8947976cf1d5ea4fe63875029288cd [file] [log] [blame]
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stream.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/socket.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/tcp_seq.h>
#include <netinet/sctp.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip_if.h>
#include <inet/ip6.h>
#include <inet/mib2.h>
#include <inet/ipclassifier.h>
#include <inet/ipp_common.h>
#include <inet/ipsec_impl.h>
#include <inet/sctp_ip.h>
#include "sctp_impl.h"
#include "sctp_asconf.h"
#include "sctp_addr.h"
/*
* kmem cache backing every sctp_set_t handled in this file: created in
* sctp_sets_init(), destroyed in sctp_sets_fini(), allocated from in
* sctp_ack_add() and released to in sctp_free_set()/sctp_ack_add().
*/
static struct kmem_cache *sctp_kmem_set_cache;
/*
* PR-SCTP comments.
*
* When we get a valid Forward TSN chunk, we check the fragment list for this
* SSN and preceding SSNs and free all of them. Further, if this Forward TSN
* causes the next expected SSN to be present in the stream queue, we deliver
* any such stranded messages upstream. We also update the SACK info
* appropriately. When checking for advancing the cumulative ack (in
* sctp_cumack()) we must check for abandoned chunks and messages. While
* traversing the transmit list, if we come across an abandoned chunk, we can
* skip the message (i.e.
* take it out of the (re)transmit list) since this message, and hence this
* chunk, has been marked abandoned by sctp_rexmit(). If we come across an
* unsent chunk for a message this now abandoned we need to check if a
* Forward TSN needs to be sent, this could be a case where we deferred sending
* a Forward TSN in sctp_get_msg_to_send(). Further, after processing a
* SACK we check if the Advanced peer ack point can be moved ahead, i.e.
* if we can send a Forward TSN via sctp_check_abandoned_data().
*/
/*
 * Release every element of the set list starting at s back to the set
 * cache.  A NULL list is accepted and is a no-op.
 */
void
sctp_free_set(sctp_set_t *s)
{
	sctp_set_t	*following;

	for (; s != NULL; s = following) {
		/* Grab the successor before the node is returned to the cache. */
		following = s->next;
		kmem_cache_free(sctp_kmem_set_cache, s);
	}
}
/*
 * Record tsn in the ordered, doubly linked list of [begin, end] TSN sets
 * rooted at *head, keeping *num equal to the number of sets in the list.
 *
 * Allocation uses KM_NOSLEEP; if it fails the TSN is simply not recorded
 * and the list is left unchanged.  Nothing is done if head or num is NULL.
 */
static void
sctp_ack_add(sctp_set_t **head, uint32_t tsn, int *num)
{
	sctp_set_t	*cur;
	sctp_set_t	*node;

	if (head == NULL || num == NULL)
		return;

	ASSERT(*num >= 0);
	ASSERT((*num == 0 && *head == NULL) || (*num > 0 && *head != NULL));

	/* Empty list: the TSN becomes the one and only set. */
	if (*head == NULL) {
		node = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
		*head = node;
		if (node == NULL)
			return;
		node->next = NULL;
		node->prev = NULL;
		node->begin = tsn;
		node->end = tsn;
		*num = 1;
		return;
	}

	ASSERT((*head)->prev == NULL);

	/*
	 * A TSN strictly before (and not adjacent to) the first set needs a
	 * new head.  Doing it up front keeps the head pointer out of the
	 * walk below.
	 */
	if (SEQ_LT(tsn + 1, (*head)->begin)) {
		node = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
		if (node == NULL)
			return;
		node->begin = tsn;
		node->end = tsn;
		node->prev = NULL;
		node->next = *head;
		(*head)->prev = node;
		*head = node;
		(*num)++;
		return;
	}

	/*
	 * Walk the sets in order.  For the set under inspection (cur):
	 *
	 * 1. tsn lies entirely below cur: insert a new set before cur.
	 * 2. tsn is adjacent to cur from below: extend cur downwards.
	 * 3. tsn is within cur: nothing to do.
	 * 4. tsn is adjacent to cur from above: extend cur upwards, and
	 *    4a. merge cur with its successor if they now touch.
	 * 5. cur is the last set and tsn is above it: append a new set.
	 *
	 * Otherwise tsn is beyond cur and we move on to the next set.
	 */
	cur = *head;
	for (;;) {
		if (SEQ_LT(tsn + 1, cur->begin)) {
			/* 1 */
			node = kmem_cache_alloc(sctp_kmem_set_cache,
			    KM_NOSLEEP);
			if (node == NULL)
				return;
			node->begin = tsn;
			node->end = tsn;
			node->next = cur;
			node->prev = NULL;
			if (cur->prev) {
				node->prev = cur->prev;
				cur->prev->next = node;
			}
			cur->prev = node;
			(*num)++;
			return;
		}

		if ((tsn + 1) == cur->begin) {
			/* 2 */
			cur->begin = tsn;
			return;
		}

		if (SEQ_GEQ(tsn, cur->begin) && SEQ_LEQ(tsn, cur->end)) {
			/* 3 */
			return;
		}

		if ((cur->end + 1) == tsn) {
			sctp_set_t	*after = cur->next;

			/* 4 */
			cur->end = tsn;
			if (after != NULL && (tsn + 1) == after->begin) {
				/* 4a */
				cur->end = after->end;
				cur->next = after->next;
				if (after->next != NULL)
					after->next->prev = cur;
				kmem_cache_free(sctp_kmem_set_cache, after);
				(*num)--;
			}
			return;
		}

		if (cur->next == NULL) {
			/* 5 */
			node = kmem_cache_alloc(sctp_kmem_set_cache,
			    KM_NOSLEEP);
			if (node == NULL)
				return;
			node->begin = tsn;
			node->end = tsn;
			node->next = NULL;
			node->prev = cur;
			cur->next = node;
			(*num)++;
			return;
		}

		cur = cur->next;
	}
}
/*
 * Drop from the set list rooted at *head every TSN up to and including
 * end.  Sets that fall entirely at or below end are freed, a set that
 * straddles end is trimmed, and *num is recomputed by counting what is
 * left.  Nothing is done if head, *head or num is NULL.
 */
static void
sctp_ack_rem(sctp_set_t **head, uint32_t end, int *num)
{
	sctp_set_t	*cur;
	sctp_set_t	*doomed;
	int		count;

	if (head == NULL || *head == NULL || num == NULL)
		return;

	/* end precedes the first set: there is nothing to remove. */
	if (SEQ_LT(end, (*head)->begin))
		return;

	/* Advance to the first set whose end is not below 'end' (or the last). */
	cur = *head;
	while (cur->next && !SEQ_LEQ(end, cur->end))
		cur = cur->next;

	if (SEQ_GEQ(end, cur->begin) && SEQ_LT(end, cur->end)) {
		/* end falls inside cur: keep only the part above it. */
		cur->begin = end + 1;
		if (cur == *head)
			return;
	} else if (SEQ_GEQ(end, cur->end)) {
		/* cur is completely covered, so it goes as well. */
		cur = cur->next;
	}

	/* Cut the list just before cur and free the leading part. */
	doomed = *head;
	*head = cur;
	if (cur != NULL && cur->prev != NULL) {
		cur->prev->next = NULL;
		cur->prev = NULL;
	}
	sctp_free_set(doomed);

	/* Recount the surviving sets. */
	count = 0;
	for (cur = *head; cur != NULL; cur = cur->next)
		count++;
	*num = count;
}
/*
 * Create the kmem cache from which sctp_set_t entries are allocated.
 * No constructor, destructor or reclaim callbacks are registered.
 */
void
sctp_sets_init(void)
{
	sctp_kmem_set_cache = kmem_cache_create("sctp_set_cache",
	    sizeof (sctp_set_t), 0, NULL, NULL, NULL, NULL,
	    NULL, 0);
}
/*
 * Destroy the sctp_set_t kmem cache created by sctp_sets_init().
 */
void
sctp_sets_fini(void)
{
	kmem_cache_destroy(sctp_kmem_set_cache);
}
/*
 * Validate the chunk header located at rptr.
 *
 * rptr		start of the candidate chunk
 * remaining	number of bytes available starting at rptr; may be zero or
 *		negative (sctp_next_chunk() can step past the end of the
 *		buffer when the trailing padding is missing)
 *
 * Returns rptr cast to a chunk header if a full header is available and
 * the length it advertises is at least a header and fits in 'remaining';
 * otherwise returns NULL.
 */
sctp_chunk_hdr_t *
sctp_first_chunk(uchar_t *rptr, ssize_t remaining)
{
	sctp_chunk_hdr_t	*ch;
	uint16_t		ch_len;

	/*
	 * The comparison must be signed.  Against a bare sizeof (size_t) a
	 * negative 'remaining' would be converted to a huge unsigned value,
	 * pass this check, and the header would be read out of bounds.
	 */
	if (remaining < (ssize_t)sizeof (*ch)) {
		return (NULL);
	}

	ch = (sctp_chunk_hdr_t *)rptr;
	ch_len = ntohs(ch->sch_len);

	if (ch_len < sizeof (*ch) || remaining < ch_len) {
		return (NULL);
	}

	return (ch);
}
/*
 * Step from chunk ch to the chunk that follows it.
 *
 * The length of ch, rounded up to a multiple of SCTP_ALIGN, is subtracted
 * from *remaining and the header found at the resulting position is
 * validated by sctp_first_chunk().  Returns that header, or NULL if ch is
 * NULL or no valid chunk follows.
 */
sctp_chunk_hdr_t *
sctp_next_chunk(sctp_chunk_hdr_t *ch, ssize_t *remaining)
{
	uint16_t	chunk_len;
	int		padding;
	uchar_t		*following;

	if (ch == NULL)
		return (NULL);

	chunk_len = ntohs(ch->sch_len);

	/* Bytes needed to bring the chunk up to the next alignment boundary. */
	padding = chunk_len & (SCTP_ALIGN - 1);
	if (padding != 0)
		padding = SCTP_ALIGN - padding;

	*remaining -= (chunk_len + padding);
	following = (uchar_t *)ch + chunk_len + padding;

	return (sctp_first_chunk(following, *remaining));
}
/*
 * Attach ancillary data to a received SCTP segment.
 *
 * A new M_PROTO mblk holding a T_unitdata_ind is allocated and prepended
 * to *mp.  It always carries the source address taken from fp (as a
 * sockaddr_in for AF_INET connections, sockaddr_in6 otherwise) so that
 * recvfrom() can populate the msg_name field.  The options area holds
 * SCTP_SNDRCV information built from dcp if sctp_recvsndrcvinfo is set,
 * followed by each IPv6 ancillary item the application enabled in
 * conn_recv_ancillary whose value differs from the last one recorded in
 * the sctp_t; the recorded value is updated as each item is emitted.
 *
 * Returns 0 on success, in which case *mp points at the new lead mblk.
 * Returns -1 if a buffer could not be allocated; *mp is then unchanged.
 */
static int
sctp_input_add_ancillary(sctp_t *sctp, mblk_t **mp, sctp_data_hdr_t *dcp,
    sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
{
	struct T_unitdata_ind	*tudi;
	int			optlen;
	int			hdrlen;
	uchar_t			*optptr;
	struct cmsghdr		*cmsg;
	mblk_t			*mp1;
	struct sockaddr_in6	sin_buf[1];
	struct sockaddr_in6	*sin6;
	struct sockaddr_in	*sin4;
	crb_t			addflag;	/* Which pieces to add */
	conn_t			*connp = sctp->sctp_connp;

	sin4 = NULL;
	sin6 = NULL;

	optlen = hdrlen = 0;
	addflag.crb_all = 0;

	/*
	 * The whole sockaddr is copied into the message below, so clear it
	 * first; otherwise the fields not assigned here (sin_zero, flowinfo,
	 * scope id, ...) would carry stale stack contents upstream.
	 */
	bzero(sin_buf, sizeof (sin_buf));

	/* Figure out address size */
	if (connp->conn_family == AF_INET) {
		sin4 = (struct sockaddr_in *)sin_buf;
		sin4->sin_family = AF_INET;
		sin4->sin_port = connp->conn_fport;
		IN6_V4MAPPED_TO_IPADDR(&fp->faddr, sin4->sin_addr.s_addr);
		hdrlen = sizeof (*tudi) + sizeof (*sin4);
	} else {
		sin6 = sin_buf;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = connp->conn_fport;
		sin6->sin6_addr = fp->faddr;
		hdrlen = sizeof (*tudi) + sizeof (*sin6);
	}

	/* If app asked to receive send / recv info */
	if (sctp->sctp_recvsndrcvinfo)
		optlen += sizeof (*cmsg) + sizeof (struct sctp_sndrcvinfo);

	if (connp->conn_recv_ancillary.crb_all == 0)
		goto noancillary;

	/*
	 * First pass: decide which items to add and how much room they
	 * need.  Every size computed here must match exactly what the
	 * second pass writes, since b_wptr and OPT_length are derived
	 * from optlen.
	 */
	if (connp->conn_recv_ancillary.crb_ip_recvpktinfo &&
	    ira->ira_ruifindex != sctp->sctp_recvifindex) {
		optlen += sizeof (*cmsg) + sizeof (struct in6_pktinfo);
		if (hdrlen == 0)
			hdrlen = sizeof (struct T_unitdata_ind);
		addflag.crb_ip_recvpktinfo = 1;
	}
	/* If app asked for hoplimit and it has changed ... */
	if (connp->conn_recv_ancillary.crb_ipv6_recvhoplimit &&
	    ipp->ipp_hoplimit != sctp->sctp_recvhops) {
		optlen += sizeof (*cmsg) + sizeof (uint_t);
		if (hdrlen == 0)
			hdrlen = sizeof (struct T_unitdata_ind);
		addflag.crb_ipv6_recvhoplimit = 1;
	}
	/* If app asked for tclass and it has changed ... */
	if (connp->conn_recv_ancillary.crb_ipv6_recvtclass &&
	    ipp->ipp_tclass != sctp->sctp_recvtclass) {
		/* Written below as a cmsghdr, so reserve a cmsghdr. */
		optlen += sizeof (*cmsg) + sizeof (uint_t);
		if (hdrlen == 0)
			hdrlen = sizeof (struct T_unitdata_ind);
		addflag.crb_ipv6_recvtclass = 1;
	}
	/* If app asked for hopbyhop headers and it has changed ... */
	if (connp->conn_recv_ancillary.crb_ipv6_recvhopopts &&
	    ip_cmpbuf(sctp->sctp_hopopts, sctp->sctp_hopoptslen,
	    (ipp->ipp_fields & IPPF_HOPOPTS),
	    ipp->ipp_hopopts, ipp->ipp_hopoptslen)) {
		optlen += sizeof (*cmsg) + ipp->ipp_hopoptslen -
		    sctp->sctp_v6label_len;
		if (hdrlen == 0)
			hdrlen = sizeof (struct T_unitdata_ind);
		addflag.crb_ipv6_recvhopopts = 1;
		if (!ip_allocbuf((void **)&sctp->sctp_hopopts,
		    &sctp->sctp_hopoptslen,
		    (ipp->ipp_fields & IPPF_HOPOPTS),
		    ipp->ipp_hopopts, ipp->ipp_hopoptslen))
			return (-1);
	}
	/* If app asked for dst headers before routing headers ... */
	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts &&
	    ip_cmpbuf(sctp->sctp_rthdrdstopts, sctp->sctp_rthdrdstoptslen,
	    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
	    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen)) {
		optlen += sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
		if (hdrlen == 0)
			hdrlen = sizeof (struct T_unitdata_ind);
		addflag.crb_ipv6_recvrthdrdstopts = 1;
		if (!ip_allocbuf((void **)&sctp->sctp_rthdrdstopts,
		    &sctp->sctp_rthdrdstoptslen,
		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen))
			return (-1);
	}
	/* If app asked for routing headers and it has changed ... */
	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdr &&
	    ip_cmpbuf(sctp->sctp_rthdr, sctp->sctp_rthdrlen,
	    (ipp->ipp_fields & IPPF_RTHDR),
	    ipp->ipp_rthdr, ipp->ipp_rthdrlen)) {
		optlen += sizeof (*cmsg) + ipp->ipp_rthdrlen;
		if (hdrlen == 0)
			hdrlen = sizeof (struct T_unitdata_ind);
		addflag.crb_ipv6_recvrthdr = 1;
		if (!ip_allocbuf((void **)&sctp->sctp_rthdr,
		    &sctp->sctp_rthdrlen,
		    (ipp->ipp_fields & IPPF_RTHDR),
		    ipp->ipp_rthdr, ipp->ipp_rthdrlen))
			return (-1);
	}
	/* If app asked for dest headers and it has changed ... */
	if (connp->conn_recv_ancillary.crb_ipv6_recvdstopts &&
	    ip_cmpbuf(sctp->sctp_dstopts, sctp->sctp_dstoptslen,
	    (ipp->ipp_fields & IPPF_DSTOPTS),
	    ipp->ipp_dstopts, ipp->ipp_dstoptslen)) {
		optlen += sizeof (*cmsg) + ipp->ipp_dstoptslen;
		if (hdrlen == 0)
			hdrlen = sizeof (struct T_unitdata_ind);
		addflag.crb_ipv6_recvdstopts = 1;
		if (!ip_allocbuf((void **)&sctp->sctp_dstopts,
		    &sctp->sctp_dstoptslen,
		    (ipp->ipp_fields & IPPF_DSTOPTS),
		    ipp->ipp_dstopts, ipp->ipp_dstoptslen))
			return (-1);
	}
noancillary:
	/* Nothing to add */
	if (hdrlen == 0)
		return (-1);

	mp1 = allocb(hdrlen + optlen + sizeof (void *), BPRI_MED);
	if (mp1 == NULL)
		return (-1);
	mp1->b_cont = *mp;
	*mp = mp1;
	mp1->b_rptr += sizeof (void *);  /* pointer worth of padding */
	mp1->b_wptr = mp1->b_rptr + hdrlen + optlen;
	DB_TYPE(mp1) = M_PROTO;
	tudi = (struct T_unitdata_ind *)mp1->b_rptr;
	tudi->PRIM_type = T_UNITDATA_IND;
	tudi->SRC_length = sin4 ? sizeof (*sin4) : sizeof (*sin6);
	tudi->SRC_offset = sizeof (*tudi);
	tudi->OPT_offset = sizeof (*tudi) + tudi->SRC_length;
	tudi->OPT_length = optlen;
	if (sin4) {
		bcopy(sin4, tudi + 1, sizeof (*sin4));
	} else {
		bcopy(sin6, tudi + 1, sizeof (*sin6));
	}
	optptr = (uchar_t *)tudi + tudi->OPT_offset;

	/* Second pass: emit the items selected above, in the same order. */
	if (sctp->sctp_recvsndrcvinfo) {
		/* XXX need backout method if memory allocation fails. */
		struct sctp_sndrcvinfo *sri;

		cmsg = (struct cmsghdr *)optptr;
		cmsg->cmsg_level = IPPROTO_SCTP;
		cmsg->cmsg_type = SCTP_SNDRCV;
		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*sri);
		optptr += sizeof (*cmsg);

		sri = (struct sctp_sndrcvinfo *)(cmsg + 1);
		ASSERT(OK_32PTR(sri));
		sri->sinfo_stream = ntohs(dcp->sdh_sid);
		sri->sinfo_ssn = ntohs(dcp->sdh_ssn);
		if (SCTP_DATA_GET_UBIT(dcp)) {
			sri->sinfo_flags = MSG_UNORDERED;
		} else {
			sri->sinfo_flags = 0;
		}
		sri->sinfo_ppid = dcp->sdh_payload_id;
		sri->sinfo_context = 0;
		sri->sinfo_timetolive = 0;
		sri->sinfo_tsn = ntohl(dcp->sdh_tsn);
		sri->sinfo_cumtsn = sctp->sctp_ftsn;
		sri->sinfo_assoc_id = 0;

		optptr += sizeof (*sri);
	}

	/*
	 * If app asked for pktinfo and the index has changed ...
	 * Note that the local address never changes for the connection.
	 */
	if (addflag.crb_ip_recvpktinfo) {
		struct in6_pktinfo *pkti;
		uint_t ifindex;

		ifindex = ira->ira_ruifindex;
		cmsg = (struct cmsghdr *)optptr;
		cmsg->cmsg_level = IPPROTO_IPV6;
		cmsg->cmsg_type = IPV6_PKTINFO;
		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*pkti);
		optptr += sizeof (*cmsg);

		pkti = (struct in6_pktinfo *)optptr;
		if (connp->conn_family == AF_INET6)
			pkti->ipi6_addr = sctp->sctp_ip6h->ip6_src;
		else
			IN6_IPADDR_TO_V4MAPPED(sctp->sctp_ipha->ipha_src,
			    &pkti->ipi6_addr);

		pkti->ipi6_ifindex = ifindex;
		optptr += sizeof (*pkti);
		ASSERT(OK_32PTR(optptr));
		/* Save as "last" value */
		sctp->sctp_recvifindex = ifindex;
	}
	/* If app asked for hoplimit and it has changed ... */
	if (addflag.crb_ipv6_recvhoplimit) {
		cmsg = (struct cmsghdr *)optptr;
		cmsg->cmsg_level = IPPROTO_IPV6;
		cmsg->cmsg_type = IPV6_HOPLIMIT;
		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
		optptr += sizeof (*cmsg);

		*(uint_t *)optptr = ipp->ipp_hoplimit;
		optptr += sizeof (uint_t);
		ASSERT(OK_32PTR(optptr));
		/* Save as "last" value */
		sctp->sctp_recvhops = ipp->ipp_hoplimit;
	}
	/* If app asked for tclass and it has changed ... */
	if (addflag.crb_ipv6_recvtclass) {
		cmsg = (struct cmsghdr *)optptr;
		cmsg->cmsg_level = IPPROTO_IPV6;
		cmsg->cmsg_type = IPV6_TCLASS;
		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
		optptr += sizeof (*cmsg);

		*(uint_t *)optptr = ipp->ipp_tclass;
		optptr += sizeof (uint_t);
		ASSERT(OK_32PTR(optptr));
		/* Save as "last" value */
		sctp->sctp_recvtclass = ipp->ipp_tclass;
	}
	if (addflag.crb_ipv6_recvhopopts) {
		uint_t	hoplen;

		/*
		 * Only ipp_hopoptslen - sctp_v6label_len bytes were reserved
		 * for this item, so emit exactly that many.
		 * NOTE(review): this assumes the label occupies the leading
		 * sctp_v6label_len bytes of the hop-by-hop buffer and is
		 * meant to be hidden from the application -- confirm.
		 */
		hoplen = ipp->ipp_hopoptslen - sctp->sctp_v6label_len;

		cmsg = (struct cmsghdr *)optptr;
		cmsg->cmsg_level = IPPROTO_IPV6;
		cmsg->cmsg_type = IPV6_HOPOPTS;
		cmsg->cmsg_len = sizeof (*cmsg) + hoplen;
		optptr += sizeof (*cmsg);

		bcopy((uchar_t *)ipp->ipp_hopopts + sctp->sctp_v6label_len,
		    optptr, hoplen);
		optptr += hoplen;
		ASSERT(OK_32PTR(optptr));
		/* Save as last value */
		ip_savebuf((void **)&sctp->sctp_hopopts,
		    &sctp->sctp_hopoptslen,
		    (ipp->ipp_fields & IPPF_HOPOPTS),
		    ipp->ipp_hopopts, ipp->ipp_hopoptslen);
	}
	if (addflag.crb_ipv6_recvrthdrdstopts) {
		cmsg = (struct cmsghdr *)optptr;
		cmsg->cmsg_level = IPPROTO_IPV6;
		cmsg->cmsg_type = IPV6_RTHDRDSTOPTS;
		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
		optptr += sizeof (*cmsg);

		bcopy(ipp->ipp_rthdrdstopts, optptr, ipp->ipp_rthdrdstoptslen);
		optptr += ipp->ipp_rthdrdstoptslen;
		ASSERT(OK_32PTR(optptr));
		/* Save as last value */
		ip_savebuf((void **)&sctp->sctp_rthdrdstopts,
		    &sctp->sctp_rthdrdstoptslen,
		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen);
	}
	if (addflag.crb_ipv6_recvrthdr) {
		cmsg = (struct cmsghdr *)optptr;
		cmsg->cmsg_level = IPPROTO_IPV6;
		cmsg->cmsg_type = IPV6_RTHDR;
		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrlen;
		optptr += sizeof (*cmsg);

		bcopy(ipp->ipp_rthdr, optptr, ipp->ipp_rthdrlen);
		optptr += ipp->ipp_rthdrlen;
		ASSERT(OK_32PTR(optptr));
		/* Save as last value */
		ip_savebuf((void **)&sctp->sctp_rthdr,
		    &sctp->sctp_rthdrlen,
		    (ipp->ipp_fields & IPPF_RTHDR),
		    ipp->ipp_rthdr, ipp->ipp_rthdrlen);
	}
	if (addflag.crb_ipv6_recvdstopts) {
		cmsg = (struct cmsghdr *)optptr;
		cmsg->cmsg_level = IPPROTO_IPV6;
		cmsg->cmsg_type = IPV6_DSTOPTS;
		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_dstoptslen;
		optptr += sizeof (*cmsg);

		bcopy(ipp->ipp_dstopts, optptr, ipp->ipp_dstoptslen);
		optptr += ipp->ipp_dstoptslen;
		ASSERT(OK_32PTR(optptr));
		/* Save as last value */
		ip_savebuf((void **)&sctp->sctp_dstopts,
		    &sctp->sctp_dstoptslen,
		    (ipp->ipp_fields & IPPF_DSTOPTS),
		    ipp->ipp_dstopts, ipp->ipp_dstoptslen);
	}

	/* Both passes must agree on the size of the options area. */
	ASSERT(optptr == mp1->b_wptr);

	return (0);
}
/*
 * Free every entry on the reassembly list of stream sip.
 *
 * Entries are linked through b_next.  An entry whose lead mblk is of type
 * M_CTL has that lead freed on its own and the fragments hanging off its
 * b_cont freed as a message; any other entry is freed as a message
 * directly.  sip->istr_reass itself is not modified here.
 */
void
sctp_free_reass(sctp_instr_t *sip)
{
	mblk_t	*entry = sip->istr_reass;

	while (entry != NULL) {
		mblk_t	*following = entry->b_next;
		mblk_t	*frags = entry;

		/* Clear the list linkage before the mblk is handed back. */
		entry->b_next = NULL;
		entry->b_prev = NULL;

		if (DB_TYPE(entry) == M_CTL) {
			ASSERT(entry->b_cont != NULL);
			frags = entry->b_cont;
			entry->b_cont = NULL;
			freeb(entry);
		}
		freemsg(frags);

		entry = following;
	}
}
/*
* If the series of data fragments of which dmp is a part is successfully
* reassembled, the first mblk in the series is returned. dc is adjusted
* to point at the data chunk in the lead mblk, and b_rptr also points to
* the data chunk; the following mblk's b_rptr's point at the actual payload.
*
* If the series is not yet reassembled, NULL is returned. dc is not changed.
* XXX should probably move this up into the state machine.
*/
/*
* Fragment list for un-ordered messages. Partial delivery is not supported.
*
* dmp is linked into sctp->sctp_uo_frags, a list chained through
* b_next/b_prev and kept sorted by TSN. If dmp together with its
* neighbours forms a run of consecutive TSNs that starts with a B-bit
* fragment and ends with an E-bit fragment, that run is taken off the
* list, re-chained through b_cont, and its first mblk is returned with
* *dc pointing at its data chunk header. Otherwise NULL is returned and
* *dc is left unchanged.
*/
static mblk_t *
sctp_uodata_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc)
{
mblk_t *hmp;
mblk_t *begin = NULL;
mblk_t *end = NULL;
sctp_data_hdr_t *qdc;
uint32_t ntsn;
uint32_t tsn = ntohl((*dc)->sdh_tsn);
#ifdef DEBUG
mblk_t *mp1;
#endif
/* First frag. */
if (sctp->sctp_uo_frags == NULL) {
sctp->sctp_uo_frags = dmp;
return (NULL);
}
hmp = sctp->sctp_uo_frags;
/*
* Insert the segment according to the TSN, fragmented unordered
* chunks are sequenced by TSN.
*/
while (hmp != NULL) {
qdc = (sctp_data_hdr_t *)hmp->b_rptr;
ntsn = ntohl(qdc->sdh_tsn);
if (SEQ_GT(ntsn, tsn)) {
/* hmp is the first entry with a larger TSN: insert before it. */
if (hmp->b_prev == NULL) {
dmp->b_next = hmp;
hmp->b_prev = dmp;
sctp->sctp_uo_frags = dmp;
} else {
dmp->b_next = hmp;
dmp->b_prev = hmp->b_prev;
hmp->b_prev->b_next = dmp;
hmp->b_prev = dmp;
}
break;
}
if (hmp->b_next == NULL) {
/* Reached the tail without finding a larger TSN: append. */
hmp->b_next = dmp;
dmp->b_prev = hmp;
break;
}
hmp = hmp->b_next;
}
/* check if we completed a msg */
if (SCTP_DATA_GET_BBIT(*dc)) {
begin = dmp;
} else if (SCTP_DATA_GET_EBIT(*dc)) {
end = dmp;
}
/*
* We walk consecutive TSNs backwards till we get a seg. with
* the B bit
*/
if (begin == NULL) {
for (hmp = dmp->b_prev; hmp != NULL; hmp = hmp->b_prev) {
qdc = (sctp_data_hdr_t *)hmp->b_rptr;
ntsn = ntohl(qdc->sdh_tsn);
/* A gap in the TSN sequence means the message is incomplete. */
if ((int32_t)(tsn - ntsn) > 1) {
return (NULL);
}
if (SCTP_DATA_GET_BBIT(qdc)) {
begin = hmp;
break;
}
tsn = ntsn;
}
}
/* The backward walk clobbered tsn; restart from the new fragment. */
tsn = ntohl((*dc)->sdh_tsn);
/*
* We walk consecutive TSNs till we get a seg. with the E bit
*/
if (end == NULL) {
for (hmp = dmp->b_next; hmp != NULL; hmp = hmp->b_next) {
qdc = (sctp_data_hdr_t *)hmp->b_rptr;
ntsn = ntohl(qdc->sdh_tsn);
/* A gap in the TSN sequence means the message is incomplete. */
if ((int32_t)(ntsn - tsn) > 1) {
return (NULL);
}
if (SCTP_DATA_GET_EBIT(qdc)) {
end = hmp;
break;
}
tsn = ntsn;
}
}
if (begin == NULL || end == NULL) {
return (NULL);
}
/* Got one!, Remove the msg from the list */
if (sctp->sctp_uo_frags == begin) {
ASSERT(begin->b_prev == NULL);
sctp->sctp_uo_frags = end->b_next;
if (end->b_next != NULL)
end->b_next->b_prev = NULL;
} else {
begin->b_prev->b_next = end->b_next;
if (end->b_next != NULL)
end->b_next->b_prev = begin->b_prev;
}
begin->b_prev = NULL;
end->b_next = NULL;
/*
* Null out b_next and b_prev and chain using b_cont.
*/
dmp = end = begin;
hmp = begin->b_next;
*dc = (sctp_data_hdr_t *)begin->b_rptr;
begin->b_next = NULL;
while (hmp != NULL) {
qdc = (sctp_data_hdr_t *)hmp->b_rptr;
/* Every fragment but the lead has its rptr moved past the header. */
hmp->b_rptr = (uchar_t *)(qdc + 1);
/* 'end' is reused here as the saved successor of hmp. */
end = hmp->b_next;
dmp->b_cont = hmp;
dmp = hmp;
if (end != NULL)
hmp->b_next = NULL;
hmp->b_prev = NULL;
hmp = end;
}
BUMP_LOCAL(sctp->sctp_reassmsgs);
#ifdef DEBUG
/* Verify that no list linkage is left on the reassembled message. */
mp1 = begin;
while (mp1 != NULL) {
ASSERT(mp1->b_next == NULL);
ASSERT(mp1->b_prev == NULL);
mp1 = mp1->b_cont;
}
#endif
return (begin);
}
/*
* Try partial delivery.
*
* hmp is the M_CTL head of a reassembly queue whose first fragment carries
* the B bit, and srp is the bookkeeping for that queue. The leading run of
* fragments with consecutive TSNs is detached from hmp and returned,
* chained through b_cont, with *dc pointing at the data chunk header of
* the first one. Any fragments after a break in the TSN sequence stay
* queued on hmp, and srp is updated to describe what is left.
*/
static mblk_t *
sctp_try_partial_delivery(sctp_t *sctp, mblk_t *hmp, sctp_reass_t *srp,
sctp_data_hdr_t **dc)
{
mblk_t *mp;
mblk_t *dmp;
mblk_t *qmp;
mblk_t *prev;
sctp_data_hdr_t *qdc;
uint32_t tsn;
ASSERT(DB_TYPE(hmp) == M_CTL);
dprint(4, ("trypartial: got=%d, needed=%d\n",
(int)(srp->got), (int)(srp->needed)));
mp = hmp->b_cont;
qdc = (sctp_data_hdr_t *)mp->b_rptr;
ASSERT(SCTP_DATA_GET_BBIT(qdc) && srp->hasBchunk);
/* tsn tracks the TSN expected of the next fragment in the run. */
tsn = ntohl(qdc->sdh_tsn) + 1;
/*
* This loop has two exit conditions: the
* end of received chunks has been reached, or
* there is a break in the sequence. We want
* to chop the reassembly list as follows (the
* numbers are TSNs):
* 10 -> 11 -> (end of chunks)
* 10 -> 11 -> | 13 (break in sequence)
*/
prev = mp;
mp = mp->b_cont;
while (mp != NULL) {
qdc = (sctp_data_hdr_t *)mp->b_rptr;
if (ntohl(qdc->sdh_tsn) != tsn)
break;
prev = mp;
mp = mp->b_cont;
tsn++;
}
/*
* We are sending all the fragments upstream, we have to retain
* the srp info for further fragments.
*/
if (mp == NULL) {
/* Everything queued is consecutive: the queue becomes empty. */
dmp = hmp->b_cont;
hmp->b_cont = NULL;
srp->nexttsn = tsn;
srp->msglen = 0;
srp->needed = 0;
srp->got = 0;
srp->partial_delivered = B_TRUE;
srp->tail = NULL;
} else {
/* Fragments from the break onwards remain queued on hmp. */
dmp = hmp->b_cont;
hmp->b_cont = mp;
}
srp->hasBchunk = B_FALSE;
/*
* prev is the last fragment of the consecutive run and mp is the
* first fragment that is not being delivered (NULL if there is
* none). We chop the list after prev; whatever remains at mp is
* the new content of the queue. Subsequent fragment deliveries
* will follow the normal reassembly path.
*/
prev->b_cont = NULL;
srp->partial_delivered = B_TRUE;
dprint(4, ("trypartial: got some, got=%d, needed=%d\n",
(int)(srp->got), (int)(srp->needed)));
/*
* Adjust all mblk's except the lead so their rptr's point to the
* payload. sctp_data_chunk() will need to process the lead's
* data chunk section, so leave its rptr pointing at the data chunk.
*/
*dc = (sctp_data_hdr_t *)dmp->b_rptr;
/*
* srp->tail is non-NULL only when fragments were left behind; in
* that case the counters were not reset above, so account for the
* lead fragment here and for the others in the loop below.
*/
if (srp->tail != NULL) {
srp->got--;
ASSERT(srp->got != 0);
if (srp->needed != 0) {
srp->needed--;
ASSERT(srp->needed != 0);
}
srp->msglen -= ntohs((*dc)->sdh_len);
}
for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
qdc = (sctp_data_hdr_t *)qmp->b_rptr;
qmp->b_rptr = (uchar_t *)(qdc + 1);
/*
* Deduct the balance from got and needed here, now that
* we know we are actually delivering these data.
*/
if (srp->tail != NULL) {
srp->got--;
ASSERT(srp->got != 0);
if (srp->needed != 0) {
srp->needed--;
ASSERT(srp->needed != 0);
}
srp->msglen -= ntohs(qdc->sdh_len);
}
}
ASSERT(srp->msglen == 0);
BUMP_LOCAL(sctp->sctp_reassmsgs);
return (dmp);
}
/*
* Fragment list for ordered messages.
*
* dmp, whose data chunk header is *dc, is added to the reassembly queue
* for its SSN on stream sip; a queue is created if none exists yet. Each
* queue is headed by an M_CTL mblk holding a sctp_reass_t, the heads are
* chained through b_next/b_prev in SSN order starting at sip->istr_reass,
* and the fragments hang off the head's b_cont in TSN order.
*
* If no error occurs, error is set to 0. If we run out of memory, error
* is set to 1. If the peer commits a fatal error (the new fragment carries
* a different stream id than the fragment at the tail of the queue it
* belongs to), error is set to 2; note that the association is not
* aborted here. tpfinished indicates whether we have assembled a complete
* message, this is used in sctp_data_chunk() to see if we can try to send
* any queued message for this stream. It is set to B_TRUE when a message
* is completed and to B_FALSE when partial delivery is attempted, and is
* otherwise left untouched.
*
* Returns the lead mblk of a message (or partial message) ready for
* delivery, with *dc pointing at its data chunk header, or NULL if there
* is nothing to deliver yet.
*/
static mblk_t *
sctp_data_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc, int *error,
sctp_instr_t *sip, boolean_t *tpfinished)
{
mblk_t *hmp;
mblk_t *pmp;
mblk_t *qmp;
mblk_t *first_mp;
sctp_reass_t *srp;
sctp_data_hdr_t *qdc;
sctp_data_hdr_t *bdc;
sctp_data_hdr_t *edc;
uint32_t tsn;
uint16_t fraglen = 0;
*error = 0;
/* find the reassembly queue for this data chunk */
hmp = qmp = sip->istr_reass;
for (; hmp != NULL; hmp = hmp->b_next) {
srp = (sctp_reass_t *)DB_BASE(hmp);
if (ntohs((*dc)->sdh_ssn) == srp->ssn)
goto foundit;
else if (SSN_GT(srp->ssn, ntohs((*dc)->sdh_ssn)))
break;
/* qmp trails hmp so a new head can be linked in after it. */
qmp = hmp;
}
/*
* Allocate a M_CTL that will contain information about this
* fragmented message.
*/
if ((pmp = allocb(sizeof (*srp), BPRI_MED)) == NULL) {
*error = 1;
return (NULL);
}
DB_TYPE(pmp) = M_CTL;
srp = (sctp_reass_t *)DB_BASE(pmp);
pmp->b_cont = dmp;
if (hmp != NULL) {
/* hmp has a larger SSN: link the new head in front of it. */
if (sip->istr_reass == hmp) {
sip->istr_reass = pmp;
pmp->b_next = hmp;
pmp->b_prev = NULL;
hmp->b_prev = pmp;
} else {
qmp->b_next = pmp;
pmp->b_prev = qmp;
pmp->b_next = hmp;
hmp->b_prev = pmp;
}
} else {
/* make a new reass head and stick it on the end */
if (sip->istr_reass == NULL) {
sip->istr_reass = pmp;
pmp->b_prev = NULL;
} else {
qmp->b_next = pmp;
pmp->b_prev = qmp;
}
pmp->b_next = NULL;
}
srp->partial_delivered = B_FALSE;
srp->ssn = ntohs((*dc)->sdh_ssn);
/*
* Reached either by falling through with a brand new queue, or from
* foundit when an existing queue was emptied by partial delivery.
* In both cases dmp is the only fragment on the queue.
*/
empty_srp:
srp->needed = 0;
srp->got = 1;
srp->tail = dmp;
if (SCTP_DATA_GET_BBIT(*dc)) {
srp->msglen = ntohs((*dc)->sdh_len);
srp->nexttsn = ntohl((*dc)->sdh_tsn) + 1;
srp->hasBchunk = B_TRUE;
} else if (srp->partial_delivered &&
srp->nexttsn == ntohl((*dc)->sdh_tsn)) {
/*
* This fragment directly follows what was already delivered
* upstream, so it is treated as the beginning of the rest of
* the message by setting the B bit on it.
*/
SCTP_DATA_SET_BBIT(*dc);
/* Last fragment */
if (SCTP_DATA_GET_EBIT(*dc)) {
srp->needed = 1;
goto frag_done;
}
srp->hasBchunk = B_TRUE;
srp->msglen = ntohs((*dc)->sdh_len);
srp->nexttsn++;
}
return (NULL);
foundit:
/*
* else already have a reassembly queue. Insert the new data chunk
* in the reassemble queue. Try the tail first, on the assumption
* that the fragments are coming in in order.
*/
qmp = srp->tail;
/*
* This means the message was partially delivered.
*/
if (qmp == NULL) {
ASSERT(srp->got == 0 && srp->needed == 0 &&
srp->partial_delivered);
ASSERT(hmp->b_cont == NULL);
hmp->b_cont = dmp;
goto empty_srp;
}
qdc = (sctp_data_hdr_t *)qmp->b_rptr;
ASSERT(qmp->b_cont == NULL);
/* XXXIs it fine to do this just here? */
if ((*dc)->sdh_sid != qdc->sdh_sid) {
/* our peer is fatally confused; XXX abort the assc */
*error = 2;
return (NULL);
}
if (SEQ_GT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
/* Larger TSN than the current tail: append. */
qmp->b_cont = dmp;
srp->tail = dmp;
dmp->b_cont = NULL;
if (srp->hasBchunk && srp->nexttsn == ntohl((*dc)->sdh_tsn)) {
srp->msglen += ntohs((*dc)->sdh_len);
srp->nexttsn++;
}
goto inserted;
}
/* Next check for insertion at the beginning */
qmp = hmp->b_cont;
qdc = (sctp_data_hdr_t *)qmp->b_rptr;
if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
dmp->b_cont = qmp;
hmp->b_cont = dmp;
if (SCTP_DATA_GET_BBIT(*dc)) {
srp->hasBchunk = B_TRUE;
srp->nexttsn = ntohl((*dc)->sdh_tsn);
}
goto preinserted;
}
/* Insert somewhere in the middle */
for (;;) {
/* Tail check above should have caught this */
ASSERT(qmp->b_cont != NULL);
qdc = (sctp_data_hdr_t *)qmp->b_cont->b_rptr;
if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
/* insert here */
dmp->b_cont = qmp->b_cont;
qmp->b_cont = dmp;
break;
}
qmp = qmp->b_cont;
}
preinserted:
/*
* Only a fragment that is the next expected TSN of a queue that
* already has its B fragment can extend the consecutive run.
*/
if (!srp->hasBchunk || ntohl((*dc)->sdh_tsn) != srp->nexttsn)
goto inserted;
/*
* fraglen contains the length of consecutive chunks of fragments.
* starting from the chunk inserted recently.
*/
tsn = srp->nexttsn;
for (qmp = dmp; qmp != NULL; qmp = qmp->b_cont) {
qdc = (sctp_data_hdr_t *)qmp->b_rptr;
if (tsn != ntohl(qdc->sdh_tsn))
break;
fraglen += ntohs(qdc->sdh_len);
tsn++;
}
srp->nexttsn = tsn;
srp->msglen += fraglen;
inserted:
srp->got++;
first_mp = hmp->b_cont;
if (srp->needed == 0) {
/* check if we have the first and last fragments */
bdc = (sctp_data_hdr_t *)first_mp->b_rptr;
edc = (sctp_data_hdr_t *)srp->tail->b_rptr;
/* calculate how many fragments are needed, if possible */
if (SCTP_DATA_GET_BBIT(bdc) && SCTP_DATA_GET_EBIT(edc)) {
srp->needed = ntohl(edc->sdh_tsn) -
ntohl(bdc->sdh_tsn) + 1;
}
}
/*
* Try partial delivery if the message length has exceeded the
* partial delivery point. Only do this if we can immediately
* deliver the partially assembled message, and only partially
* deliver one message at a time (i.e. messages cannot be
* intermixed arriving at the upper layer). A simple way to
* enforce this is to only try partial delivery if this TSN is
* the next expected TSN. Partial Delivery not supported
* for un-ordered message.
*/
if (srp->needed != srp->got) {
dmp = NULL;
if (ntohl((*dc)->sdh_tsn) == sctp->sctp_ftsn &&
srp->msglen >= sctp->sctp_pd_point) {
dmp = sctp_try_partial_delivery(sctp, hmp, srp, dc);
*tpfinished = B_FALSE;
}
return (dmp);
}
frag_done:
/*
* else reassembly done; prepare the data for delivery.
* First unlink hmp from the ssn list.
*/
if (sip->istr_reass == hmp) {
sip->istr_reass = hmp->b_next;
if (hmp->b_next)
hmp->b_next->b_prev = NULL;
} else {
ASSERT(hmp->b_prev != NULL);
hmp->b_prev->b_next = hmp->b_next;
if (hmp->b_next)
hmp->b_next->b_prev = hmp->b_prev;
}
/*
* Using b_prev and b_next was a little sinful, but OK since
* this mblk is never put*'d. However, freeb() will still
* ASSERT that they are unused, so we need to NULL them out now.
*/
hmp->b_next = NULL;
hmp->b_prev = NULL;
/* Detach the fragment chain from the M_CTL head and free the head. */
dmp = hmp;
dmp = dmp->b_cont;
hmp->b_cont = NULL;
freeb(hmp);
*tpfinished = B_TRUE;
/*
* Adjust all mblk's except the lead so their rptr's point to the
* payload. sctp_data_chunk() will need to process the lead's
* data chunk section, so leave its rptr pointing at the data chunk.
*/
*dc = (sctp_data_hdr_t *)dmp->b_rptr;
for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
qdc = (sctp_data_hdr_t *)qmp->b_rptr;
qmp->b_rptr = (uchar_t *)(qdc + 1);
}
BUMP_LOCAL(sctp->sctp_reassmsgs);
return (dmp);
}
/*
 * Append tsn, exactly as passed in, to the list of duplicate TSNs kept in
 * the mblk *dups.
 *
 * The mblk is allocated on first use with room for SCTP_DUP_MBLK_SZ
 * entries.  The TSN is silently dropped if dups is NULL, if the
 * allocation fails, or if the mblk is already full.
 */
static void
sctp_add_dup(uint32_t tsn, mblk_t **dups)
{
	size_t	capacity = SCTP_DUP_MBLK_SZ * sizeof (tsn);
	mblk_t	*buf;

	if (dups == NULL)
		return;

	buf = *dups;
	if (buf == NULL) {
		/* First duplicate seen: set up the buffer. */
		buf = allocb(capacity, BPRI_MED);
		*dups = buf;
		if (buf == NULL)
			return;
	}

	/* No room left for another entry. */
	if ((buf->b_wptr - buf->b_rptr) >= capacity)
		return;

	bcopy(&tsn, buf->b_wptr, sizeof (tsn));
	buf->b_wptr += sizeof (tsn);
	ASSERT((buf->b_wptr - buf->b_rptr) <= capacity);
}
static void
sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups,
sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
{
sctp_data_hdr_t *dc;
mblk_t *dmp, *pmp;
sctp_instr_t *instr;
int ubit;
int isfrag;
uint16_t ssn;
uint32_t oftsn;
boolean_t can_deliver = B_TRUE;
uint32_t tsn;
int dlen;
boolean_t tpfinished = B_TRUE;
int32_t new_rwnd;
sctp_stack_t *sctps = sctp->sctp_sctps;
int error;
/* The following are used multiple times, so we inline them */
#define SCTP_ACK_IT(sctp, tsn) \
if (tsn == sctp->sctp_ftsn) { \
dprint(2, ("data_chunk: acking next %x\n", tsn)); \
(sctp)->sctp_ftsn++; \
if ((sctp)->sctp_sack_gaps > 0) \
(sctp)->sctp_force_sack = 1; \
} else if (SEQ_GT(tsn, sctp->sctp_ftsn)) { \
/* Got a gap; record it */ \
BUMP_LOCAL(sctp->sctp_outseqtsns); \
dprint(2, ("data_chunk: acking gap %x\n", tsn)); \
sctp_ack_add(&sctp->sctp_sack_info, tsn, \
&sctp->sctp_sack_gaps); \
sctp->sctp_force_sack = 1; \
}
dmp = NULL;
dc = (sctp_data_hdr_t *)ch;
tsn = ntohl(dc->sdh_tsn);
dprint(3, ("sctp_data_chunk: mp=%p tsn=%x\n", (void *)mp, tsn));
/* Check for duplicates */
if (SEQ_LT(tsn, sctp->sctp_ftsn)) {
dprint(4, ("sctp_data_chunk: dropping duplicate\n"));
BUMP_LOCAL(sctp->sctp_idupchunks);
sctp->sctp_force_sack = 1;
sctp_add_dup(dc->sdh_tsn, dups);
return;
}
if (sctp->sctp_sack_info != NULL) {
sctp_set_t *sp;
for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
if (SEQ_GEQ(tsn, sp->begin) && SEQ_LEQ(tsn, sp->end)) {
dprint(4,
("sctp_data_chunk: dropping dup > "
"cumtsn\n"));
BUMP_LOCAL(sctp->sctp_idupchunks);
sctp->sctp_force_sack = 1;
sctp_add_dup(dc->sdh_tsn, dups);
return;
}
}
}
/* We cannot deliver anything up now but we still need to handle it. */
if (SCTP_IS_DETACHED(sctp)) {
BUMP_MIB(&sctps->sctps_mib, sctpInClosed);
can_deliver = B_FALSE;
}
dlen = ntohs(dc->sdh_len) - sizeof (*dc);
/*
* Check for buffer space. Note if this is the next expected TSN
* we have to take it to avoid deadlock because we cannot deliver
* later queued TSNs and thus clear buffer space without it.
* We drop anything that is purely zero window probe data here.
*/
if ((sctp->sctp_rwnd - sctp->sctp_rxqueued < dlen) &&
(tsn != sctp->sctp_ftsn || sctp->sctp_rwnd == 0)) {
/* Drop and SACK, but don't advance the cumulative TSN. */
sctp->sctp_force_sack = 1;
dprint(0, ("sctp_data_chunk: exceed rwnd %d rxqueued %d "
"dlen %d ssn %d tsn %x\n", sctp->sctp_rwnd,
sctp->sctp_rxqueued, dlen, ntohs(dc->sdh_ssn),
ntohl(dc->sdh_tsn)));
return;
}
if (ntohs(dc->sdh_sid) >= sctp->sctp_num_istr) {
sctp_bsc_t inval_parm;
/* Will populate the CAUSE block in the ERROR chunk. */
inval_parm.bsc_sid = dc->sdh_sid;
/* RESERVED, ignored at the receiving end */
inval_parm.bsc_pad = 0;
/* ack and drop it */
sctp_add_err(sctp, SCTP_ERR_BAD_SID, (void *)&inval_parm,
sizeof (sctp_bsc_t), fp);
SCTP_ACK_IT(sctp, tsn);
return;
}
ubit = SCTP_DATA_GET_UBIT(dc);
ASSERT(sctp->sctp_instr != NULL);
instr = &sctp->sctp_instr[ntohs(dc->sdh_sid)];
/* Initialize the stream, if not yet used */
if (instr->sctp == NULL)
instr->sctp = sctp;
isfrag = !(SCTP_DATA_GET_BBIT(dc) && SCTP_DATA_GET_EBIT(dc));
ssn = ntohs(dc->sdh_ssn);
dmp = dupb(mp);
if (dmp == NULL) {
/* drop it and don't ack it, causing the peer to retransmit */
return;
}
dmp->b_wptr = (uchar_t *)ch + ntohs(ch->sch_len);
sctp->sctp_rxqueued += dlen;
oftsn = sctp->sctp_ftsn;
if (isfrag) {
error = 0;
/* fragmented data chunk */
dmp->b_rptr = (uchar_t *)dc;
if (ubit) {
dmp = sctp_uodata_frag(sctp, dmp, &dc);
#if DEBUG
if (dmp != NULL) {
ASSERT(instr ==
&sctp->sctp_instr[ntohs(dc->sdh_sid)]);
}
#endif
} else {
dmp = sctp_data_frag(sctp, dmp, &dc, &error, instr,
&tpfinished);
}
if (error != 0) {
sctp->sctp_rxqueued -= dlen;
if (error == 1) {
/*
* out of memory; don't ack it so
* the peer retransmits
*/
return;
} else if (error == 2) {
/*
* fatal error (i.e. peer used different
* ssn's for same fragmented data) --
* the association has been aborted.
* XXX need to return errval so state
* machine can also abort processing.
*/
dprint(0, ("error 2: must not happen!\n"));
return;
}
}
if (dmp == NULL) {
/*
* Can't process this data now, but the cumulative
* TSN may be advanced, so do the checks at done.
*/
SCTP_ACK_IT(sctp, tsn);
goto done;
}
}
/*
* Insert complete messages in correct order for ordered delivery.
* tpfinished is true when the incoming chunk contains a complete
* message or is the final missing fragment which completed a message.
*/
if (!ubit && tpfinished && ssn != instr->nextseq) {
/* Adjust rptr to point at the data chunk for compares */
dmp->b_rptr = (uchar_t *)dc;
dprint(2,
("data_chunk: inserted %x in pq (ssn %d expected %d)\n",
ntohl(dc->sdh_tsn), (int)(ssn), (int)(instr->nextseq)));
if (instr->istr_msgs == NULL) {
instr->istr_msgs = dmp;
ASSERT(dmp->b_prev == NULL && dmp->b_next == NULL);
} else {
mblk_t *imblk = instr->istr_msgs;
sctp_data_hdr_t *idc;
/*
* XXXNeed to take sequence wraps into account,
* ... and a more efficient insertion algo.
*/
for (;;) {
idc = (sctp_data_hdr_t *)imblk->b_rptr;
if (SSN_GT(ntohs(idc->sdh_ssn),
ntohs(dc->sdh_ssn))) {
if (instr->istr_msgs == imblk) {
instr->istr_msgs = dmp;
dmp->b_next = imblk;
imblk->b_prev = dmp;
} else {
ASSERT(imblk->b_prev != NULL);
imblk->b_prev->b_next = dmp;
dmp->b_prev = imblk->b_prev;
imblk->b_prev = dmp;
dmp->b_next = imblk;
}
break;
}
if (imblk->b_next == NULL) {
imblk->b_next = dmp;
dmp->b_prev = imblk;
break;
}
imblk = imblk->b_next;
}
}
(instr->istr_nmsgs)++;
(sctp->sctp_istr_nmsgs)++;
SCTP_ACK_IT(sctp, tsn);
return;
}
/*
* Else we can deliver the data directly. Recalculate
* dlen now since we may have reassembled data.
*/
dlen = dmp->b_wptr - (uchar_t *)dc - sizeof (*dc);
for (pmp = dmp->b_cont; pmp != NULL; pmp = pmp->b_cont)
dlen += MBLKL(pmp);
ASSERT(sctp->sctp_rxqueued >= dlen);
/* Deliver the message. */
sctp->sctp_rxqueued -= dlen;
if (can_deliver) {
dmp->b_rptr = (uchar_t *)(dc + 1);
if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
ipp, ira) == 0) {
dprint(1, ("sctp_data_chunk: delivering %lu bytes\n",
msgdsize(dmp)));
sctp->sctp_rwnd -= dlen;
/*
* Override b_flag for SCTP sockfs internal use
*/
dmp->b_flag = tpfinished ? 0 : SCTP_PARTIAL_DATA;
new_rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
msgdsize(dmp), 0, &error, NULL);
/*
* Since we always deliver the next TSN data chunk,
* we may buffer a little more than allowed. In
* that case, just mark the window as 0.
*/
if (new_rwnd < 0)
sctp->sctp_rwnd = 0;
else if (new_rwnd > sctp->sctp_rwnd)
sctp->sctp_rwnd = new_rwnd;
SCTP_ACK_IT(sctp, tsn);
} else {
/* Just free the message if we don't have memory. */
freemsg(dmp);
return;
}
} else {
/* About to free the data */
freemsg(dmp);
SCTP_ACK_IT(sctp, tsn);
}
/*
* data, now enqueued, may already have been processed and free'd
* by the ULP (or we may have just freed it above, if we could not
* deliver it), so we must not reference it (this is why we kept
* the ssn and ubit above).
*/
if (ubit != 0) {
BUMP_LOCAL(sctp->sctp_iudchunks);
goto done;
}
BUMP_LOCAL(sctp->sctp_idchunks);
/*
* If there was a partial delivery and it has not finished,
* don't pull anything from the pqueues.
*/
if (!tpfinished) {
goto done;
}
instr->nextseq = ssn + 1;
/* Deliver any successive data chunks in the instr queue */
while (instr->istr_nmsgs > 0) {
dmp = (mblk_t *)instr->istr_msgs;
dc = (sctp_data_hdr_t *)dmp->b_rptr;
ssn = ntohs(dc->sdh_ssn);
/* Gap in the sequence */
if (ssn != instr->nextseq)
break;
/* Else deliver the data */
(instr->istr_nmsgs)--;
(instr->nextseq)++;
(sctp->sctp_istr_nmsgs)--;
instr->istr_msgs = instr->istr_msgs->b_next;
if (instr->istr_msgs != NULL)
instr->istr_msgs->b_prev = NULL;
dmp->b_next = dmp->b_prev = NULL;
dprint(2, ("data_chunk: pulling %x from pq (ssn %d)\n",
ntohl(dc->sdh_tsn), (int)ssn));
/*
* If this chunk was reassembled, each b_cont represents
* another TSN; advance ftsn now.
*/
dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
for (pmp = dmp->b_cont; pmp; pmp = pmp->b_cont)
dlen += MBLKL(pmp);
ASSERT(sctp->sctp_rxqueued >= dlen);
sctp->sctp_rxqueued -= dlen;
if (can_deliver) {
dmp->b_rptr = (uchar_t *)(dc + 1);
if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
ipp, ira) == 0) {
dprint(1, ("sctp_data_chunk: delivering %lu "
"bytes\n", msgdsize(dmp)));
sctp->sctp_rwnd -= dlen;
/*
* Override b_flag for SCTP sockfs internal use
*/
dmp->b_flag = tpfinished ?
0 : SCTP_PARTIAL_DATA;
new_rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd,
dmp, msgdsize(dmp), 0, &error, NULL);
if (new_rwnd < 0)
sctp->sctp_rwnd = 0;
else if (new_rwnd > sctp->sctp_rwnd)
sctp->sctp_rwnd = new_rwnd;
SCTP_ACK_IT(sctp, tsn);
} else {
freemsg(dmp);
return;
}
} else {
/* About to free the data */
freemsg(dmp);
SCTP_ACK_IT(sctp, tsn);
}
}
done:
/*
* If there are gap reports pending, check if advancing
* the ftsn here closes a gap. If so, we can advance
* ftsn to the end of the set.
*/
if (sctp->sctp_sack_info != NULL &&
sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
}
/*
* If ftsn has moved forward, maybe we can remove gap reports.
* NB: dmp may now be NULL, so don't dereference it here.
*/
if (oftsn != sctp->sctp_ftsn && sctp->sctp_sack_info != NULL) {
sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
&sctp->sctp_sack_gaps);
dprint(2, ("data_chunk: removed acks before %x (num=%d)\n",
sctp->sctp_ftsn - 1, sctp->sctp_sack_gaps));
}
#ifdef DEBUG
if (sctp->sctp_sack_info != NULL) {
ASSERT(sctp->sctp_ftsn != sctp->sctp_sack_info->begin);
}
#endif
#undef SCTP_ACK_IT
}
/*
 * Write a SACK chunk of `sacklen' bytes at `dst'.
 *
 * The chunk consists of the common chunk header, the fixed SACK part
 * (cumulative TSN, advertised receive window, number of gap blocks and a
 * zero duplicate count) and one Gap Ack Block for every set on
 * sctp->sctp_sack_info.  sctp_lastacked is updated to sctp_ftsn - 1 as a
 * side effect, a running SACK timer is stopped, and the outbound chunk and
 * SACK counters are bumped.
 *
 * `sacklen' is stored in the chunk header as is; nothing here checks it
 * against the number of gap blocks written, so the caller is presumably
 * responsible for sizing `dst' for sctp_sack_gaps entries.
 */
void
sctp_fill_sack(sctp_t *sctp, unsigned char *dst, int sacklen)
{
	sctp_chunk_hdr_t	*sch;
	sctp_sack_chunk_t	*sc;
	sctp_sack_frag_t	*sf;
	uint16_t		num_gaps = sctp->sctp_sack_gaps;
	sctp_set_t		*sp;

	/* Chunk hdr */
	sch = (sctp_chunk_hdr_t *)dst;
	sch->sch_id = CHUNK_SACK;
	sch->sch_flags = 0;
	sch->sch_len = htons(sacklen);

	/* SACK chunk */
	sctp->sctp_lastacked = sctp->sctp_ftsn - 1;
	sc = (sctp_sack_chunk_t *)(sch + 1);
	sc->ssc_cumtsn = htonl(sctp->sctp_lastacked);
	if (sctp->sctp_rxqueued < sctp->sctp_rwnd) {
		sc->ssc_a_rwnd = htonl(sctp->sctp_rwnd - sctp->sctp_rxqueued);
	} else {
		/* More is queued than the window allows; advertise zero. */
		sc->ssc_a_rwnd = 0;
	}
	sc->ssc_numfrags = htons(num_gaps);
	sc->ssc_numdups = 0;

	/* lay in gap reports */
	sf = (sctp_sack_frag_t *)(sc + 1);
	for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
		uint16_t offset;

		/*
		 * Gap block boundaries are expressed as offsets from the
		 * cumulative TSN.  Unsigned 32-bit subtraction is performed
		 * modulo 2^32, so it yields the right distance whether or
		 * not the TSN space wraps between the two values; no
		 * separate wrap case is needed.
		 */
		offset = (uint16_t)((uint32_t)sp->begin -
		    (uint32_t)sctp->sctp_lastacked);
		sf->ssf_start = htons(offset);

		/* The end offset is the start offset plus the set's span. */
		offset += (uint16_t)((uint32_t)sp->end - (uint32_t)sp->begin);
		sf->ssf_end = htons(offset);

		sf++;
		/* This is just for debugging (a la the following assertion) */
		num_gaps--;
	}
	ASSERT(num_gaps == 0);

	/* If the SACK timer is running, stop it */
	if (sctp->sctp_ack_timer_running) {
		sctp_timer_stop(sctp->sctp_ack_mp);
		sctp->sctp_ack_timer_running = B_FALSE;
	}

	BUMP_LOCAL(sctp->sctp_obchunks);
	BUMP_LOCAL(sctp->sctp_osacks);
}
/*
 * Decide whether a SACK is due and, if so, build the packet carrying it.
 *
 * A SACK is always built when sctp_force_sack is set (the flag is cleared
 * here).  Otherwise, in the ESTABLISHED state nothing is built until
 * sctp_sack_toggle has reached the deferred-ack limit, and in any state
 * nothing is built when the cumulative TSN has not moved since the last
 * SACK.
 *
 * `dups', when non-NULL, is linked behind the SACK as the duplicate TSN
 * list.  Pending error chunks are either appended to the returned packet
 * or, when they would not fit together with the SACK, sent out separately
 * first.
 *
 * Returns the packet, or NULL when no SACK is needed or allocation failed.
 */
mblk_t *
sctp_make_sack(sctp_t *sctp, sctp_faddr_t *sendto, mblk_t *dups)
{
	sctp_stack_t		*sctps = sctp->sctp_sctps;
	sctp_chunk_hdr_t	*sack_hdr;
	sctp_sack_chunk_t	*sack_body;
	sctp_faddr_t		*err_dst;
	mblk_t			*pkt;
	size_t			sack_len;
	uint32_t		dups_len;
	int32_t			acks_max;

	ASSERT(sendto != NULL);

	if (sctp->sctp_force_sack) {
		/* A forced SACK bypasses all of the deferral checks. */
		sctp->sctp_force_sack = 0;
	} else {
		acks_max = sctps->sctps_deferred_acks_max;
		if (sctp->sctp_state == SCTPS_ESTABLISHED) {
			if (sctp->sctp_sack_toggle < acks_max) {
				/* no need to SACK right now */
				dprint(2,
				    ("sctp_make_sack: %p no sack (toggle)\n",
				    (void *)sctp));
				return (NULL);
			}
			/* Limit reached; start counting afresh. */
			sctp->sctp_sack_toggle = 0;
		}

		if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
			dprint(2, ("sctp_make_sack: %p no sack (already)\n",
			    (void *)sctp));
			return (NULL);
		}
	}

	dprint(2, ("sctp_make_sack: acking %x\n", sctp->sctp_ftsn - 1));

	dups_len = (dups != NULL) ? MBLKL(dups) : 0;

	sack_len = sizeof (*sack_hdr) + sizeof (*sack_body) +
	    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);

	/*
	 * Pending error chunks ride along with the SACK when everything
	 * fits in one packet for their destination; otherwise they are
	 * pushed out on their own before the SACK is built.
	 */
	if (sctp->sctp_err_chunks != NULL) {
		err_dst = SCTP_CHUNK_DEST(sctp->sctp_err_chunks);
		if (sctp->sctp_err_len + sack_len + dups_len >
		    err_dst->sfa_pmss) {
			pkt = sctp_make_mp(sctp, err_dst, 0);
			if (pkt == NULL) {
				SCTP_KSTAT(sctps, sctp_send_err_failed);
				SCTP_KSTAT(sctps, sctp_send_sack_failed);
				freemsg(sctp->sctp_err_chunks);
				sctp->sctp_err_chunks = NULL;
				sctp->sctp_err_len = 0;
				return (NULL);
			}
			pkt->b_cont = sctp->sctp_err_chunks;
			sctp_set_iplen(sctp, pkt, err_dst->ixa);
			(void) conn_ip_output(pkt, err_dst->ixa);
			BUMP_LOCAL(sctp->sctp_opkts);
			sctp->sctp_err_chunks = NULL;
			sctp->sctp_err_len = 0;
		}
	}

	pkt = sctp_make_mp(sctp, sendto, sack_len);
	if (pkt == NULL) {
		SCTP_KSTAT(sctps, sctp_send_sack_failed);
		return (NULL);
	}

	/* Remember where the chunk starts so it can be patched for dups. */
	sack_hdr = (sctp_chunk_hdr_t *)pkt->b_wptr;
	sctp_fill_sack(sctp, pkt->b_wptr, sack_len);
	pkt->b_wptr += sack_len;

	if (dups != NULL) {
		/* Account for the duplicate TSN list chained behind the SACK */
		sack_body = (sctp_sack_chunk_t *)(sack_hdr + 1);
		sack_body->ssc_numdups =
		    htons(MBLKL(dups) / sizeof (uint32_t));
		sack_hdr->sch_len = htons(sack_len + dups_len);
		pkt->b_cont = dups;
	}

	/* Error chunks that did fit are bundled at the end of the packet. */
	if (sctp->sctp_err_chunks != NULL) {
		linkb(pkt, sctp->sctp_err_chunks);
		sctp->sctp_err_chunks = NULL;
		sctp->sctp_err_len = 0;
	}
	return (pkt);
}
/*
 * Send a SACK chunk to the peer address the last data arrived from
 * (sctp_lastdata) if sctp_make_sack() decides one is needed.
 *
 * Returns B_TRUE when a SACK packet was handed to IP, B_FALSE otherwise.
 * When no SACK is sent, `dups' (if any) is freed here because the caller
 * does not free it.
 */
boolean_t
sctp_sack(sctp_t *sctp, mblk_t *dups)
{
	sctp_stack_t	*sctps = sctp->sctp_sctps;
	mblk_t		*sack_mp;

	/* If we are shutting down, let send_shutdown() bundle the SACK */
	if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT)
		sctp_send_shutdown(sctp, 0);

	ASSERT(sctp->sctp_lastdata != NULL);

	sack_mp = sctp_make_sack(sctp, sctp->sctp_lastdata, dups);
	if (sack_mp != NULL) {
		dprint(2, ("sctp_sack: sending to %p %x:%x:%x:%x\n",
		    (void *)sctp->sctp_lastdata,
		    SCTP_PRINTADDR(sctp->sctp_lastdata->faddr)));

		sctp->sctp_active = lbolt64;

		BUMP_MIB(&sctps->sctps_mib, sctpOutAck);

		sctp_set_iplen(sctp, sack_mp, sctp->sctp_lastdata->ixa);
		(void) conn_ip_output(sack_mp, sctp->sctp_lastdata->ixa);
		BUMP_LOCAL(sctp->sctp_opkts);
		return (B_TRUE);
	}

	/* The caller of sctp_sack() will not free the dups mblk. */
	if (dups != NULL)
		freeb(dups);
	return (B_FALSE);
}
/*
 * Handle a message that was partially sent and is now abandoned.
 *
 * The advanced peer ack point becomes the TSN of the last chunk of the
 * message that was actually sent, and a Forward TSN chunk carrying it is
 * transmitted so that the peer can clean up its fragment list for this
 * message.  The remaining, unsent chunks are marked ABANDONED and their
 * payload is subtracted from sctp_unsent.  The message itself is removed
 * from the transmit list later, when the peer SACKs.
 *
 * Returns 0 when the Forward TSN was sent, ENOMEM when building it failed
 * (sctp_adv_pap is restored in that case), and -1 when the first chunk of
 * the message has not been cumulatively acked yet, in which case nothing
 * is done.
 */
int
sctp_check_abandoned_msg(sctp_t *sctp, mblk_t *meta)
{
	sctp_stack_t	*sctps = sctp->sctp_sctps;
	sctp_faddr_t	*fp = sctp->sctp_current;
	uint32_t	saved_pap = sctp->sctp_adv_pap;
	mblk_t		*chunk = meta->b_cont;
	sctp_ftsn_set_t	*sets = NULL;
	uint_t		nsets = 0;
	uint32_t	seglen = sizeof (uint32_t);
	int32_t		unsent = 0;
	sctp_data_hdr_t	*dh;
	boolean_t	ubit;
	mblk_t		*ftsn_mp;
	mblk_t		*pkt;

	dh = (sctp_data_hdr_t *)chunk->b_rptr;
	if (!SEQ_GEQ(sctp->sctp_lastack_rxd, ntohl(dh->sdh_tsn)))
		return (-1);

	/* The U bit is taken from the first chunk of the message. */
	ubit = SCTP_DATA_GET_UBIT(dh);

	/* Find the last chunk of this message that has been sent. */
	while (chunk->b_next != NULL && SCTP_CHUNK_ISSENT(chunk->b_next))
		chunk = chunk->b_next;
	dh = (sctp_data_hdr_t *)chunk->b_rptr;
	sctp->sctp_adv_pap = ntohl(dh->sdh_tsn);

	/* Only ordered messages contribute a sid-ssn entry. */
	if (!ubit && !sctp_add_ftsn_set(&sets, fp, meta, &nsets, &seglen))
		goto nomem;

	ftsn_mp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, seglen);
	sctp_free_ftsn_set(sets);
	if (ftsn_mp == NULL)
		goto nomem;

	pkt = sctp_add_proto_hdr(sctp, fp, ftsn_mp, 0, NULL);
	if (pkt == NULL) {
		freemsg(ftsn_mp);
		SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
		goto nomem;
	}

	SCTP_MSG_SET_ABANDONED(meta);
	sctp_set_iplen(sctp, pkt, fp->ixa);
	(void) conn_ip_output(pkt, fp->ixa);
	BUMP_LOCAL(sctp->sctp_opkts);
	if (!fp->timer_running)
		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);

	/* Everything after the last sent chunk is abandoned unsent data. */
	for (chunk = chunk->b_next; chunk != NULL; chunk = chunk->b_next) {
		ASSERT(!SCTP_CHUNK_ISSENT(chunk));
		ASSERT(!SCTP_CHUNK_ABANDONED(chunk));
		SCTP_ABANDON_CHUNK(chunk);
		dh = (sctp_data_hdr_t *)chunk->b_rptr;
		unsent += ntohs(dh->sdh_len) - sizeof (*dh);
	}
	ASSERT(sctp->sctp_unsent >= unsent);
	sctp->sctp_unsent -= unsent;

	/*
	 * Update ULP the amount of queued data, which is
	 * sent-unack'ed + unsent.
	 */
	if (!SCTP_IS_DETACHED(sctp))
		SCTP_TXQ_UPDATE(sctp);
	return (0);

nomem:
	/* Undo the tentative advance of the peer ack point. */
	sctp->sctp_adv_pap = saved_pap;
	return (ENOMEM);
}
/*
* Process a cumulative ack for `tsn'.
*
* Walks the transmit list from sctp_xmit_head, marking every sent chunk
* whose TSN is covered by `tsn' (and not already covered by
* sctp_lastack_rxd) as acked, updating the per-destination suna/acked
* counters, sctp_unacked and the retransmission timers. A message is
* taken off the list once the walk has passed all of its chunks (or has
* reached an abandoned chunk in it); abandoned messages are reported via
* sctp_sendfail_event(), the rest are released with sctp_free_msg().
*
* On return *first_unacked holds the chunk at which the walk stopped, or
* NULL if the walk consumed the whole list. sctp_lastack_rxd is set to
* `tsn' and sctp_adv_pap is pulled up to it if it was behind.
*
* Returns the sum of the chunk lengths (sdh_len, i.e. including the data
* chunk header) that were newly acked; 0 means the ack was a duplicate.
*/
uint32_t
sctp_cumack(sctp_t *sctp, uint32_t tsn, mblk_t **first_unacked)
{
mblk_t *ump, *nump, *mp = NULL;
uint16_t chunklen;
uint32_t xtsn;
sctp_faddr_t *fp;
sctp_data_hdr_t *sdc;
uint32_t cumack_forward = 0;
sctp_msg_hdr_t *mhdr;
sctp_stack_t *sctps = sctp->sctp_sctps;
ump = sctp->sctp_xmit_head;
/*
* Free messages only when they're completely acked.
*/
while (ump != NULL) {
mhdr = (sctp_msg_hdr_t *)ump->b_rptr;
for (mp = ump->b_cont; mp != NULL; mp = mp->b_next) {
/*
* An abandoned chunk ends the walk of this message; mp is
* cleared so that the code after the loop disposes of the
* whole message.
*/
if (SCTP_CHUNK_ABANDONED(mp)) {
ASSERT(SCTP_IS_MSG_ABANDONED(ump));
mp = NULL;
break;
}
/*
* We check for abandoned message if we are PR-SCTP
* aware, if this is not the first chunk in the
* message (b_cont) and if the message is marked
* abandoned.
*/
if (!SCTP_CHUNK_ISSENT(mp)) {
if (sctp->sctp_prsctp_aware &&
mp != ump->b_cont &&
(SCTP_IS_MSG_ABANDONED(ump) ||
SCTP_MSG_TO_BE_ABANDONED(ump, mhdr,
sctp))) {
(void) sctp_check_abandoned_msg(sctp,
ump);
}
/* Nothing beyond an unsent chunk can have been acked. */
goto cum_ack_done;
}
sdc = (sctp_data_hdr_t *)mp->b_rptr;
xtsn = ntohl(sdc->sdh_tsn);
/* Already covered by the previous cumulative ack. */
if (SEQ_GEQ(sctp->sctp_lastack_rxd, xtsn))
continue;
if (SEQ_GEQ(tsn, xtsn)) {
fp = SCTP_CHUNK_DEST(mp);
chunklen = ntohs(sdc->sdh_len);
if (sctp->sctp_out_time != 0 &&
xtsn == sctp->sctp_rtt_tsn) {
/* Got a new RTT measurement */
sctp_update_rtt(sctp, fp,
lbolt64 - sctp->sctp_out_time);
sctp->sctp_out_time = 0;
}
/* Already marked acked earlier; don't account for it twice. */
if (SCTP_CHUNK_ISACKED(mp))
continue;
SCTP_CHUNK_SET_SACKCNT(mp, 0);
SCTP_CHUNK_ACKED(mp);
ASSERT(fp->suna >= chunklen);
fp->suna -= chunklen;
fp->acked += chunklen;
cumack_forward += chunklen;
/* sctp_unacked counts payload only, hence minus the header. */
ASSERT(sctp->sctp_unacked >=
(chunklen - sizeof (*sdc)));
sctp->sctp_unacked -=
(chunklen - sizeof (*sdc));
if (fp->suna == 0) {
/* all outstanding data acked */
fp->pba = 0;
SCTP_FADDR_TIMER_STOP(fp);
} else {
SCTP_FADDR_TIMER_RESTART(sctp, fp,
fp->rto);
}
} else {
/* First chunk beyond the cumulative ack; stop here. */
goto cum_ack_done;
}
}
/*
* Done with this message: unlink it from the head of the
* transmit list and dispose of it.
*/
nump = ump->b_next;
if (nump != NULL)
nump->b_prev = NULL;
if (ump == sctp->sctp_xmit_tail)
sctp->sctp_xmit_tail = nump;
if (SCTP_IS_MSG_ABANDONED(ump)) {
BUMP_LOCAL(sctp->sctp_prsctpdrop);
ump->b_next = NULL;
sctp_sendfail_event(sctp, ump, 0, B_TRUE);
} else {
sctp_free_msg(ump);
}
sctp->sctp_xmit_head = ump = nump;
}
cum_ack_done:
*first_unacked = mp;
if (cumack_forward > 0) {
BUMP_MIB(&sctps->sctps_mib, sctpInAck);
if (SEQ_GT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn)) {
sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd;
}
/*
* Update ULP the amount of queued data, which is
* sent-unack'ed + unsent.
*/
if (!SCTP_IS_DETACHED(sctp))
SCTP_TXQ_UPDATE(sctp);
/* Time to send a shutdown? */
if (sctp->sctp_state == SCTPS_SHUTDOWN_PENDING) {
sctp_send_shutdown(sctp, 0);
}
sctp->sctp_xmit_unacked = mp;
} else {
/* dup ack */
BUMP_MIB(&sctps->sctps_mib, sctpInDupAck);
}
sctp->sctp_lastack_rxd = tsn;
/* The advanced peer ack point never lags the cumulative ack. */
if (SEQ_LT(sctp->sctp_adv_pap, sctp->sctp_lastack_rxd))
sctp->sctp_adv_pap = sctp->sctp_lastack_rxd;
ASSERT(sctp->sctp_xmit_head || sctp->sctp_unacked == 0);
return (cumack_forward);
}
/*
 * Recompute sctp_frwnd from a window value `frwnd' by subtracting the
 * data that is still unacked.  If more is unacked than the window
 * allows, sctp_frwnd is set to zero.
 *
 * Returns 1 if sctp_frwnd ended up larger than it was before the call,
 * 0 otherwise.
 */
static int
sctp_set_frwnd(sctp_t *sctp, uint32_t frwnd)
{
	uint32_t prev_frwnd = sctp->sctp_frwnd;

	if (sctp->sctp_unacked > frwnd) {
		sctp->sctp_frwnd = 0;
		return (0);
	}

	sctp->sctp_frwnd = frwnd - sctp->sctp_unacked;
	return ((prev_frwnd < sctp->sctp_frwnd) ? 1 : 0);
}
/*
 * For un-ordered messages.
 * Drop every fragment at the front of sctp->sctp_uo_frags whose TSN is
 * less than or equal to `ftsn'.  Fragments for un-ordered messages are
 * kept strictly in TSN sequence, so the scan stops at the first fragment
 * beyond `ftsn'.
 *
 * Returns the number of payload bytes (chunk length minus data header)
 * that were freed.
 */
static int
sctp_ftsn_check_uo_frag(sctp_t *sctp, uint32_t ftsn)
{
	mblk_t		*frag;
	mblk_t		*following;
	sctp_data_hdr_t	*hdr;
	int		freed = 0;

	for (frag = sctp->sctp_uo_frags; frag != NULL; frag = following) {
		following = frag->b_next;
		hdr = (sctp_data_hdr_t *)frag->b_rptr;
		if (SEQ_GT(ntohl(hdr->sdh_tsn), ftsn))
			break;

		/* Unlink the fragment from the head of the list. */
		sctp->sctp_uo_frags = following;
		if (following != NULL)
			following->b_prev = NULL;
		frag->b_next = NULL;

		freed += ntohs(hdr->sdh_len) - sizeof (*hdr);
		freeb(frag);
	}
	return (freed);
}
/*
 * For ordered messages.
 * A Forward TSN has reported the sid-ssn pair (`sip', `ssn') as abandoned,
 * so the rest of that message will never arrive.  Discard every entry at
 * the front of the stream's reassembly queue whose SSN is not greater
 * than `ssn'.  If part of such a message had already been delivered, a
 * partial delivery event is raised first.
 *
 * Returns the number of payload bytes (chunk length minus data header)
 * that were freed.
 */
static int
sctp_ftsn_check_frag(sctp_t *sctp, uint16_t ssn, sctp_instr_t *sip)
{
	sctp_reass_t	*reass;
	sctp_data_hdr_t	*hdr;
	mblk_t		*ctl;
	mblk_t		*ctl_next;
	mblk_t		*frags;
	mblk_t		*walk;
	int		freed = 0;

	for (ctl = sip->istr_reass; ctl != NULL; ctl = ctl_next) {
		ctl_next = ctl->b_next;
		reass = (sctp_reass_t *)DB_BASE(ctl);
		if (SSN_GT(reass->ssn, ssn))
			break;

		/*
		 * If we had sent part of this message up, send a partial
		 * delivery event. Since this is ordered delivery, we should
		 * have sent partial message only for the next in sequence,
		 * hence the ASSERT. See comments in sctp_data_chunk() for
		 * trypartial.
		 */
		if (reass->partial_delivered) {
			ASSERT(sip->nextseq == reass->ssn);
			sctp_partial_delivery_event(sctp);
		}

		/* Take it out of the reass queue */
		sip->istr_reass = ctl_next;
		if (ctl_next != NULL)
			ctl_next->b_prev = NULL;
		ctl->b_next = NULL;
		ASSERT(ctl->b_prev == NULL);

		/* Detach the fragments from the M_CTL head and free it. */
		ASSERT(DB_TYPE(ctl) == M_CTL);
		frags = ctl->b_cont;
		ctl->b_cont = NULL;
		freeb(ctl);

		/* Tally the payload carried by each fragment, then free all */
		for (walk = frags; walk != NULL; walk = walk->b_cont) {
			hdr = (sctp_data_hdr_t *)walk->b_rptr;
			freed += ntohs(hdr->sdh_len) - sizeof (*hdr);
		}
		freemsg(frags);
	}
	return (freed);
}
/*
 * Update sctp_ftsn to the cumulative TSN from the Forward TSN chunk. Remove
 * any SACK gaps less than the newly updated sctp_ftsn. Walk through the
 * sid-ssn pair in the Forward TSN and for each, clean the fragment list
 * for this pair, if needed, and check if we can deliver subsequent
 * messages, if any, from the instream queue (that were waiting for this
 * sid-ssn message to show up). Once we are done try to update the SACK
 * info. We could get a duplicate Forward TSN, in which case just send
 * a SACK. If any of the sid values in the Forward TSN is invalid,
 * send back an "Invalid Stream Identifier" error and continue processing
 * the rest.
 *
 * The chunk is converted to host byte order in place.  A chunk too short
 * to hold the new cumulative TSN is ignored.  `fp', `ipp' and `ira' are
 * passed through to sctp_add_err() and sctp_input_add_ancillary().
 */
static void
sctp_process_forward_tsn(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp,
    ip_pkt_t *ipp, ip_recv_attr_t *ira)
{
	uint32_t	*ftsn = (uint32_t *)(ch + 1);
	ftsn_entry_t	*ftsn_entry;
	sctp_instr_t	*instr;
	boolean_t	can_deliver = B_TRUE;
	size_t		dlen;
	int		flen;
	mblk_t		*dmp;
	mblk_t		*pmp;
	sctp_data_hdr_t	*dc;
	ssize_t		remaining;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	/*
	 * The chunk comes from the peer, so make sure it is at least long
	 * enough to carry the new cumulative TSN before touching it.
	 * Without this check `remaining' below would go negative and,
	 * being compared against an unsigned sizeof, would be treated as
	 * a huge count of sid-ssn entries.
	 */
	if (ntohs(ch->sch_len) < sizeof (*ch) + sizeof (*ftsn))
		return;

	*ftsn = ntohl(*ftsn);
	remaining = ntohs(ch->sch_len) - sizeof (*ch) - sizeof (*ftsn);

	if (SCTP_IS_DETACHED(sctp)) {
		BUMP_MIB(&sctps->sctps_mib, sctpInClosed);
		can_deliver = B_FALSE;
	}
	/*
	 * un-ordered messages don't have SID-SSN pair entries, we check
	 * for any fragments (for un-ordered message) to be discarded using
	 * the cumulative FTSN.
	 */
	flen = sctp_ftsn_check_uo_frag(sctp, *ftsn);
	if (flen > 0) {
		ASSERT(sctp->sctp_rxqueued >= flen);
		sctp->sctp_rxqueued -= flen;
	}

	ftsn_entry = (ftsn_entry_t *)(ftsn + 1);
	while (remaining >= (ssize_t)sizeof (*ftsn_entry)) {
		ftsn_entry->ftsn_sid = ntohs(ftsn_entry->ftsn_sid);
		ftsn_entry->ftsn_ssn = ntohs(ftsn_entry->ftsn_ssn);
		if (ftsn_entry->ftsn_sid >= sctp->sctp_num_istr) {
			sctp_bsc_t	inval_parm;

			/* Will populate the CAUSE block in the ERROR chunk. */
			inval_parm.bsc_sid = htons(ftsn_entry->ftsn_sid);
			/* RESERVED, ignored at the receiving end */
			inval_parm.bsc_pad = 0;

			sctp_add_err(sctp, SCTP_ERR_BAD_SID,
			    (void *)&inval_parm, sizeof (sctp_bsc_t), fp);
			ftsn_entry++;
			remaining -= sizeof (*ftsn_entry);
			continue;
		}
		instr = &sctp->sctp_instr[ftsn_entry->ftsn_sid];
		flen = sctp_ftsn_check_frag(sctp, ftsn_entry->ftsn_ssn, instr);
		/* Indicates frags were nuked, update rxqueued */
		if (flen > 0) {
			ASSERT(sctp->sctp_rxqueued >= flen);
			sctp->sctp_rxqueued -= flen;
		}
		/*
		 * It is possible to receive an FTSN chunk with SSN smaller
		 * than nextseq if this chunk is a retransmission because
		 * of incomplete processing when it was first processed.
		 */
		if (SSN_GE(ftsn_entry->ftsn_ssn, instr->nextseq))
			instr->nextseq = ftsn_entry->ftsn_ssn + 1;

		/* Deliver (or drop) messages that were waiting on this SSN. */
		while (instr->istr_nmsgs > 0) {
			mblk_t	*next;

			dmp = (mblk_t *)instr->istr_msgs;
			dc = (sctp_data_hdr_t *)dmp->b_rptr;
			if (ntohs(dc->sdh_ssn) != instr->nextseq)
				break;

			next = dmp->b_next;
			/* Payload of the first block plus any continuations */
			dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
			for (pmp = dmp->b_cont; pmp != NULL;
			    pmp = pmp->b_cont) {
				dlen += MBLKL(pmp);
			}
			if (can_deliver) {
				int32_t	nrwnd;
				int	error;

				dmp->b_rptr = (uchar_t *)(dc + 1);
				dmp->b_next = NULL;
				ASSERT(dmp->b_prev == NULL);
				if (sctp_input_add_ancillary(sctp,
				    &dmp, dc, fp, ipp, ira) == 0) {
					sctp->sctp_rxqueued -= dlen;
					sctp->sctp_rwnd -= dlen;
					/*
					 * Override b_flag for SCTP sockfs
					 * internal use
					 */
					dmp->b_flag = 0;
					nrwnd = sctp->sctp_ulp_recv(
					    sctp->sctp_ulpd, dmp, msgdsize(dmp),
					    0, &error, NULL);
					if (nrwnd < 0)
						sctp->sctp_rwnd = 0;
					else if (nrwnd > sctp->sctp_rwnd)
						sctp->sctp_rwnd = nrwnd;
				} else {
					/*
					 * We will resume processing when
					 * the FTSN chunk is re-xmitted.
					 */
					dmp->b_rptr = (uchar_t *)dc;
					dmp->b_next = next;
					dprint(0,
					    ("FTSN dequeuing %u failed\n",
					    ntohs(dc->sdh_ssn)));
					return;
				}
			} else {
				sctp->sctp_rxqueued -= dlen;
				ASSERT(dmp->b_prev == NULL);
				dmp->b_next = NULL;
				freemsg(dmp);
			}
			instr->istr_nmsgs--;
			instr->nextseq++;
			sctp->sctp_istr_nmsgs--;
			if (next != NULL)
				next->b_prev = NULL;
			instr->istr_msgs = next;
		}
		ftsn_entry++;
		remaining -= sizeof (*ftsn_entry);
	}
	/*
	 * Duplicate FTSN.  TSNs wrap, so they are compared with serial
	 * number arithmetic rather than a plain `<='.
	 */
	if (SEQ_LEQ(*ftsn, sctp->sctp_ftsn - 1)) {
		sctp->sctp_force_sack = 1;
		return;
	}
	/* Advance cum TSN to that reported in the Forward TSN chunk */
	sctp->sctp_ftsn = *ftsn + 1;

	/* Remove all the SACK gaps before the new cum TSN */
	if (sctp->sctp_sack_info != NULL) {
		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
		    &sctp->sctp_sack_gaps);
	}
	/*
	 * If there are gap reports pending, check if advancing
	 * the ftsn here closes a gap. If so, we can advance
	 * ftsn to the end of the set.
	 * If ftsn has moved forward, maybe we can remove gap reports.
	 */
	if (sctp->sctp_sack_info != NULL &&
	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
		    &sctp->sctp_sack_gaps);
	}
}
/*
 * Called once a SACK has been processed: if abandoned chunks immediately
 * follow the updated cumulative TSN, the advanced peer ack point can move
 * past them, and a Forward TSN chunk is sent to `fp' to tell the peer.
 *
 * If the Forward TSN cannot be built, sctp_adv_pap is put back to its
 * previous value.  Either way the retransmission timer of `fp' is started
 * if it is not already running.
 */
static void
sctp_check_abandoned_data(sctp_t *sctp, sctp_faddr_t *fp)
{
	mblk_t		*meta = sctp->sctp_xmit_head;
	uint32_t	saved_pap = sctp->sctp_adv_pap;
	mblk_t		*mp;
	mblk_t		*ftsn_pkt;
	uint32_t	seglen;

	/*
	 * We only check in the first meta since otherwise we can't
	 * advance the cumulative ack point. We just look for chunks
	 * marked for retransmission, else we might prematurely
	 * send an FTSN for a sent, but unacked, chunk.
	 */
	mp = meta->b_cont;
	for (;;) {
		/* Ran off the message, or hit unsent data: nothing to do. */
		if (mp == NULL || !SCTP_CHUNK_ISSENT(mp))
			return;
		if (SCTP_CHUNK_WANT_REXMIT(mp))
			break;
		mp = mp->b_next;
	}

	sctp_check_adv_ack_pt(sctp, meta, mp);
	if (!SEQ_GT(sctp->sctp_adv_pap, saved_pap))
		return;

	sctp_make_ftsns(sctp, meta, mp, &ftsn_pkt, fp, &seglen);
	if (ftsn_pkt != NULL) {
		sctp_set_iplen(sctp, ftsn_pkt, fp->ixa);
		(void) conn_ip_output(ftsn_pkt, fp->ixa);
		BUMP_LOCAL(sctp->sctp_opkts);
	} else {
		/* Could not build the chunk; undo the advance. */
		sctp->sctp_adv_pap = saved_pap;
	}
	if (!fp->timer_running)
		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
}
/*
* The processing here follows the same logic in sctp_got_sack(), the reason
* we do this separately is because, usually, gap blocks are ordered and
* we can process it in sctp_got_sack(). However if they aren't we would
* need to do some additional non-optimal stuff when we start processing the
* unordered gaps. To that effect sctp_got_sack() does the processing in the
* simple case and this does the same in the more involved case.
*
* Arguments, as they are used below:
* ctsn - TSN that the gap block offsets are relative to (the
* cumulative TSN).
* ssf, num_gaps - the gap blocks to process and how many there are.
* umphead, mphead - message and chunk in the transmit list from which
* each walk (re)starts; mphead is dereferenced
* unconditionally, so it must not be NULL.
* trysend - set to -1 if a gap block refers to a TSN that has
* not been sent, and to 1 if a chunk gets marked for
* fast retransmit.
* fast_recovery - in/out; set to B_TRUE when fast recovery is entered
* here.
* fr_xtsn - per the comments below, TSNs up to this one already
* had their SACK count incremented by sctp_got_sack().
*
* Returns the sum of the chunk lengths (sdh_len) newly marked as acked.
*/
static uint32_t
sctp_process_uo_gaps(sctp_t *sctp, uint32_t ctsn, sctp_sack_frag_t *ssf,
int num_gaps, mblk_t *umphead, mblk_t *mphead, int *trysend,
boolean_t *fast_recovery, uint32_t fr_xtsn)
{
uint32_t xtsn;
uint32_t gapstart = 0;
uint32_t gapend = 0;
int gapcnt;
uint16_t chunklen;
sctp_data_hdr_t *sdc;
int gstart;
mblk_t *ump = umphead;
mblk_t *mp = mphead;
sctp_faddr_t *fp;
uint32_t acked = 0;
sctp_stack_t *sctps = sctp->sctp_sctps;
/*
* gstart tracks the last (in the order of TSN) gapstart that
* we process in this SACK gaps walk.
*/
gstart = ctsn;
sdc = (sctp_data_hdr_t *)mp->b_rptr;
xtsn = ntohl(sdc->sdh_tsn);
for (gapcnt = 0; gapcnt < num_gaps; gapcnt++, ssf++) {
/* gapstart is still 0 only before the first gap block is read. */
if (gapstart != 0) {
/*
* If we have reached the end of the transmit list or
* hit an unsent chunk or encountered an unordered gap
* block start from the ctsn again.
*/
if (ump == NULL || !SCTP_CHUNK_ISSENT(mp) ||
SEQ_LT(ctsn + ntohs(ssf->ssf_start), xtsn)) {
ump = umphead;
mp = mphead;
sdc = (sctp_data_hdr_t *)mp->b_rptr;
xtsn = ntohl(sdc->sdh_tsn);
}
}
/* Gap block boundaries are offsets from ctsn. */
gapstart = ctsn + ntohs(ssf->ssf_start);
gapend = ctsn + ntohs(ssf->ssf_end);
/*
* Sanity checks:
*
* 1. SACK for TSN we have not sent - ABORT
* 2. Invalid or spurious gaps, ignore all gaps
*/
if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent);
*trysend = -1;
return (acked);
} else if (SEQ_LT(gapend, gapstart) ||
SEQ_LEQ(gapstart, ctsn)) {
break;
}
/*
* The xtsn can be the TSN processed for the last gap
* (gapend) or it could be the cumulative TSN. We continue
* with the last xtsn as long as the gaps are ordered, when
* we hit an unordered gap, we re-start from the cumulative
* TSN. For the first gap it is always the cumulative TSN.
*/
while (xtsn != gapstart) {
/*
* We can't reliably check for reneged chunks
* when walking the unordered list, so we don't.
* In case the peer reneges then we will end up
* sending the reneged chunk via timeout.
*/
mp = mp->b_next;
if (mp == NULL) {
ump = ump->b_next;
/*
* ump can't be NULL because of the sanity
* check above.
*/
ASSERT(ump != NULL);
mp = ump->b_cont;
}
/*
* mp can't be unsent because of the sanity check
* above.
*/
ASSERT(SCTP_CHUNK_ISSENT(mp));
sdc = (sctp_data_hdr_t *)mp->b_rptr;
xtsn = ntohl(sdc->sdh_tsn);
}
/*
* Now that we have found the chunk with TSN == 'gapstart',
* let's walk till we hit the chunk with TSN == 'gapend'.
* All intermediate chunks will be marked ACKED, if they
* haven't already been.
*/
while (SEQ_LEQ(xtsn, gapend)) {
/*
* SACKed
*/
SCTP_CHUNK_SET_SACKCNT(mp, 0);
if (!SCTP_CHUNK_ISACKED(mp)) {
SCTP_CHUNK_ACKED(mp);
fp = SCTP_CHUNK_DEST(mp);
chunklen = ntohs(sdc->sdh_len);
ASSERT(fp->suna >= chunklen);
fp->suna -= chunklen;
if (fp->suna == 0) {
/* All outstanding data acked. */
fp->pba = 0;
SCTP_FADDR_TIMER_STOP(fp);
}
fp->acked += chunklen;
acked += chunklen;
/* sctp_unacked counts payload only, hence minus the header. */
sctp->sctp_unacked -= chunklen - sizeof (*sdc);
ASSERT(sctp->sctp_unacked >= 0);
}
/*
* Move to the next message in the transmit list
* if we are done with all the chunks from the current
* message. Note, it is possible to hit the end of the
* transmit list here, i.e. if we have already completed
* processing the gap block.
*/
mp = mp->b_next;
if (mp == NULL) {
ump = ump->b_next;
if (ump == NULL) {
ASSERT(xtsn == gapend);
break;
}
mp = ump->b_cont;
}
/*
* Likewise, we can hit an unsent chunk once we have
* completed processing the gap block.
*/
if (!SCTP_CHUNK_ISSENT(mp)) {
ASSERT(xtsn == gapend);
break;
}
sdc = (sctp_data_hdr_t *)mp->b_rptr;
xtsn = ntohl(sdc->sdh_tsn);
}
/*
* We keep track of the last gap we successfully processed
* so that we can terminate the walk below for incrementing
* the SACK count.
*/
if (SEQ_LT(gstart, gapstart))
gstart = gapstart;
}
/*
* Check if have incremented the SACK count for all unacked TSNs in
* sctp_got_sack(), if so we are done.
*/
if (SEQ_LEQ(gstart, fr_xtsn))
return (acked);
/*
* Second pass: walk from the head again up to (not including) gstart
* and bump the SACK count of the chunks that are still unacked.
*/
ump = umphead;
mp = mphead;
sdc = (sctp_data_hdr_t *)mp->b_rptr;
xtsn = ntohl(sdc->sdh_tsn);
while (SEQ_LT(xtsn, gstart)) {
/*
* We have incremented SACK count for TSNs less than fr_tsn
* in sctp_got_sack(), so don't increment them again here.
*/
if (SEQ_GT(xtsn, fr_xtsn) && !SCTP_CHUNK_ISACKED(mp)) {
SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
/* Reaching the threshold marks the chunk for fast retransmit. */
if (SCTP_CHUNK_SACKCNT(mp) ==
sctps->sctps_fast_rxt_thresh) {
SCTP_CHUNK_REXMIT(mp);
sctp->sctp_chk_fast_rexmit = B_TRUE;
*trysend = 1;
if (!*fast_recovery) {
/*
* Entering fast recovery.
*/
fp = SCTP_CHUNK_DEST(mp);
/* Halve cwnd, but not below two path MSS. */
fp->ssthresh = fp->cwnd / 2;
if (fp->ssthresh < 2 * fp->sfa_pmss) {
fp->ssthresh =
2 * fp->sfa_pmss;
}
fp->cwnd = fp->ssthresh;
fp->pba = 0;
sctp->sctp_recovery_tsn =
sctp->sctp_ltsn - 1;
*fast_recovery = B_TRUE;
}
}
}
mp = mp->b_next;
if (mp == NULL) {
ump = ump->b_next;
/* We can't get to the end of the transmit list here */
ASSERT(ump != NULL);
mp = ump->b_cont;
}
/* We can't hit an unsent chunk here */
ASSERT(SCTP_CHUNK_ISSENT(mp));
sdc = (sctp_data_hdr_t *)mp->b_rptr;
xtsn = ntohl(sdc->sdh_tsn);
}
return (acked);
}
static int
sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch)
{
sctp_sack_chunk_t *sc;
sctp_data_hdr_t *sdc;
sctp_sack_frag_t *ssf;
mblk_t *ump;
mblk_t *mp;
mblk_t *mp1;
uint32_t cumtsn;
uint32_t xtsn;
uint32_t gapstart = 0;
uint32_t gapend = 0;
uint32_t acked = 0;
uint16_t chunklen;
sctp_faddr_t *fp;
int num_gaps;
int trysend = 0;
int i;
boolean_t fast_recovery = B_FALSE;
boolean_t cumack_forward = B_FALSE;
boolean_t fwd_tsn = B_FALSE;
sctp_stack_t *sctps = sctp->sctp_sctps;
BUMP_LOCAL(sctp->sctp_ibchunks);
BUMP_LOCAL(sctp->sctp_isacks);
chunklen = ntohs(sch->sch_len);
if (chunklen < (sizeof (*sch) + sizeof (*sc)))
return (0);
sc = (sctp_sack_chunk_t *)(sch + 1);
cumtsn = ntohl(sc->ssc_cumtsn);
dprint(2, ("got sack cumtsn %x -> %x\n", sctp->sctp_lastack_rxd,
cumtsn));
/* out of order */
if (SEQ_LT(cumtsn, sctp->sctp_lastack_rxd))
return (0);
if (SEQ_GT(cumtsn, sctp->sctp_ltsn - 1)) {
BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent);
/* Send an ABORT */
return (-1);
}
/*
* Cwnd only done when not in fast recovery mode.
*/
if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn))
fast_recovery = B_TRUE;
/*
* ... and if the cum TSN is not moving ahead on account of a Forward TSN
*/
if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_adv_pap))
fwd_tsn = B_TRUE;
if (cumtsn == sctp->sctp_lastack_rxd &&
(sctp->sctp_xmit_unacked == NULL ||
!SCTP_CHUNK_ABANDONED(sctp->sctp_xmit_unacked))) {
if (sctp->sctp_xmit_unacked != NULL)
mp = sctp->sctp_xmit_unacked;
else if (sctp->sctp_xmit_head != NULL)
mp = sctp->sctp_xmit_head->b_cont;
else
mp = NULL;
BUMP_MIB(&sctps->sctps_mib, sctpInDupAck);
/*
* If we were doing a zero win probe and the win
* has now opened to at least MSS, re-transmit the
* zero win probe via sctp_rexmit_packet().
*/
if (mp != NULL && sctp->sctp_zero_win_probe &&
ntohl(sc->ssc_a_rwnd) >= sctp->sctp_current->sfa_pmss) {
mblk_t *pkt;
uint_t pkt_len;
mblk_t *mp1 = mp;
mblk_t *meta = sctp->sctp_xmit_head;
/*
* Reset the RTO since we have been backing-off
* to send the ZWP.
*/
fp = sctp->sctp_current;
fp->rto = fp->srtt + 4 * fp->rttvar;
SCTP_MAX_RTO(sctp, fp);
/* Resend the ZWP */
pkt = sctp_rexmit_packet(sctp, &meta, &mp1, fp,
&pkt_len);
if (pkt == NULL) {
SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
return (0);
}
ASSERT(pkt_len <= fp->sfa_pmss);
sctp->sctp_zero_win_probe = B_FALSE;
sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
sctp_set_iplen(sctp, pkt, fp->ixa);
(void) conn_ip_output(pkt, fp->ixa);
BUMP_LOCAL(sctp->sctp_opkts);
}
} else {
if (sctp->sctp_zero_win_probe) {
/*
* Reset the RTO since we have been backing-off
* to send the ZWP.
*/
fp = sctp->sctp_current;
fp->rto = fp->srtt + 4 * fp->rttvar;
SCTP_MAX_RTO(sctp, fp);
sctp->sctp_zero_win_probe = B_FALSE;
/* This is probably not required */
if (!sctp->sctp_rexmitting) {
sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
}
}
acked = sctp_cumack(sctp, cumtsn, &mp);
sctp->sctp_xmit_unacked = mp;
if (acked > 0) {
trysend = 1;
cumack_forward = B_TRUE;
if (fwd_tsn && SEQ_GEQ(sctp->sctp_lastack_rxd,
sctp->sctp_adv_pap)) {
cumack_forward = B_FALSE;
}
}
}
num_gaps = ntohs(sc->ssc_numfrags);
UPDATE_LOCAL(sctp->sctp_gapcnt,