/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/signal.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/tihdr.h>
#include <sys/var.h>
#include <sys/poll.h>
#include <sys/termio.h>
#include <sys/ttold.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/cmn_err.h>
#include <sys/sad.h>
#include <sys/netstack.h>
#include <sys/priocntl.h>
#include <sys/jioctl.h>
#include <sys/procset.h>
#include <sys/session.h>
#include <sys/kmem.h>
#include <sys/filio.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/strredir.h>
#include <sys/fs/fifonode.h>
#include <sys/fs/snode.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/project.h>
#include <sys/kbio.h>
#include <sys/msio.h>
#include <sys/tty.h>
#include <sys/ptyvar.h>
#include <sys/vuid_event.h>
#include <sys/modctl.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/autoconf.h>
#include <sys/policy.h>
#include <sys/dld.h>
#include <sys/zone.h>
#include <sys/sodirect.h>
/*
* This define helps improve the readability of streams code while
* still maintaining a very old streams performance enhancement. The
* performance enhancement basically involved having all callers
* of straccess() perform the first check that straccess() will do
 * locally before actually calling straccess(), thereby reducing
 * the number of unnecessary calls to straccess().
*/
#define i_straccess(x, y) ((stp->sd_sidp == NULL) ? 0 : \
(stp->sd_vnode->v_type == VFIFO) ? 0 : \
straccess((x), (y)))
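/*
 * Typical caller-side use of the macro (the same pattern appears in
 * strread() and the other entry points below); the cheap sd_sidp and
 * VFIFO checks short-circuit before the full straccess() call:
 *
 *	mutex_enter(&stp->sd_lock);
 *	if ((error = i_straccess(stp, JCREAD)) != 0) {
 *		mutex_exit(&stp->sd_lock);
 *		return (error);
 *	}
 */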
/*
* what is mblk_pull_len?
*
 * If a streams message consists of many short mblks,
 * performance suffers from the per-mblk copyout overhead.
 * To decrease that overhead, messages that are likely to
 * consist of many small mblks are pulled up into one
 * contiguous chunk of memory.
*
* To avoid the processing overhead of examining every
 * mblk, a quick heuristic is used: if the first mblk in
 * the message is shorter than mblk_pull_len, the rest of
 * the mblks are likely to be short as well.
*
* This heuristic was decided upon after performance tests
* indicated that anything more complex slowed down the main
* code path.
*/
#define MBLK_PULL_LEN 64
uint32_t mblk_pull_len = MBLK_PULL_LEN;
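/*
 * A rough sketch of how the heuristic is applied (the actual check
 * lives in the kstrgetmsg() code path); a message whose first mblk is
 * shorter than the threshold is pulled up into a single mblk before
 * copyout:
 *
 *	if (MBLKL(bp) < mblk_pull_len)
 *		(void) pullupmsg(bp, -1);
 */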
/*
* The sgttyb_handling flag controls the handling of the old BSD
* TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
*
* 0 - Emit no warnings at all and retain old, broken behavior.
* 1 - Emit no warnings and silently handle new semantics.
* 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
* (once per system invocation). Handle with new semantics.
* 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
* made (so that offenders drop core and are easy to debug).
*
* The "new semantics" are that TIOCGETP returns B38400 for
* sg_[io]speed if the corresponding value is over B38400, and that
* TIOCSET[PN] accept B38400 in these cases to mean "retain current
* bit rate."
*/
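/*
 * As with other global kernel tunables, the default below can be
 * overridden at boot time from /etc/system, e.g.:
 *
 *	set sgttyb_handling = 2
 */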
int sgttyb_handling = 1;
static boolean_t sgttyb_complaint;
/* don't push drcompat module by default on Style-2 streams */
static int push_drcompat = 0;
/*
* id value used to distinguish between different ioctl messages
*/
static uint32_t ioc_id;
static void putback(struct stdata *, queue_t *, mblk_t *, int);
static void strcleanall(struct vnode *);
static int strwsrv(queue_t *);
static int strdocmd(struct stdata *, struct strcmd *, cred_t *);
static void struioainit(queue_t *, sodirect_t *, uio_t *);
/*
* qinit and module_info structures for stream head read and write queues
*/
struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
FIFOLOWAT };
struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };
extern kmutex_t strresources; /* protects global resources */
extern kmutex_t muxifier; /* single-threads multiplexor creation */
static boolean_t msghasdata(mblk_t *bp);
#define msgnodata(bp) (!msghasdata(bp))
/*
* Stream head locking notes:
* There are four monitors associated with the stream head:
* 1. v_stream monitor: in stropen() and strclose() v_lock
* is held while the association of vnode and stream
* head is established or tested for.
* 2. open/close/push/pop monitor: sd_lock is held while each
* thread bids for exclusive access to this monitor
* for opening or closing a stream. In addition, this
* monitor is entered during pushes and pops. This
* guarantees that during plumbing operations there
* is only one thread trying to change the plumbing.
* Any other threads present in the stream are only
* using the plumbing.
* 3. read/write monitor: in the case of read, a thread holds
* sd_lock while trying to get data from the stream
 * head queue. If there is none to fulfill a read
 * request, it sets RSLEEP and calls cv_wait_sig() down
 * in strwaitq() to await the arrival of new data.
 * When new data arrives in strrput(), sd_lock is acquired
 * before testing for RSLEEP and calling cv_broadcast().
 * The behavior of strwrite(), strwsrv(), and WSLEEP
 * mirrors this.
* 4. ioctl monitor: sd_lock is gotten to ensure that only one
* thread is doing an ioctl at a time.
*
 * Note: for sodirect, case 3 above is extended to the
 * (*sodirect_t.sod_enqueue)() call-back from below. Further, sodirect
 * support only covers code paths called via kstrgetmsg(); all other
 * code paths ASSERT() that sodirect uioa-generated mblk_t's
 * (i.e. DBLK_UIOA) aren't processed.
*/
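/*
 * Push the named module onto the stream and, if the caller asked for
 * one, record a STREAMS anchor at that position.  Fails if the stream
 * is hung up or in error, if the push limit (nstrpush) has been
 * reached, or if the module cannot be found or loaded.
 */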
static int
push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
int anchor, cred_t *crp, uint_t anchor_zoneid)
{
int error;
fmodsw_impl_t *fp;
if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) {
error = (stp->sd_flag & STRHUP) ? ENXIO : EIO;
return (error);
}
if (stp->sd_pushcnt >= nstrpush) {
return (EINVAL);
}
if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) {
stp->sd_flag |= STREOPENFAIL;
return (EINVAL);
}
/*
* push new module and call its open routine via qattach
*/
if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0)
return (error);
/*
* Check to see if caller wants a STREAMS anchor
* put at this place in the stream, and add if so.
*/
mutex_enter(&stp->sd_lock);
if (anchor == stp->sd_pushcnt) {
stp->sd_anchor = stp->sd_pushcnt;
stp->sd_anchorzone = anchor_zoneid;
}
mutex_exit(&stp->sd_lock);
return (0);
}
/*
* Open a stream device.
*/
int
stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
{
struct stdata *stp;
queue_t *qp;
int s;
dev_t dummydev, savedev;
struct autopush *ap;
struct dlautopush dlap;
int error = 0;
ssize_t rmin, rmax;
int cloneopen;
queue_t *brq;
major_t major;
str_stack_t *ss;
zoneid_t zoneid;
uint_t anchor;
if (audit_active)
audit_stropen(vp, devp, flag, crp);
/*
* If the stream already exists, wait for any open in progress
* to complete, then call the open function of each module and
* driver in the stream. Otherwise create the stream.
*/
TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
retry:
mutex_enter(&vp->v_lock);
if ((stp = vp->v_stream) != NULL) {
/*
 * Waiting for the stream to be created on this
 * device due to another open.
*/
mutex_exit(&vp->v_lock);
if (STRMATED(stp)) {
struct stdata *strmatep = stp->sd_mate;
STRLOCKMATES(stp);
if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
if (flag & (FNDELAY|FNONBLOCK)) {
error = EAGAIN;
mutex_exit(&strmatep->sd_lock);
goto ckreturn;
}
mutex_exit(&stp->sd_lock);
if (!cv_wait_sig(&strmatep->sd_monitor,
&strmatep->sd_lock)) {
error = EINTR;
mutex_exit(&strmatep->sd_lock);
mutex_enter(&stp->sd_lock);
goto ckreturn;
}
mutex_exit(&strmatep->sd_lock);
goto retry;
}
if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
if (flag & (FNDELAY|FNONBLOCK)) {
error = EAGAIN;
mutex_exit(&strmatep->sd_lock);
goto ckreturn;
}
mutex_exit(&strmatep->sd_lock);
if (!cv_wait_sig(&stp->sd_monitor,
&stp->sd_lock)) {
error = EINTR;
goto ckreturn;
}
mutex_exit(&stp->sd_lock);
goto retry;
}
if (stp->sd_flag & (STRDERR|STWRERR)) {
error = EIO;
mutex_exit(&strmatep->sd_lock);
goto ckreturn;
}
stp->sd_flag |= STWOPEN;
STRUNLOCKMATES(stp);
} else {
mutex_enter(&stp->sd_lock);
if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
if (flag & (FNDELAY|FNONBLOCK)) {
error = EAGAIN;
goto ckreturn;
}
if (!cv_wait_sig(&stp->sd_monitor,
&stp->sd_lock)) {
error = EINTR;
goto ckreturn;
}
mutex_exit(&stp->sd_lock);
goto retry; /* could be clone! */
}
if (stp->sd_flag & (STRDERR|STWRERR)) {
error = EIO;
goto ckreturn;
}
stp->sd_flag |= STWOPEN;
mutex_exit(&stp->sd_lock);
}
/*
 * Open all modules and devices downstream to notify
* that another user is streaming. For modules, set the
* last argument to MODOPEN and do not pass any open flags.
* Ignore dummydev since this is not the first open.
*/
claimstr(stp->sd_wrq);
qp = stp->sd_wrq;
while (_SAMESTR(qp)) {
qp = qp->q_next;
if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
break;
}
releasestr(stp->sd_wrq);
mutex_enter(&stp->sd_lock);
stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
stp->sd_rerror = 0;
stp->sd_werror = 0;
ckreturn:
cv_broadcast(&stp->sd_monitor);
mutex_exit(&stp->sd_lock);
return (error);
}
/*
* This vnode isn't streaming. SPECFS already
* checked for multiple vnodes pointing to the
* same stream, so create a stream to the driver.
*/
qp = allocq();
stp = shalloc(qp);
/*
* Initialize stream head. shalloc() has given us
* exclusive access, and we have the vnode locked;
* we can do whatever we want with stp.
*/
stp->sd_flag = STWOPEN;
stp->sd_siglist = NULL;
stp->sd_pollist.ph_list = NULL;
stp->sd_sigflags = 0;
stp->sd_mark = NULL;
stp->sd_closetime = STRTIMOUT;
stp->sd_sidp = NULL;
stp->sd_pgidp = NULL;
stp->sd_vnode = vp;
stp->sd_rerror = 0;
stp->sd_werror = 0;
stp->sd_wroff = 0;
stp->sd_tail = 0;
stp->sd_iocblk = NULL;
stp->sd_cmdblk = NULL;
stp->sd_pushcnt = 0;
stp->sd_qn_minpsz = 0;
stp->sd_qn_maxpsz = INFPSZ - 1; /* used to check for initialization */
stp->sd_maxblk = INFPSZ;
stp->sd_sodirect = NULL;
qp->q_ptr = _WR(qp)->q_ptr = stp;
STREAM(qp) = STREAM(_WR(qp)) = stp;
vp->v_stream = stp;
mutex_exit(&vp->v_lock);
if (vp->v_type == VFIFO) {
stp->sd_flag |= OLDNDELAY;
/*
 * This means that, for both pipes and fifos,
 * strwrite will send SIGPIPE if the other
 * end is closed. For putmsg it depends on
 * whether the caller is an XPG4_2 application
 * or not.
*/
stp->sd_wput_opt = SW_SIGPIPE;
/* setq might sleep in kmem_alloc - avoid holding locks. */
setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
SQ_CI|SQ_CO, B_FALSE);
set_qend(qp);
stp->sd_strtab = fifo_getinfo();
_WR(qp)->q_nfsrv = _WR(qp);
qp->q_nfsrv = qp;
/*
* Wake up others that are waiting for stream to be created.
*/
mutex_enter(&stp->sd_lock);
/*
 * Nothing has been pushed on the stream yet, so the
 * optimized stream head packet sizes are just those
 * of the read queue.
*/
stp->sd_qn_minpsz = qp->q_minpsz;
stp->sd_qn_maxpsz = qp->q_maxpsz;
stp->sd_flag &= ~STWOPEN;
goto fifo_opendone;
}
/* setq might sleep in kmem_alloc - avoid holding locks. */
setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);
set_qend(qp);
/*
* Open driver and create stream to it (via qattach).
*/
savedev = *devp;
cloneopen = (getmajor(*devp) == clone_major);
if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
mutex_enter(&vp->v_lock);
vp->v_stream = NULL;
mutex_exit(&vp->v_lock);
mutex_enter(&stp->sd_lock);
cv_broadcast(&stp->sd_monitor);
mutex_exit(&stp->sd_lock);
freeq(_RD(qp));
shfree(stp);
return (error);
}
/*
* Set sd_strtab after open in order to handle clonable drivers
*/
stp->sd_strtab = STREAMSTAB(getmajor(*devp));
/*
 * Historical note: dummydev used to be set prior to the initial
* open (via qattach above), which made the value seen
* inconsistent between an I_PUSH and an autopush of a module.
*/
dummydev = *devp;
/*
* For clone open of old style (Q not associated) network driver,
* push DRMODNAME module to handle DL_ATTACH/DL_DETACH
*/
brq = _RD(_WR(qp)->q_next);
major = getmajor(*devp);
if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
((brq->q_flag & _QASSOCIATED) == 0)) {
if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0)
cmn_err(CE_WARN, "cannot push " DRMODNAME
" streams module");
}
if (!NETWORK_DRV(major)) {
savedev = *devp;
} else {
/*
* For network devices, process differently based on the
* return value from dld_autopush():
*
* 0: the passed-in device points to a GLDv3 datalink with
* per-link autopush configuration; use that configuration
* and ignore any per-driver autopush configuration.
*
* 1: the passed-in device points to a physical GLDv3
* datalink without per-link autopush configuration. The
* passed in device was changed to refer to the actual
* physical device (if it's not already); we use that new
* device to look up any per-driver autopush configuration.
*
* -1: neither of the above cases applied; use the initial
* device to look up any per-driver autopush configuration.
*/
switch (dld_autopush(&savedev, &dlap)) {
case 0:
zoneid = crgetzoneid(crp);
for (s = 0; s < dlap.dap_npush; s++) {
error = push_mod(qp, &dummydev, stp,
dlap.dap_aplist[s], dlap.dap_anchor, crp,
zoneid);
if (error != 0)
break;
}
goto opendone;
case 1:
break;
case -1:
savedev = *devp;
break;
}
}
/*
* Find the autopush configuration based on "savedev". Start with the
* global zone. If not found check in the local zone.
*/
zoneid = GLOBAL_ZONEID;
retryap:
ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))->
netstack_str;
if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) {
netstack_rele(ss->ss_netstack);
if (zoneid == GLOBAL_ZONEID) {
/*
* None found. Also look in the zone's autopush table.
*/
zoneid = crgetzoneid(crp);
if (zoneid != GLOBAL_ZONEID)
goto retryap;
}
goto opendone;
}
anchor = ap->ap_anchor;
zoneid = crgetzoneid(crp);
for (s = 0; s < ap->ap_npush; s++) {
error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
anchor, crp, zoneid);
if (error != 0)
break;
}
sad_ap_rele(ap, ss);
netstack_rele(ss->ss_netstack);
opendone:
/*
* let specfs know that open failed part way through
*/
if (error) {
mutex_enter(&stp->sd_lock);
stp->sd_flag |= STREOPENFAIL;
mutex_exit(&stp->sd_lock);
}
/*
* Wake up others that are waiting for stream to be created.
*/
mutex_enter(&stp->sd_lock);
stp->sd_flag &= ~STWOPEN;
/*
 * As a performance optimization, we cache the values of
* q_minpsz and q_maxpsz of the module below the stream
* head in the stream head.
*/
mutex_enter(QLOCK(stp->sd_wrq->q_next));
rmin = stp->sd_wrq->q_next->q_minpsz;
rmax = stp->sd_wrq->q_next->q_maxpsz;
mutex_exit(QLOCK(stp->sd_wrq->q_next));
/* do this processing here as a performance optimization */
if (strmsgsz != 0) {
if (rmax == INFPSZ)
rmax = strmsgsz;
else
rmax = MIN(strmsgsz, rmax);
}
mutex_enter(QLOCK(stp->sd_wrq));
stp->sd_qn_minpsz = rmin;
stp->sd_qn_maxpsz = rmax;
mutex_exit(QLOCK(stp->sd_wrq));
fifo_opendone:
cv_broadcast(&stp->sd_monitor);
mutex_exit(&stp->sd_lock);
return (error);
}
static int strsink(queue_t *, mblk_t *);
static struct qinit deadrend = {
strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
};
static struct qinit deadwend = {
NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
};
/*
* Close a stream.
* This is called from closef() on the last close of an open stream.
* Strclean() will already have removed the siglist and pollist
* information, so all that remains is to remove all multiplexor links
* for the stream, pop all the modules (and the driver), and free the
* stream structure.
*/
int
strclose(struct vnode *vp, int flag, cred_t *crp)
{
struct stdata *stp;
queue_t *qp;
int rval;
int freestp = 1;
queue_t *rmq;
if (audit_active)
audit_strclose(vp, flag, crp);
TRACE_1(TR_FAC_STREAMS_FR,
TR_STRCLOSE, "strclose:%p", vp);
ASSERT(vp->v_stream);
stp = vp->v_stream;
ASSERT(!(stp->sd_flag & STPLEX));
qp = stp->sd_wrq;
/*
* Needed so that strpoll will return non-zero for this fd.
* Note that with POLLNOERR STRHUP does still cause POLLHUP.
*/
mutex_enter(&stp->sd_lock);
stp->sd_flag |= STRHUP;
mutex_exit(&stp->sd_lock);
/*
* If the registered process or process group did not have an
* open instance of this stream then strclean would not be
* called. Thus at the time of closing all remaining siglist entries
* are removed.
*/
if (stp->sd_siglist != NULL)
strcleanall(vp);
ASSERT(stp->sd_siglist == NULL);
ASSERT(stp->sd_sigflags == 0);
if (STRMATED(stp)) {
struct stdata *strmatep = stp->sd_mate;
int waited = 1;
STRLOCKMATES(stp);
while (waited) {
waited = 0;
while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
mutex_exit(&strmatep->sd_lock);
cv_wait(&stp->sd_monitor, &stp->sd_lock);
mutex_exit(&stp->sd_lock);
STRLOCKMATES(stp);
waited = 1;
}
while (strmatep->sd_flag &
(STWOPEN|STRCLOSE|STRPLUMB)) {
mutex_exit(&stp->sd_lock);
cv_wait(&strmatep->sd_monitor,
&strmatep->sd_lock);
mutex_exit(&strmatep->sd_lock);
STRLOCKMATES(stp);
waited = 1;
}
}
stp->sd_flag |= STRCLOSE;
STRUNLOCKMATES(stp);
} else {
mutex_enter(&stp->sd_lock);
stp->sd_flag |= STRCLOSE;
mutex_exit(&stp->sd_lock);
}
ASSERT(qp->q_first == NULL); /* No more delayed write */
/* Check if an I_LINK was ever done on this stream */
if (stp->sd_flag & STRHASLINKS) {
netstack_t *ns;
str_stack_t *ss;
ns = netstack_find_by_cred(crp);
ASSERT(ns != NULL);
ss = ns->netstack_str;
ASSERT(ss != NULL);
(void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss);
netstack_rele(ss->ss_netstack);
}
while (_SAMESTR(qp)) {
/*
* Holding sd_lock prevents q_next from changing in
* this stream.
*/
mutex_enter(&stp->sd_lock);
if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {
/*
* sleep until awakened by strwsrv() or timeout
*/
for (;;) {
mutex_enter(QLOCK(qp->q_next));
if (!(qp->q_next->q_mblkcnt)) {
mutex_exit(QLOCK(qp->q_next));
break;
}
stp->sd_flag |= WSLEEP;
/* ensure strwsrv gets enabled */
qp->q_next->q_flag |= QWANTW;
mutex_exit(QLOCK(qp->q_next));
/* get out if we timed out or recv'd a signal */
if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
stp->sd_closetime, 0) <= 0) {
break;
}
}
stp->sd_flag &= ~WSLEEP;
}
mutex_exit(&stp->sd_lock);
rmq = qp->q_next;
if (rmq->q_flag & QISDRV) {
ASSERT(!_SAMESTR(rmq));
wait_sq_svc(_RD(qp)->q_syncq);
}
qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
}
/*
* Since we call pollwakeup in close() now, the poll list should
* be empty in most cases. The only exception is the layered devices
* (e.g. the console drivers with redirection modules pushed on top
* of it). We have to do this after calling qdetach() because
* the redirection module won't have torn down the console
* redirection until after qdetach() has been invoked.
*/
if (stp->sd_pollist.ph_list != NULL) {
pollwakeup(&stp->sd_pollist, POLLERR);
pollhead_clean(&stp->sd_pollist);
}
ASSERT(stp->sd_pollist.ph_list == NULL);
ASSERT(stp->sd_sidp == NULL);
ASSERT(stp->sd_pgidp == NULL);
/* Prevent qenable from re-enabling the stream head queue */
disable_svc(_RD(qp));
/*
* Wait until service procedure of each queue is
* run, if QINSERVICE is set.
*/
wait_svc(_RD(qp));
/*
* Now, flush both queues.
*/
flushq(_RD(qp), FLUSHALL);
flushq(qp, FLUSHALL);
/*
* If the write queue of the stream head is pointing to a
* read queue, we have a twisted stream. If the read queue
* is alive, convert the stream head queues into a dead end.
* If the read queue is dead, free the dead pair.
*/
if (qp->q_next && !_SAMESTR(qp)) {
if (qp->q_next->q_qinfo == &deadrend) { /* half-closed pipe */
flushq(qp->q_next, FLUSHALL); /* ensure no message */
shfree(qp->q_next->q_stream);
freeq(qp->q_next);
freeq(_RD(qp));
} else if (qp->q_next == _RD(qp)) { /* fifo */
freeq(_RD(qp));
} else { /* pipe */
freestp = 0;
/*
* The q_info pointers are never accessed when
* SQLOCK is held.
*/
ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
mutex_enter(SQLOCK(qp->q_syncq));
qp->q_qinfo = &deadwend;
_RD(qp)->q_qinfo = &deadrend;
mutex_exit(SQLOCK(qp->q_syncq));
}
} else {
freeq(_RD(qp)); /* free stream head queue pair */
}
mutex_enter(&vp->v_lock);
if (stp->sd_iocblk) {
if (stp->sd_iocblk != (mblk_t *)-1) {
freemsg(stp->sd_iocblk);
}
stp->sd_iocblk = NULL;
}
stp->sd_vnode = NULL;
vp->v_stream = NULL;
mutex_exit(&vp->v_lock);
mutex_enter(&stp->sd_lock);
freemsg(stp->sd_cmdblk);
stp->sd_cmdblk = NULL;
stp->sd_flag &= ~STRCLOSE;
cv_broadcast(&stp->sd_monitor);
mutex_exit(&stp->sd_lock);
if (freestp)
shfree(stp);
return (0);
}
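/*
 * Read-side put procedure for the "dead end" queue pair installed on
 * the surviving half of a twisted stream (see strclose() above).
 * Ordinary messages are discarded; M_FLUSH, M_COPYIN/M_COPYOUT and
 * M_IOCTL messages are bounced back so the peer does not wait forever
 * for a reply.
 */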
static int
strsink(queue_t *q, mblk_t *bp)
{
struct copyresp *resp;
switch (bp->b_datap->db_type) {
case M_FLUSH:
if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
*bp->b_rptr &= ~FLUSHR;
bp->b_flag |= MSGNOLOOP;
/*
* Protect against the driver passing up
* messages after it has done a qprocsoff.
*/
if (_OTHERQ(q)->q_next == NULL)
freemsg(bp);
else
qreply(q, bp);
} else {
freemsg(bp);
}
break;
case M_COPYIN:
case M_COPYOUT:
if (bp->b_cont) {
freemsg(bp->b_cont);
bp->b_cont = NULL;
}
bp->b_datap->db_type = M_IOCDATA;
bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
resp = (struct copyresp *)bp->b_rptr;
resp->cp_rval = (caddr_t)1; /* failure */
/*
* Protect against the driver passing up
* messages after it has done a qprocsoff.
*/
if (_OTHERQ(q)->q_next == NULL)
freemsg(bp);
else
qreply(q, bp);
break;
case M_IOCTL:
if (bp->b_cont) {
freemsg(bp->b_cont);
bp->b_cont = NULL;
}
bp->b_datap->db_type = M_IOCNAK;
/*
* Protect against the driver passing up
* messages after it has done a qprocsoff.
*/
if (_OTHERQ(q)->q_next == NULL)
freemsg(bp);
else
qreply(q, bp);
break;
default:
freemsg(bp);
break;
}
return (0);
}
/*
* Clean up after a process when it closes a stream. This is called
* from closef for all closes, whereas strclose is called only for the
* last close on a stream. The siglist is scanned for entries for the
* current process, and these are removed.
*/
void
strclean(struct vnode *vp)
{
strsig_t *ssp, *pssp, *tssp;
stdata_t *stp;
int update = 0;
TRACE_1(TR_FAC_STREAMS_FR,
TR_STRCLEAN, "strclean:%p", vp);
stp = vp->v_stream;
pssp = NULL;
mutex_enter(&stp->sd_lock);
ssp = stp->sd_siglist;
while (ssp) {
if (ssp->ss_pidp == curproc->p_pidp) {
tssp = ssp->ss_next;
if (pssp)
pssp->ss_next = tssp;
else
stp->sd_siglist = tssp;
mutex_enter(&pidlock);
PID_RELE(ssp->ss_pidp);
mutex_exit(&pidlock);
kmem_free(ssp, sizeof (strsig_t));
update = 1;
ssp = tssp;
} else {
pssp = ssp;
ssp = ssp->ss_next;
}
}
if (update) {
stp->sd_sigflags = 0;
for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
stp->sd_sigflags |= ssp->ss_events;
}
mutex_exit(&stp->sd_lock);
}
/*
* Used on the last close to remove any remaining items on the siglist.
 * These could be present on the siglist due to I_ESETSIG calls that
 * use process groups or processes that do not have an open file
 * descriptor for this stream (such entries would not be removed by
 * strclean).
*/
static void
strcleanall(struct vnode *vp)
{
strsig_t *ssp, *nssp;
stdata_t *stp;
stp = vp->v_stream;
mutex_enter(&stp->sd_lock);
ssp = stp->sd_siglist;
stp->sd_siglist = NULL;
while (ssp) {
nssp = ssp->ss_next;
mutex_enter(&pidlock);
PID_RELE(ssp->ss_pidp);
mutex_exit(&pidlock);
kmem_free(ssp, sizeof (strsig_t));
ssp = nssp;
}
stp->sd_sigflags = 0;
mutex_exit(&stp->sd_lock);
}
/*
* Retrieve the next message from the logical stream head read queue
* using either rwnext (if sync stream) or getq_noenab.
 * It is the caller's responsibility to call qbackenable after
 * it is finished with the message. The caller should not call
 * qbackenable until after any putback calls to avoid spurious backenabling.
 *
 * Also, handle uioa initialization and process any DBLK_UIOA flagged messages.
*/
mblk_t *
strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
int *errorp)
{
sodirect_t *sodp = stp->sd_sodirect;
mblk_t *bp;
int error;
ssize_t rbytes = 0;
/* Holding sd_lock prevents the read queue from changing */
ASSERT(MUTEX_HELD(&stp->sd_lock));
if (uiop != NULL && stp->sd_struiordq != NULL &&
q->q_first == NULL &&
(!first || (stp->sd_wakeq & RSLEEP))) {
/*
* Stream supports rwnext() for the read side.
 * If this is the first time we're called (e.g. by strread),
 * only do the downcall if there is a deferred wakeup
* (registered in sd_wakeq).
*/
struiod_t uiod;
if (first)
stp->sd_wakeq &= ~RSLEEP;
(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
uiod.d_mp = 0;
/*
* Mark that a thread is in rwnext on the read side
* to prevent strrput from nacking ioctls immediately.
* When the last concurrent rwnext returns
* the ioctls are nack'ed.
*/
ASSERT(MUTEX_HELD(&stp->sd_lock));
stp->sd_struiodnak++;
/*
* Note: rwnext will drop sd_lock.
*/
error = rwnext(q, &uiod);
ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
mutex_enter(&stp->sd_lock);
stp->sd_struiodnak--;
while (stp->sd_struiodnak == 0 &&
((bp = stp->sd_struionak) != NULL)) {
stp->sd_struionak = bp->b_next;
bp->b_next = NULL;
bp->b_datap->db_type = M_IOCNAK;
/*
* Protect against the driver passing up
* messages after it has done a qprocsoff.
*/
if (_OTHERQ(q)->q_next == NULL)
freemsg(bp);
else {
mutex_exit(&stp->sd_lock);
qreply(q, bp);
mutex_enter(&stp->sd_lock);
}
}
ASSERT(MUTEX_HELD(&stp->sd_lock));
if (error == 0 || error == EWOULDBLOCK) {
if ((bp = uiod.d_mp) != NULL) {
*errorp = 0;
ASSERT(MUTEX_HELD(&stp->sd_lock));
return (bp);
}
error = 0;
} else if (error == EINVAL) {
/*
* The stream plumbing must have
* changed while we were away, so
* just turn off rwnext()s.
*/
error = 0;
} else if (error == EBUSY) {
/*
 * The module might have data in transit using putnext.
 * Fall back on waiting + getq.
*/
error = 0;
} else {
*errorp = error;
ASSERT(MUTEX_HELD(&stp->sd_lock));
return (NULL);
}
/*
* Try a getq in case a rwnext() generated mblk
* has bubbled up via strrput().
*/
}
*errorp = 0;
ASSERT(MUTEX_HELD(&stp->sd_lock));
if (sodp != NULL && sodp->sod_state & SOD_ENABLED) {
if (sodp->sod_uioa.uioa_state & UIOA_INIT) {
/*
 * First kstrgetmsg() call for a uioa_t, so if there are
 * any queued mblk_t's they need to be consumed before
 * uioa from below can occur.
*/
sodp->sod_uioa.uioa_state &= UIOA_CLR;
sodp->sod_uioa.uioa_state |= UIOA_ENABLED;
if (q->q_first != NULL) {
struioainit(q, sodp, uiop);
}
} else if (sodp->sod_uioa.uioa_state &
(UIOA_ENABLED|UIOA_FINI)) {
ASSERT(uiop == (uio_t *)&sodp->sod_uioa);
rbytes = 0;
} else {
rbytes = uiop->uio_resid;
}
} else {
/*
 * If we have a valid uio, try to use it as a guide for how
 * many bytes to retrieve from the queue via getq_noenab().
 * Doing this can avoid unnecessary counting of overlong
* messages in putback(). We currently only do this for sockets
* and only if there is no sd_rputdatafunc hook.
*
* The sd_rputdatafunc hook transforms the entire message
* before any bytes in it can be given to a client. So, rbytes
* must be 0 if there is a hook.
*/
if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) &&
(stp->sd_rputdatafunc == NULL))
rbytes = uiop->uio_resid;
}
bp = getq_noenab(q, rbytes);
sod_uioa_mblk_done(sodp, bp);
return (bp);
}
/*
* Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
* If the message does not fit in the uio the remainder of it is returned;
* otherwise NULL is returned. Any embedded zero-length mblk_t's are
* consumed, even if uio_resid reaches zero. On error, `*errorp' is set to
* the error code, the message is consumed, and NULL is returned.
*/
static mblk_t *
struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
{
int error;
ptrdiff_t n;
mblk_t *nbp;
ASSERT(bp->b_wptr >= bp->b_rptr);
do {
ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
ASSERT(n > 0);
error = uiomove(bp->b_rptr, n, UIO_READ, uiop);
if (error != 0) {
freemsg(bp);
*errorp = error;
return (NULL);
}
}
bp->b_rptr += n;
while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) {
nbp = bp;
bp = bp->b_cont;
freeb(nbp);
}
} while (bp != NULL && uiop->uio_resid > 0);
*errorp = 0;
return (bp);
}
/*
* Read a stream according to the mode flags in sd_flag:
*
* (default mode) - Byte stream, msg boundaries are ignored
* RD_MSGDIS (msg discard) - Read on msg boundaries and throw away
* any data remaining in msg
* RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
* any remaining data on head of read queue
*
* Consume readable messages on the front of the queue until
* ttolwp(curthread)->lwp_count
* is satisfied, the readable messages are exhausted, or a message
* boundary is reached in a message mode. If no data was read and
* the stream was not opened with the NDELAY flag, block until data arrives.
* Otherwise return the data read and update the count.
*
* In default mode a 0 length message signifies end-of-file and terminates
* a read in progress. The 0 length message is removed from the queue
* only if it is the only message read (no data is read).
*
* An attempt to read an M_PROTO or M_PCPROTO message results in an
* EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
* If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
 * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
 * are unlinked from any M_DATA blocks in the message, the protos are
 * thrown away, and the data is read.
*/
/* ARGSUSED */
int
strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
{
struct stdata *stp;
mblk_t *bp, *nbp;
queue_t *q;
int error = 0;
uint_t old_sd_flag;
int first;
char rflg;
uint_t mark; /* Contains MSG*MARK and _LASTMARK */
#define _LASTMARK 0x8000 /* Distinct from MSG*MARK */
short delim;
unsigned char pri = 0;
char waitflag;
unsigned char type;
TRACE_1(TR_FAC_STREAMS_FR,
TR_STRREAD_ENTER, "strread:%p", vp);
ASSERT(vp->v_stream);
stp = vp->v_stream;
mutex_enter(&stp->sd_lock);
if ((error = i_straccess(stp, JCREAD)) != 0) {
mutex_exit(&stp->sd_lock);
return (error);
}
if (stp->sd_flag & (STRDERR|STPLEX)) {
error = strgeterr(stp, STRDERR|STPLEX, 0);
if (error != 0) {
mutex_exit(&stp->sd_lock);
return (error);
}
}
/*
* Loop terminates when uiop->uio_resid == 0.
*/
rflg = 0;
waitflag = READWAIT;
q = _RD(stp->sd_wrq);
for (;;) {
ASSERT(MUTEX_HELD(&stp->sd_lock));
old_sd_flag = stp->sd_flag;
mark = 0;
delim = 0;
first = 1;
while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
int done = 0;
ASSERT(MUTEX_HELD(&stp->sd_lock));
if (error != 0)
goto oops;
if (stp->sd_flag & (STRHUP|STREOF)) {
goto oops;
}
if (rflg && !(stp->sd_flag & STRDELIM)) {
goto oops;
}
/*
* If a read(fd,buf,0) has been done, there is no
* need to sleep. We always have zero bytes to
* return.
*/
if (uiop->uio_resid == 0) {
goto oops;
}
qbackenable(q, 0);
TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
"strread calls strwaitq:%p, %p, %p",
vp, uiop, crp);
if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
uiop->uio_fmode, -1, &done)) != 0 || done) {
TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
"strread error or done:%p, %p, %p",
vp, uiop, crp);
if ((uiop->uio_fmode & FNDELAY) &&
(stp->sd_flag & OLDNDELAY) &&
(error == EAGAIN))
error = 0;
goto oops;
}
TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
"strread awakes:%p, %p, %p", vp, uiop, crp);
if ((error = i_straccess(stp, JCREAD)) != 0) {
goto oops;
}
first = 0;
}
ASSERT(MUTEX_HELD(&stp->sd_lock));
ASSERT(bp);
ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
pri = bp->b_band;
/*
* Extract any mark information. If the message is not
* completely consumed this information will be put in the mblk
* that is putback.
* If MSGMARKNEXT is set and the message is completely consumed
* the STRATMARK flag will be set below. Likewise, if
* MSGNOTMARKNEXT is set and the message is
* completely consumed STRNOTATMARK will be set.
*
* For some unknown reason strread only breaks the read at the
* last mark.
*/
mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
(MSGMARKNEXT|MSGNOTMARKNEXT));
if (mark != 0 && bp == stp->sd_mark) {
if (rflg) {
putback(stp, q, bp, pri);
goto oops;
}
mark |= _LASTMARK;
stp->sd_mark = NULL;
}
if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
delim = 1;
mutex_exit(&stp->sd_lock);
if (STREAM_NEEDSERVICE(stp))
stream_runservice(stp);
type = bp->b_datap->db_type;
switch (type) {
case M_DATA:
ismdata:
if (msgnodata(bp)) {
if (mark || delim) {
freemsg(bp);
} else if (rflg) {
/*
 * If data has already been read, put the
 * zero-length message back on the queue;
 * otherwise free the msg and return 0.
*/
bp->b_band = pri;
mutex_enter(&stp->sd_lock);
putback(stp, q, bp, pri);
mutex_exit(&stp->sd_lock);
} else {
freemsg(bp);
}
error = 0;
goto oops1;
}
rflg = 1;
waitflag |= NOINTR;
bp = struiocopyout(bp, uiop, &error);
if (error != 0)
goto oops1;
mutex_enter(&stp->sd_lock);
if (bp) {
/*
* Have remaining data in message.
* Free msg if in discard mode.
*/
if (stp->sd_read_opt & RD_MSGDIS) {
freemsg(bp);
} else {
bp->b_band = pri;
if ((mark & _LASTMARK) &&
(stp->sd_mark == NULL))
stp->sd_mark = bp;
bp->b_flag |= mark & ~_LASTMARK;
if (delim)
bp->b_flag |= MSGDELIM;
if (msgnodata(bp))
freemsg(bp);
else
putback(stp, q, bp, pri);
}
} else {
/*
* Consumed the complete message.
* Move the MSG*MARKNEXT information
* to the stream head just in case
* the read queue becomes empty.
*
 * If the stream head was at the mark
 * (STRATMARK) before we dropped sd_lock above
 * and some data was consumed, then we have
 * moved past the mark and thus STRATMARK is
 * cleared. However, if a message arrived in
 * strrput during the copyout above, causing
 * STRATMARK to be set, we cannot clear that
 * flag.
*/
if (mark &
(MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
if (mark & MSGMARKNEXT) {
stp->sd_flag &= ~STRNOTATMARK;
stp->sd_flag |= STRATMARK;
} else if (mark & MSGNOTMARKNEXT) {
stp->sd_flag &= ~STRATMARK;
stp->sd_flag |= STRNOTATMARK;
} else {
stp->sd_flag &=
~(STRATMARK|STRNOTATMARK);
}
} else if (rflg && (old_sd_flag & STRATMARK)) {
stp->sd_flag &= ~STRATMARK;
}
}
/*
* Check for signal messages at the front of the read
* queue and generate the signal(s) if appropriate.
* The only signal that can be on queue is M_SIG at
* this point.
*/
while ((((bp = q->q_first)) != NULL) &&
(bp->b_datap->db_type == M_SIG)) {
bp = getq_noenab(q, 0);
/*
* sd_lock is held so the content of the
* read queue can not change.
*/
ASSERT(bp != NULL && DB_TYPE(bp) == M_SIG);
strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
mutex_exit(&stp->sd_lock);
freemsg(bp);
if (STREAM_NEEDSERVICE(stp))
stream_runservice(stp);
mutex_enter(&stp->sd_lock);
}
if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
delim ||
(stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
goto oops;
}
continue;
case M_SIG:
strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
freemsg(bp);
mutex_enter(&stp->sd_lock);
continue;
case M_PROTO:
case M_PCPROTO:
/*
* Only data messages are readable.
* Any others generate an error, unless
* RD_PROTDIS or RD_PROTDAT is set.
*/
if (stp->sd_read_opt & RD_PROTDAT) {
for (nbp = bp; nbp; nbp = nbp->b_next) {
if ((nbp->b_datap->db_type ==
M_PROTO) ||
(nbp->b_datap->db_type ==
M_PCPROTO)) {
nbp->b_datap->db_type = M_DATA;
} else {
break;
}
}
/*
* clear stream head hi pri flag based on
* first message
*/
if (type == M_PCPROTO) {
mutex_enter(&stp->sd_lock);
stp->sd_flag &= ~STRPRI;
mutex_exit(&stp->sd_lock);
}
goto ismdata;
} else if (stp->sd_read_opt & RD_PROTDIS) {
/*
* discard non-data messages
*/
while (bp &&
((bp->b_datap->db_type == M_PROTO) ||
(bp->b_datap->db_type == M_PCPROTO))) {
nbp = unlinkb(bp);
freeb(bp);
bp = nbp;
}
/*
* clear stream head hi pri flag based on
* first message
*/
if (type == M_PCPROTO) {
mutex_enter(&stp->sd_lock);
stp->sd_flag &= ~STRPRI;
mutex_exit(&stp->sd_lock);
}
if (bp) {
bp->b_band = pri;
goto ismdata;
} else {
break;
}
}
/* FALLTHRU */
case M_PASSFP:
if ((bp->b_datap->db_type == M_PASSFP) &&
(stp->sd_read_opt & RD_PROTDIS)) {
freemsg(bp);
break;
}
mutex_enter(&stp->sd_lock);
putback(stp, q, bp, pri);
mutex_exit(&stp->sd_lock);
if (rflg == 0)
error = EBADMSG;
goto oops1;
default:
/*
* Garbage on stream head read queue.
*/
cmn_err(CE_WARN, "bad %x found at stream head\n",
bp->b_datap->db_type);
freemsg(bp);
goto oops1;
}
mutex_enter(&stp->sd_lock);
}
oops:
mutex_exit(&stp->sd_lock);
oops1:
qbackenable(q, pri);
return (error);
#undef _LASTMARK
}
/*
* Default processing of M_PROTO/M_PCPROTO messages.
* Determine which wakeups and signals are needed.
* This can be replaced by a user-specified procedure for kernel users
* of STREAMS.
*/
/* ARGSUSED */
mblk_t *
strrput_proto(vnode_t *vp, mblk_t *mp,
strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
*wakeups = RSLEEP;
*allmsgsigs = 0;
switch (mp->b_datap->db_type) {
case M_PROTO:
if (mp->b_band == 0) {
*firstmsgsigs = S_INPUT | S_RDNORM;
*pollwakeups = POLLIN | POLLRDNORM;
} else {
*firstmsgsigs = S_INPUT | S_RDBAND;
*pollwakeups = POLLIN | POLLRDBAND;
}
break;
case M_PCPROTO:
*firstmsgsigs = S_HIPRI;
*pollwakeups = POLLPRI;
break;
}
return (mp);
}
/*
* Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and
* M_PASSFP messages.
* Determine which wakeups and signals are needed.
* This can be replaced by a user-specified procedure for kernel users
* of STREAMS.
*/
/* ARGSUSED */
mblk_t *
strrput_misc(vnode_t *vp, mblk_t *mp,
strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
*wakeups = 0;
*firstmsgsigs = 0;
*allmsgsigs = 0;
*pollwakeups = 0;
return (mp);
}
/*
* Stream read put procedure. Called from downstream driver/module
* with messages for the stream head. Data, protocol, and in-stream
* signal messages are placed on the queue, others are handled directly.
*/
int
strrput(queue_t *q, mblk_t *bp)
{
struct stdata *stp;
ulong_t rput_opt;
strwakeup_t wakeups;
strsigset_t firstmsgsigs; /* Signals if first message on queue */
strsigset_t allmsgsigs; /* Signals for all messages */
strsigset_t signals; /* Signals events to generate */
strpollset_t pollwakeups;
mblk_t *nextbp;
uchar_t band = 0;
int hipri_sig;
stp = (struct stdata *)q->q_ptr;
/*
* Use rput_opt for optimized access to the SR_ flags except
* SR_POLLIN. That flag has to be checked under sd_lock since it
* is modified by strpoll().
*/
rput_opt = stp->sd_rput_opt;
ASSERT(qclaimed(q));
TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
"strrput called with message type:q %p bp %p", q, bp);
/*
* Perform initial processing and pass to the parameterized functions.
*/
ASSERT(bp->b_next == NULL);
switch (bp->b_datap->db_type) {
case M_DATA:
/*
* sockfs is the only consumer of STREOF and when it is set,
* it implies that the receiver is not interested in receiving
* any more data, hence the mblk is freed to prevent unnecessary
* message queueing at the stream head.
*/
if (stp->sd_flag == STREOF) {
freemsg(bp);
return (0);
}
if ((rput_opt & SR_IGN_ZEROLEN) &&
bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
/*
* Ignore zero-length M_DATA messages. These might be
* generated by some transports.
 * The zero-length M_DATA messages, even if they
 * are ignored, should still affect the atmark tracking and
 * should wake up a thread sleeping in strwaitmark.
*/
mutex_enter(&stp->sd_lock);
if (bp->b_flag & MSGMARKNEXT) {
/*
* Record the position of the mark either
* in q_last or in STRATMARK.
*/
if (q->q_last != NULL) {
q->q_last->b_flag &= ~MSGNOTMARKNEXT;
q->q_last->b_flag |= MSGMARKNEXT;
} else {
stp->sd_flag &= ~STRNOTATMARK;
stp->sd_flag |= STRATMARK;
}
} else if (bp->b_flag & MSGNOTMARKNEXT) {
/*
* Record that this is not the position of
* the mark either in q_last or in
* STRNOTATMARK.
*/
if (q->q_last != NULL) {
q->q_last->b_flag &= ~MSGMARKNEXT;
q->q_last->b_flag |= MSGNOTMARKNEXT;
} else {
stp->sd_flag &= ~STRATMARK;
stp->sd_flag |= STRNOTATMARK;
}
}
if (stp->sd_flag & RSLEEP) {
stp->sd_flag &= ~RSLEEP;
cv_broadcast(&q->q_wait);
}
mutex_exit(&stp->sd_lock);
freemsg(bp);
return (0);
}
wakeups = RSLEEP;
if (bp->b_band == 0) {
firstmsgsigs = S_INPUT | S_RDNORM;
pollwakeups = POLLIN | POLLRDNORM;
} else {
firstmsgsigs = S_INPUT | S_RDBAND;
pollwakeups = POLLIN | POLLRDBAND;
}
if (rput_opt & SR_SIGALLDATA)
allmsgsigs = firstmsgsigs;
else
allmsgsigs = 0;
mutex_enter(&stp->sd_lock);
if ((rput_opt & SR_CONSOL_DATA) &&
(q->q_last != NULL) &&
(bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
/*
* Consolidate an M_DATA message onto an M_DATA,
* M_PROTO, or M_PCPROTO by merging it with q_last.
* The consolidation does not take place if
* the old message is marked with either of the
* marks or the delim flag or if the new
* message is marked with MSGMARK. The MSGMARK
* check is needed to handle the odd semantics of
* MSGMARK where essentially the whole message
* is to be treated as marked.
* Carry any MSGMARKNEXT and MSGNOTMARKNEXT from the
* new message to the front of the b_cont chain.
*/
mblk_t *lbp = q->q_last;
unsigned char db_type = lbp->b_datap->db_type;
if ((db_type == M_DATA || db_type == M_PROTO ||
db_type == M_PCPROTO) &&
!(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) {
rmvq_noenab(q, lbp);
/*
* The first message in the b_cont list
* tracks MSGMARKNEXT and MSGNOTMARKNEXT.
* We need to handle the case where we
* are appending:
*
* 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
* 2) a MSGMARKNEXT to a plain message.
* 3) a MSGNOTMARKNEXT to a plain message
* 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
* message.
*
* Thus we never append a MSGMARKNEXT or
* MSGNOTMARKNEXT to a MSGMARKNEXT message.
*/
if (bp->b_flag & MSGMARKNEXT) {
lbp->b_flag |= MSGMARKNEXT;
lbp->b_flag &= ~MSGNOTMARKNEXT;
bp->b_flag &= ~MSGMARKNEXT;
} else if (bp->b_flag & MSGNOTMARKNEXT) {
lbp->b_flag |= MSGNOTMARKNEXT;
bp->b_flag &= ~MSGNOTMARKNEXT;
}
linkb(lbp, bp);
bp = lbp;
/*
* The new message logically isn't the first
* even though the q_first check below thinks
* it is. Clear the firstmsgsigs to make it
* not appear to be first.
*/
firstmsgsigs = 0;
}
}
break;
case M_PASSFP:
wakeups = RSLEEP;
allmsgsigs = 0;
if (bp->b_band == 0) {
firstmsgsigs = S_INPUT | S_RDNORM;
pollwakeups = POLLIN | POLLRDNORM;
} else {
firstmsgsigs = S_INPUT | S_RDBAND;
pollwakeups = POLLIN | POLLRDBAND;
}
mutex_enter(&stp->sd_lock);
break;
case M_PROTO:
case M_PCPROTO:
ASSERT(stp->sd_rprotofunc != NULL);
bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
&wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
#define ALLSIG (S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
#define ALLPOLL (POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
POLLWRBAND)
ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
ASSERT((firstmsgsigs & ~ALLSIG) == 0);
ASSERT((allmsgsigs & ~ALLSIG) == 0);
ASSERT((pollwakeups & ~ALLPOLL) == 0);
mutex_enter(&stp->sd_lock);
break;
default:
ASSERT(stp->sd_rmiscfunc != NULL);
bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
&wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
ASSERT((firstmsgsigs & ~ALLSIG) == 0);
ASSERT((allmsgsigs & ~ALLSIG) == 0);
ASSERT((pollwakeups & ~ALLPOLL) == 0);
#undef ALLSIG
#undef ALLPOLL
mutex_enter(&stp->sd_lock);
break;
}
ASSERT(MUTEX_HELD(&stp->sd_lock));
/* By default generate superset of signals */
signals = (firstmsgsigs | allmsgsigs);
/*
* The proto and misc functions can return multiple messages
* as a b_next chain. Such messages are processed separately.
*/
one_more:
hipri_sig = 0;
if (bp == NULL) {
nextbp = NULL;
} else {
nextbp = bp->b_next;
bp->b_next = NULL;
switch (bp->b_datap->db_type) {
case M_PCPROTO:
/*
* Only one priority protocol message is allowed at the
* stream head at a time.
*/
if (stp->sd_flag & STRPRI) {
TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
"M_PCPROTO already at head");
freemsg(bp);
mutex_exit(&stp->sd_lock);
goto done;
}
stp->sd_flag |= STRPRI;
hipri_sig = 1;
/* FALLTHRU */
case M_DATA:
case M_PROTO:
case M_PASSFP:
band = bp->b_band;
/*
* Marking doesn't work well when messages
* are marked in more than one band. We only
* remember the last message received, even if
* it is placed on the queue ahead of other
* marked messages.
*/
if (bp->b_flag & MSGMARK)
stp->sd_mark = bp;
(void) putq(q, bp);
/*
* If message is a PCPROTO message, always use
* firstmsgsigs to determine if a signal should be
* sent as strrput is the only place to send
* signals for PCPROTO. Other messages are based on
* the STRGETINPROG flag. The flag determines if
* strrput or (k)strgetmsg will be responsible for
* sending the signals, in the firstmsgsigs case.
*/
if ((hipri_sig == 1) ||
(((stp->sd_flag & STRGETINPROG) == 0) &&
(q->q_first == bp)))
signals = (firstmsgsigs | allmsgsigs);
else
signals = allmsgsigs;
break;
default:
mutex_exit(&stp->sd_lock);
(void) strrput_nondata(q, bp);
mutex_enter(&stp->sd_lock);
break;
}
}
ASSERT(MUTEX_HELD(&stp->sd_lock));
/*
* Wake sleeping read/getmsg and cancel deferred wakeup
*/
if (wakeups & RSLEEP)
stp->sd_wakeq &= ~RSLEEP;
wakeups &= stp->sd_flag;
if (wakeups & RSLEEP) {
stp->sd_flag &= ~RSLEEP;
cv_broadcast(&q->q_wait);
}
if (wakeups & WSLEEP) {
stp->sd_flag &= ~WSLEEP;
cv_broadcast(&_WR(q)->q_wait);
}
if (pollwakeups != 0) {
if (pollwakeups == (POLLIN | POLLRDNORM)) {
/*
* Can't use rput_opt since it was not
* read when sd_lock was held and SR_POLLIN is changed
* by strpoll() under sd_lock.
*/
if (!(stp->sd_rput_opt & SR_POLLIN))
goto no_pollwake;
stp->sd_rput_opt &= ~SR_POLLIN;
}
mutex_exit(&stp->sd_lock);
pollwakeup(&stp->sd_pollist, pollwakeups);
mutex_enter(&stp->sd_lock);
}
no_pollwake:
/*
* strsendsig can handle multiple signals with a
* single call.
*/
if (stp->sd_sigflags & signals)
strsendsig(stp->sd_siglist, signals, band, 0);
mutex_exit(&stp->sd_lock);
done:
if (nextbp == NULL)
return (0);
/*
* Any signals were handled the first time.
* Wakeups and pollwakeups are redone to avoid any race
* conditions - all the messages are not queued until the
* last message has been processed by strrput.
*/
bp = nextbp;
signals = firstmsgsigs = allmsgsigs = 0;
mutex_enter(&stp->sd_lock);
goto one_more;
}
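/*
 * Log a duplicate M_IOCACK/M_IOCNAK arriving at the stream head,
 * naming the modules and driver on the stream so the offending
 * module can be identified.
 */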
static void
log_dupioc(queue_t *rq, mblk_t *bp)
{
queue_t *wq, *qp;
char *modnames, *mnp, *dname;
size_t maxmodstr;
boolean_t islast;
/*
* Allocate a buffer large enough to hold the names of nstrpush modules
* and one driver, with spaces between and NUL terminator. If we can't
* get memory, then we'll just log the driver name.
*/
maxmodstr = nstrpush * (FMNAMESZ + 1);
mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP);
/* march down write side to print log message down to the driver */
wq = WR(rq);
/* make sure q_next doesn't shift around while we're grabbing data */
claimstr(wq);
qp = wq->q_next;
do {
dname = Q2NAME(qp);
islast = !SAMESTR(qp) || qp->q_next == NULL;
if (modnames == NULL) {
/*
* If we don't have memory, then get the driver name in
* the log where we can see it. Note that memory
* pressure is a possible cause of these sorts of bugs.
*/
if (islast) {
modnames = dname;
maxmodstr = 0;
}
} else {
mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname);
if (!islast)
*mnp++ = ' ';
}
qp = qp->q_next;
} while (!islast);
releasestr(wq);
/* Cannot happen unless stream head is corrupt. */
ASSERT(modnames != NULL);
(void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1,
SL_CONSOLE|SL_TRACE|SL_ERROR,
"Warning: stream %p received duplicate %X M_IOC%s; module list: %s",
rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd,
(DB_TYPE(bp) == M_IOCACK ? "ACK" : "NAK"), modnames);
if (maxmodstr != 0)
kmem_free(modnames, maxmodstr);
}
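/*
 * Handle message types other than M_DATA, M_PROTO, M_PCPROTO and
 * M_PASSFP arriving at the stream head: errors, hangups, signals,
 * flushes, ioctl replies, copy requests and M_SETOPTS.
 */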
int
strrput_nondata(queue_t *q, mblk_t *bp)
{
struct stdata *stp;
struct iocblk *iocbp;
struct stroptions *sop;
struct copyreq *reqp;
struct copyresp *resp;
unsigned char bpri;
unsigned char flushed_already = 0;
stp = (struct stdata *)q->q_ptr;
ASSERT(!(stp->sd_flag & STPLEX));
ASSERT(qclaimed(q));
switch (bp->b_datap->db_type) {
case M_ERROR:
/*
 * An error has occurred downstream; the errno is in the first
 * bytes of the message.
*/
if ((bp->b_wptr - bp->b_rptr) == 2) { /* New flavor */
unsigned char rw = 0;
mutex_enter(&stp->sd_lock);
if (*bp->b_rptr != NOERROR) { /* read error */
if (*bp->b_rptr != 0) {
if (stp->sd_flag & STRDERR)
flushed_already |= FLUSHR;
stp->sd_flag |= STRDERR;
rw |= FLUSHR;
} else {
stp->sd_flag &= ~STRDERR;
}
stp->sd_rerror = *bp->b_rptr;
}
bp->b_rptr++;
if (*bp->b_rptr != NOERROR) { /* write error */
if (*bp->b_rptr != 0) {
if (stp->sd_flag & STWRERR)
flushed_already |= FLUSHW;
stp->sd_flag |= STWRERR;
rw |= FLUSHW;
} else {
stp->sd_flag &= ~STWRERR;
}
stp->sd_werror = *bp->b_rptr;
}
if (rw) {
TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE,
"strrput cv_broadcast:q %p, bp %p",
q, bp);
cv_broadcast(&q->q_wait); /* readers */
cv_broadcast(&_WR(q)->q_wait); /* writers */
cv_broadcast(&stp->sd_monitor); /* ioctllers */
mutex_exit(&stp->sd_lock);
pollwakeup(&stp->sd_pollist, POLLERR);
mutex_enter(&stp->sd_lock);
if (stp->sd_sigflags & S_ERROR)
strsendsig(stp->sd_siglist, S_ERROR, 0,
((rw & FLUSHR) ? stp->sd_rerror :
stp->sd_werror));
mutex_exit(&stp->sd_lock);
/*
* Send the M_FLUSH only
* for the first M_ERROR
* message on the stream
*/
if (flushed_already == rw) {
freemsg(bp);
return (0);
}
bp->b_datap->db_type = M_FLUSH;
*bp->b_rptr = rw;
bp->b_wptr = bp->b_rptr + 1;
/*
* Protect against the driver
* passing up messages after
* it has done a qprocsoff
*/
if (_OTHERQ(q)->q_next == NULL)
freemsg(bp);
else
qreply(q, bp);
return (0);
} else
mutex_exit(&stp->sd_lock);
} else if (*bp->b_rptr != 0) { /* Old flavor */
if (stp->sd_flag & (STRDERR|STWRERR))
flushed_already = FLUSHRW;
mutex_enter(&stp->sd_lock);
stp->sd_flag |= (STRDERR|STWRERR);
stp->sd_rerror = *bp->b_rptr;
stp->sd_werror = *bp->b_rptr;
TRACE_2(TR_FAC_STREAMS_FR,
TR_STRRPUT_WAKE2,
"strrput wakeup #2:q %p, bp %p", q, bp);
cv_broadcast(&q->q_wait); /* the readers */
cv_broadcast(&_WR(q)->q_wait); /* the writers */
cv_broadcast(&stp->sd_monitor); /* ioctllers */
mutex_exit(&stp->sd_lock);
pollwakeup(&stp->sd_pollist, POLLERR);
mutex_enter(&stp->sd_lock);
if (stp->sd_sigflags & S_ERROR)
strsendsig(stp->sd_siglist, S_ERROR, 0,
(stp->sd_werror ? stp->sd_werror :
stp->sd_rerror));
mutex_exit(&stp->sd_lock);
/*
* Send the M_FLUSH only
* for the first M_ERROR
* message on the stream
*/
if (flushed_already != FLUSHRW) {
bp->b_datap->db_type = M_FLUSH;
*bp->b_rptr = FLUSHRW;
/*
* Protect against the driver passing up
* messages after it has done a
* qprocsoff.
*/
if (_OTHERQ(q)->q_next == NULL)
freemsg(bp);
else
qreply(q, bp);
return (0);
}
}
freemsg(bp);
return (0);
case M_HANGUP:
freemsg(bp);
mutex_enter(&stp->sd_lock);
stp->sd_werror = ENXIO;
stp->sd_flag |= STRHUP;
stp->sd_flag &= ~(WSLEEP|RSLEEP);
/*
* send signal if controlling tty
*/
if (stp->sd_sidp) {
prsignal(stp->sd_sidp, SIGHUP);
if (stp->sd_sidp != stp->sd_pgidp)
pgsignal(stp->sd_pgidp, SIGTSTP);
}
/*
* wake up read, write, and exception pollers and
* reset wakeup mechanism.
*/
cv_broadcast(&q->q_wait); /* the readers */
cv_broadcast(&_WR(q)->q_wait); /* the writers */
cv_broadcast(&stp->sd_monitor); /* the ioctllers */
strhup(stp);
mutex_exit(&stp->sd_lock);
return (0);
case M_UNHANGUP:
freemsg(bp);
mutex_enter(&stp->sd_lock);
stp->sd_werror = 0;
stp->sd_flag &= ~STRHUP;
mutex_exit(&stp->sd_lock);
return (0);
case M_SIG:
/*
* Someone downstream wants to post a signal. The
* signal to post is contained in the first byte of the
* message. If the message would go on the front of
* the queue, send a signal to the process group
* (if not SIGPOLL) or to the siglist processes
* (SIGPOLL). If something is already on the queue,
* OR if we are delivering a delayed suspend (*sigh*
* another "tty" hack) and there's no one sleeping already,
* just enqueue the message.
*/
mutex_enter(&stp->sd_lock);
if (q->q_first || (*bp->b_rptr == SIGTSTP &&
!(stp->sd_flag & RSLEEP))) {
(void) putq(q, bp);
mutex_exit(&stp->sd_lock);
return (0);
}
mutex_exit(&stp->sd_lock);
/* FALLTHRU */
case M_PCSIG:
/*
* Don't enqueue, just post the signal.
*/
strsignal(stp, *bp->b_rptr, 0L);
freemsg(bp);
return (0);
case M_CMD:
if (MBLKL(bp) != sizeof (cmdblk_t)) {
freemsg(bp);
return (0);
}
mutex_enter(&stp->sd_lock);
if (stp->sd_flag & STRCMDWAIT) {
ASSERT(stp->sd_cmdblk == NULL);
stp->sd_cmdblk = bp;
cv_broadcast(&stp->sd_monitor);
mutex_exit(&stp->sd_lock);
} else {
mutex_exit(&stp->sd_lock);
freemsg(bp);
}
return (0);
case M_FLUSH:
/*
* Flush queues. The indication of which queues to flush
* is in the first byte of the message. If the read queue
* is specified, then flush it. If FLUSHBAND is set, just
* flush the band specified by the second byte of the message.
*
 * If a module has issued an M_SETOPTS to not flush hi-
 * priority messages off of the stream head, then pass this
 * flag into the flushq code to preserve such messages.
*/
if (*bp->b_rptr & FLUSHR) {
mutex_enter(&stp->sd_lock);
if (*bp->b_rptr & FLUSHBAND) {
ASSERT((bp->b_wptr - bp->b_rptr) >= 2);
flushband(q, *(bp->b_rptr + 1), FLUSHALL);
} else
flushq_common(q, FLUSHALL,
stp->sd_read_opt & RFLUSHPCPROT);
if ((q->q_first == NULL) ||
(q->q_first->b_datap->db_type < QPCTL))
stp->sd_flag &= ~STRPRI;
else {
ASSERT(stp->sd_flag & STRPRI);
}
mutex_exit(&stp->sd_lock);
}
if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
*bp->b_rptr &= ~FLUSHR;
bp->b_flag |= MSGNOLOOP;
/*
* Protect against the driver passing up
* messages after it has done a qprocsoff.
*/
if (_OTHERQ(q)->q_next == NULL)
freemsg(bp);
else
qreply(q, bp);
return (0);
}
freemsg(bp);
return (0);
case M_IOCACK:
case M_IOCNAK:
iocbp = (struct iocblk *)bp->b_rptr;
/*
* If not waiting for ACK or NAK then just free msg.
* If incorrect id sequence number then just free msg.
* If already have ACK or NAK for user then this is a
* duplicate, display a warning and free the msg.
*/
mutex_enter(&stp->sd_lock);
if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
(stp->sd_iocid != iocbp->ioc_id)) {
/*
 * If the ACK/NAK is a dup, display a message.
 * A dup is when sd_iocid == ioc_id, and
 * sd_iocblk == <valid ptr> or -1 (the former
 * is when an ioctl has been put on the stream
 * head but has not yet been consumed, the
 * latter is when it has been consumed).
*/
if ((stp->sd_iocid == iocbp->ioc_id) &&
(stp->sd_iocblk != NULL)) {
log_dupioc(q, bp);
}
freemsg(bp);
mutex_exit(&stp->sd_lock);
return (0);
}
/*
* Assign ACK or NAK to user and wake up.
*/
stp->sd_iocblk = bp;
cv_broadcast(&stp->sd_monitor);
mutex_exit(&stp->sd_lock);
return (0);
case M_COPYIN:
case M_COPYOUT:
reqp = (struct copyreq *)bp->b_rptr;
/*
* If not waiting for ACK or NAK then just fail request.
* If already have ACK, NAK, or copy request, then just
* fail request.
* If incorrect id sequence number then just fail request.
*/
mutex_enter(&stp->sd_lock);
if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
(stp->sd_iocid != reqp->cq_id)) {
if (bp->b_cont) {
freemsg(bp->b_cont);
bp->b_cont = NULL;
}
bp->b_datap->db_type = M_IOCDATA;
bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
resp = (struct copyresp *)bp->b_rptr;
resp->cp_rval = (caddr_t)1; /* failure */
mutex_exit(&stp->sd_lock);
putnext(stp->sd_wrq, bp);
return (0);
}
/*
* Assign copy request to user and wake up.
*/
stp->sd_iocblk = bp;
cv_broadcast(&stp->sd_monitor);
mutex_exit(&stp->sd_lock);
return (0);
case M_SETOPTS:
/*
* Set stream head options (read option, write offset,
* min/max packet size, and/or high/low water marks for
* the read side only).
*/
bpri = 0;
sop = (struct stroptions *)bp->b_rptr;
mutex_enter(&stp->sd_lock);
if (sop->so_flags & SO_READOPT) {
switch (sop->so_readopt & RMODEMASK) {
case RNORM:
stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
break;
case RMSGD:
stp->sd_read_opt =
((stp->sd_read_opt & ~RD_MSGNODIS) |
RD_MSGDIS);
break;
case RMSGN:
stp->sd_read_opt =
((stp->sd_read_opt & ~RD_MSGDIS) |
RD_MSGNODIS);
break;
}
switch (sop->so_readopt & RPROTMASK) {
case RPROTNORM:
stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
break;
case RPROTDAT:
stp->sd_read_opt =
((stp->sd_read_opt & ~RD_PROTDIS) |
RD_PROTDAT);
break;
case RPROTDIS:
stp->sd_read_opt =
((stp->sd_read_opt & ~RD_PROTDAT) |
RD_PROTDIS);
break;
}
switch (sop->so_readopt & RFLUSHMASK) {
case RFLUSHPCPROT:
/*
* This sets the stream head to NOT flush
* M_PCPROTO messages.
*/
stp->sd_read_opt |= RFLUSHPCPROT;
break;
}
}
if (sop->so_flags & SO_ERROPT) {
switch (sop->so_erropt & RERRMASK) {
case RERRNORM:
stp->sd_flag &= ~STRDERRNONPERSIST;
break;
case RERRNONPERSIST:
stp->sd_flag |= STRDERRNONPERSIST;
break;
}
switch (sop->so_erropt & WERRMASK) {
case WERRNORM:
stp->sd_flag &= ~STWRERRNONPERSIST;
break;
case WERRNONPERSIST:
stp->sd_flag |= STWRERRNONPERSIST;
break;
}
}
if (sop->so_flags & SO_COPYOPT) {
if (sop->so_copyopt & ZCVMSAFE) {
stp->sd_copyflag |= STZCVMSAFE;
stp->sd_copyflag &= ~STZCVMUNSAFE;
} else if (sop->so_copyopt & ZCVMUNSAFE) {
stp->sd_copyflag |= STZCVMUNSAFE;
stp->sd_copyflag &= ~STZCVMSAFE;
}
if (sop->so_copyopt & COPYCACHED) {
stp->sd_copyflag |= STRCOPYCACHED;
}
}
if (sop->so_flags & SO_WROFF)
stp->sd_wroff = sop->so_wroff;
if (sop->so_flags & SO_TAIL)
stp->sd_tail = sop->so_tail;
if (sop->so_flags & SO_MINPSZ)
q->q_minpsz = sop->so_minpsz;
if (sop->so_flags & SO_MAXPSZ)
q->q_maxpsz = sop->so_maxpsz;
if (sop->so_flags & SO_MAXBLK)
stp->sd_maxblk = sop->so_maxblk;
if (sop->so_flags & SO_HIWAT) {
if (sop->so_flags & SO_BAND) {
if (strqset(q, QHIWAT,
sop->so_band, sop->so_hiwat)) {
cmn_err(CE_WARN, "strrput: could not "
"allocate qband\n");
} else {
bpri = sop->so_band;
}
} else {
q->q_hiwat = sop->so_hiwat;
}
}
if (sop->so_flags & SO_LOWAT) {
if (sop->so_flags & SO_BAND) {
if (strqset(q, QLOWAT,
sop->so_band, sop->so_lowat)) {
cmn_err(CE_WARN, "strrput: could not "
"allocate qband\n");
} else {
bpri = sop->so_band;
}
} else {
q->q_lowat = sop->so_lowat;
}
}
if (sop->so_flags & SO_MREADON)
stp->sd_flag |= SNDMREAD;
if (sop->so_flags & SO_MREADOFF)
stp->sd_flag &= ~SNDMREAD;
if (sop->so_flags & SO_NDELON)
stp->sd_flag |= OLDNDELAY;
if (sop->so_flags & SO_NDELOFF)
stp->sd_flag &= ~OLDNDELAY;
if (sop->so_flags & SO_ISTTY)
stp->sd_flag |= STRISTTY;
if (sop->so_flags & SO_ISNTTY)
stp->sd_flag &= ~STRISTTY;
if (sop->so_flags & SO_TOSTOP)
stp->sd_flag |= STRTOSTOP;
if (sop->so_flags & SO_TONSTOP)
stp->sd_flag &= ~STRTOSTOP;
if (sop->so_flags & SO_DELIM)
stp->sd_flag |= STRDELIM;
if (sop->so_flags & SO_NODELIM)
stp->sd_flag &= ~STRDELIM;
mutex_exit(&stp->sd_lock);
freemsg(bp);
/* Check backenable in case the water marks changed */
qbackenable(q, bpri);
return (0);
/*
* The following set of cases deals with situations where two stream
* heads are connected to each other (twisted streams). These messages
* have no meaning at the stream head.
*/
case M_BREAK:
case M_CTL:
case M_DELAY:
case M_START:
case M_STOP:
case M_IOCDATA:
case M_STARTI:
case M_STOPI:
freemsg(bp);
return (0);
case M_IOCTL:
/*
* Always NAK this condition; an M_IOCTL arriving at
* the stream head makes no sense.
* If one or more threads are in the read-side rwnext,
* we have to defer the NAK until those threads return
* (in strget).
*/
mutex_enter(&stp->sd_lock);
if (stp->sd_struiodnak != 0) {
/*
* Defer the NAK at the stream head. Queue the
* message at the end of the deferred list.
*/
mblk_t *mp = stp->sd_struionak;
while (mp && mp->b_next)
mp = mp->b_next;
if (mp)
mp->b_next = bp;
else
stp->sd_struionak = bp;
bp->b_next = NULL;
mutex_exit(&stp->sd_lock);
return (0);
}
mutex_exit(&stp->sd_lock);
bp->b_datap->db_type = M_IOCNAK;
/*
* Protect against the driver passing up
* messages after it has done a qprocsoff.
*/
if (_OTHERQ(q)->q_next == NULL)
freemsg(bp);
else
qreply(q, bp);
return (0);
default:
#ifdef DEBUG
cmn_err(CE_WARN,
"bad message type %x received at stream head\n",
bp->b_datap->db_type);
#endif
freemsg(bp);
return (0);
}
/* NOTREACHED */
}
/*
* Check if the stream pointed to by `stp' can be written to, and return an
* error code if not. If `eiohup' is set, then return EIO if STRHUP is set.
* If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the
* stream, then return EPIPE instead of that error and send a SIGPIPE
* to the invoking thread.
*/
static int
strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok)
{
int error;
ASSERT(MUTEX_HELD(&stp->sd_lock));
/*
* For modem support, POSIX states that on writes, EIO should
* be returned if the stream has been hung up.
*/
if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP)
error = EIO;
else
error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0);
if (error != 0) {
if (!(stp->sd_flag & STPLEX) &&
(stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) {
tsignal(curthread, SIGPIPE);
error = EPIPE;
}
}
return (error);
}
/*
* Copyin and send data down a stream.
* The caller will allocate and copyin any control part that precedes the
* message and pass that in as mctl.
*
* Caller should *not* hold sd_lock.
* When EWOULDBLOCK is returned the caller has to redo the canputnext
* under sd_lock in order to avoid missing a backenabling wakeup.
*
* Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
*
* Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
* For sync streams we can only ignore flow control by reverting to using
* putnext.
*
* If sd_maxblk is less than *iosize this routine might return without
* transferring all of *iosize. In all cases, on return *iosize will contain
* the amount of data that was transferred.
*/
static int
strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
int b_flag, int pri, int flags)
{
struiod_t uiod;
mblk_t *mp;
queue_t *wqp = stp->sd_wrq;
int error = 0;
ssize_t count = *iosize;
ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
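/*
* If the write side supports synchronous streams (sd_struiowrq is
* set), postpone copying in the user data; it is copied in later,
* either inside rwnext() or by the struioget() fallback below.
*/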
if (uiop != NULL && count >= 0)
flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
if (!(flags & STRUIO_POSTPONE)) {
/*
* Use regular canputnext, strmakedata, putnext sequence.
*/
if (pri == 0) {
if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
freemsg(mctl);
return (EWOULDBLOCK);
}
} else {
if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) {
freemsg(mctl);
return (EWOULDBLOCK);
}
}
if ((error = strmakedata(iosize, uiop, stp, flags,
&mp)) != 0) {
freemsg(mctl);
/*
* Need to change the return code to ENOMEM so that
* it is not confused with the flow control
* indication, EAGAIN.
*/
if (error == EAGAIN)
return (ENOMEM);
else
return (error);
}
if (mctl != NULL) {
if (mctl->b_cont == NULL)
mctl->b_cont = mp;
else if (mp != NULL)
linkb(mctl, mp);
mp = mctl;
} else if (mp == NULL)
return (0);
mp->b_flag |= b_flag;
mp->b_band = (uchar_t)pri;
if (flags & MSG_IGNFLOW) {
/*
* XXX Hack: Don't get stuck running service
* procedures. This is needed for sockfs when
* sending the unbind message out of the rput
* procedure - we don't want a put procedure
* to run service procedures.
*/
putnext(wqp, mp);
} else {
stream_willservice(stp);
putnext(wqp, mp);
stream_runservice(stp);
}
return (0);
}
/*
* Stream supports rwnext() for the write side.
*/
if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
freemsg(mctl);
/*
* map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
*/
return (error == EAGAIN ? ENOMEM : error);
}
if (mctl != NULL) {
if (mctl->b_cont == NULL)
mctl->b_cont = mp;
else if (mp != NULL)
linkb(mctl, mp);
mp = mctl;
} else if (mp == NULL) {
return (0);
}
mp->b_flag |= b_flag;
mp->b_band = (uchar_t)pri;
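/*
* Duplicate the uio so that a failed or partial rwnext() leaves
* the caller's uio untouched; uioskip() below consumes the
* original only once the transfer has been committed.
*/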
(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
uiod.d_uio.uio_offset = 0;
uiod.d_mp = mp;
error = rwnext(wqp, &uiod);
if (uiod.d_mp == NULL) {
uioskip(uiop, *iosize);
return (error);
}
ASSERT(mp == uiod.d_mp);
if (error == EINVAL) {
/*
* The stream plumbing must have changed while
* we were away, so just turn off rwnext()s.
*/
error = 0;
} else if (error == EBUSY || error == EWOULDBLOCK) {
/*
* Couldn't enter a perimeter or took a page fault,
* so fall back to putnext().
*/
error = 0;
} else {
freemsg(mp);
return (error);
}
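/*
* rwnext() did not consume the message; finish the send here by
* checking flow control, copying in the data, and doing the
* putnext() ourselves.
*/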
/* Have to check canput before consuming data from the uio */
if (pri == 0) {
if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
freemsg(mp);
return (EWOULDBLOCK);
}
} else {
if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
freemsg(mp);
return (EWOULDBLOCK);
}
}
ASSERT(mp == uiod.d_mp);
/* Copyin data from the uio */
if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
freemsg(mp);
return (error);
}
uioskip(uiop, *iosize);
if (flags & MSG_IGNFLOW) {
/*
* XXX Hack: Don't get stuck running service procedures.
* This is needed for sockfs when sending the unbind message
* out of the rput procedure - we don't want a put procedure
* to run service procedures.
*/
putnext(wqp, mp);
} else {
stream_willservice(stp);
putnext(wqp, mp);
stream_runservice(stp);
}
return (0);
}
/*
* Write attempts to break the write request into messages conforming
* with the minimum and maximum packet sizes set downstream.
*
* Write will not block if downstream queue is full and
* O_NDELAY is set, otherwise it will block waiting for the queue to get room.
*
* A write of zero bytes gets packaged into a zero length message and sent
* downstream like any other message.
*
* If buffers of the requested sizes are not available, the write will
* sleep until the buffers become available.
*
* Write will supply a write offset (if one has been specified) in a
* message if it makes sense. This can be specified by downstream
* modules as part of an M_SETOPTS message. Write will not supply the
* write offset if it
* cannot supply any data in a buffer. In other words, write will never
* send down an empty packet due to a write offset.
*/
/* ARGSUSED2 */
int
strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp)
{
return (strwrite_common(vp, uiop, crp, 0));
}
/* ARGSUSED2 */
int
strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
{
struct stdata *stp;
struct queue *wqp;
ssize_t rmin, rmax;
ssize_t iosize;
int waitflag;
int tempmode;
int error = 0;
int b_flag;
ASSERT(vp->v_stream);
stp = vp->v_stream;
mutex_enter(&stp->sd_lock);
if ((error = i_straccess(stp, JCWRITE)) != 0) {
mutex_exit(&stp->sd_lock);
return (error);
}
if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
error = strwriteable(stp, B_TRUE, B_TRUE);
if (error != 0) {
mutex_exit(&stp->sd_lock);
return (error);
}
}
mutex_exit(&stp->sd_lock);
wqp = stp->sd_wrq;
/* get these values from the copies cached in the stream head */
rmin = stp->sd_qn_minpsz;
rmax = stp->sd_qn_maxpsz;
/*
* Check the min/max packet size constraints. If min packet size
* is non-zero, the write cannot be