blob: 1bdddf2ecf33a0fe446264702cb19d45dbcef97e [file] [log] [blame]
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
jpk45916cd2006-03-24 12:29:20 -08005 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07007 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
Jonathan Adams56f33202010-01-12 17:06:34 -080022 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070023 * Use is subject to license terms.
24 */
Dan Kruchininb2d8fb82011-06-20 06:44:45 -070025/*
26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
Dan Kruchininbbaa8b62013-08-26 20:42:32 -080027 * Copyright (c) 2012 by Delphix. All rights reserved.
Jerry Jelinekd28d4712015-11-23 16:00:48 +000028 * Copyright 2015 Joyent, Inc.
Dan Kruchininb2d8fb82011-06-20 06:44:45 -070029 */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070030
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070031/*
32 * Multithreaded STREAMS Local Transport Provider.
33 *
34 * OVERVIEW
35 * ========
36 *
37 * This driver provides TLI as well as socket semantics. It provides
38 * connectionless, connection oriented, and connection oriented with orderly
39 * release transports for TLI and sockets. Each transport type has separate name
40 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
41 * this removes any name space conflicts when binding to socket style transport
42 * addresses.
43 *
44 * NOTE: There is one exception: Socket ticots and ticotsord transports share
45 * the same namespace. In fact, sockets always use ticotsord type transport.
46 *
47 * The driver mode is specified during open() by the minor number used for
48 * open.
49 *
50 * The sockets in addition have the following semantic differences:
51 * No support for passing up credentials (TL_SET[U]CRED).
52 *
53 * Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
55 * T_OPTDATA_IND.
56 *
57 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
58 * a T_CONN_RES is received from the acceptor. This means that a socket
59 * connect will complete before the peer has called accept.
60 *
61 *
62 * MULTITHREADING
63 * ==============
64 *
65 * The driver does not use STREAMS protection mechanisms. Instead it uses a
66 * generic "serializer" abstraction. Most of the operations are executed behind
67 * the serializer and are, essentially single-threaded. All functions executed
68 * behind the same serializer are strictly serialized. So if one thread calls
69 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
 * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1)
 * or bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at the
73 * same time.
74 *
 * Connectionless transports use a single serializer per transport type (one for
 * TLI and one for sockets). Connection-oriented transports use finer-grained
77 * serializers.
78 *
79 * All COTS-type endpoints start their life with private serializers. During
80 * connection request processing the endpoint serializer is switched to the
81 * listener's serializer and the rest of T_CONN_REQ processing is done on the
82 * listener serializer. During T_CONN_RES processing the eager serializer is
83 * switched from listener to acceptor serializer and after that point all
84 * processing for eager and acceptor happens on this serializer. To avoid races
85 * with endpoint closes while its serializer may be changing closes are blocked
86 * while serializers are manipulated.
87 *
88 * References accounting
89 * ---------------------
90 *
91 * Endpoints are reference counted and freed when the last reference is
92 * dropped. Functions within the serializer may access an endpoint state even
93 * after an endpoint closed. The te_closing being set on the endpoint indicates
94 * that the endpoint entered its close routine.
95 *
96 * One reference is held for each opened endpoint instance. The reference
97 * counter is incremented when the endpoint is linked to another endpoint and
98 * decremented when the link disappears. It is also incremented when the
99 * endpoint is found by the hash table lookup. This increment is atomic with the
100 * lookup itself and happens while the hash table read lock is held.
101 *
102 * Close synchronization
103 * ---------------------
104 *
 * During close the endpoint is marked as closing using te_closing flag. It is
106 * usually enough to check for te_closing flag since all other state changes
107 * happen after this flag is set and the close entered serializer. Immediately
108 * after setting te_closing flag tl_close() enters serializer and waits until
109 * the callback finishes. This allows all functions called within serializer to
110 * simply check te_closing without any locks.
111 *
112 * Serializer management.
113 * ---------------------
114 *
115 * For COTS transports serializers are created when the endpoint is constructed
116 * and destroyed when the endpoint is destructed. CLTS transports use global
117 * serializers - one for sockets and one for TLI.
118 *
119 * COTS serializers have separate reference counts to deal with several
120 * endpoints sharing the same serializer. There is a subtle problem related to
121 * the serializer destruction. The serializer should never be destroyed by any
122 * function executed inside serializer. This means that close has to wait till
123 * all serializer activity for this endpoint is finished before it can drop the
124 * last reference on the endpoint (which may as well free the serializer). This
125 * is only relevant for COTS transports which manage serializers
126 * dynamically. For CLTS transports close may complete without waiting for all
127 * serializer activity to finish since serializer is only destroyed at driver
128 * detach time.
129 *
130 * COTS endpoints keep track of the number of outstanding requests on the
131 * serializer for the endpoint. The code handling accept() avoids changing
132 * client serializer if it has any pending messages on the serializer and
133 * instead moves acceptor to listener's serializer.
134 *
135 *
136 * Use of hash tables
137 * ------------------
138 *
139 * The driver uses modhash hash table implementation. Each transport uses two
140 * hash tables - one for finding endpoints by acceptor ID and another one for
141 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
142 * pair of hash tables since sockets only use TICOTSORD.
143 *
144 * All hash tables lookups increment a reference count for returned endpoints,
145 * so we may safely check the endpoint state even when the endpoint is removed
146 * from the hash by another thread immediately after it is found.
147 *
148 *
149 * CLOSE processing
150 * ================
151 *
152 * The driver enters serializer twice on close(). The close sequence is the
153 * following:
154 *
 * 1) Wait until closing is safe (te_closewait becomes zero)
 *	This step is needed to prevent close during serializer switches. In most
 *	cases (close happening after connection establishment) te_closewait is
 *	zero.
 * 2) Set te_closing.
 * 3) Call tl_close_ser() within serializer and wait for it to complete.
 *
 *	tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
 *	It also needs to clear write-side q_next pointers - this should be done
 *	before qprocsoff().
 *
 *	This synchronous serializer entry during close is needed to ensure that
 *	the queue is valid everywhere inside the serializer.
 *
 *	Note that in many cases close will execute tl_close_ser() synchronously,
 *	so it will not wait at all.
 *
 * 4) Calls qprocsoff().
 * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
 *	complete (for COTS transports). For CLTS transport there is no wait.
 *
 *	tl_close_finish_ser() finishes the close process and wakes up waiting
 *	close if there is any.
 *
 *	Note that in most cases close will enter tl_close_finish_ser()
 *	synchronously and will not wait at all.
181 *
182 *
183 * Flow Control
184 * ============
185 *
186 * The driver implements both read and write side service routines. No one calls
187 * putq() on the read queue. The read side service routine tl_rsrv() is called
188 * when the read side stream is back-enabled. It enters serializer synchronously
189 * (waits till serializer processing is complete). Within serializer it
190 * back-enables all endpoints blocked by the queue for connection-less
191 * transports and enables write side service processing for the peer for
192 * connection-oriented transports.
193 *
194 * Read and write side service routines use special mblk_sized space in the
195 * endpoint structure to enter perimeter.
196 *
197 * Write-side flow control
198 * -----------------------
199 *
200 * Write side flow control is a bit tricky. The driver needs to deal with two
201 * message queues - the explicit STREAMS message queue maintained by
202 * putq()/getq()/putbq() and the implicit queue within the serializer. These two
203 * queues should be synchronized to preserve message ordering and should
204 * maintain a single order determined by the order in which messages enter
205 * tl_wput(). In order to maintain the ordering between these two queues the
206 * STREAMS queue is only manipulated within the serializer, so the ordering is
207 * provided by the serializer.
208 *
209 * Functions called from the tl_wsrv() sometimes may call putbq(). To
210 * immediately stop any further processing of the STREAMS message queues the
211 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
212 * side service processing stops when the flag is set.
213 *
214 * The tl_wsrv() function enters serializer synchronously and waits for it to
215 * complete. The serializer call-back tl_wsrv_ser() either drains all messages
216 * on the STREAMS queue or terminates when it notices the te_nowsrv flag
 * set. Note that the maximum amount of messages processed by tl_wsrv_ser() is
 * always bounded by the amount of messages on the STREAMS queue at the time
 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
 * queue from another serialized entry which can't happen in parallel. This
 * guarantees that tl_wsrv_ser() is complete in bounded time (there is no risk
222 * of it draining forever while writer places new messages on the STREAMS
223 * queue).
224 *
225 * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
226 *
227 *
228 * Unix Domain Sockets
229 * ===================
230 *
231 * The driver knows the structure of Unix Domain sockets addresses and treats
232 * them differently from generic TLI addresses. For sockets implicit binds are
233 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
234 * instead of using address length of zero. Explicit binds specify
235 * SOU_MAGIC_EXPLICIT as magic.
236 *
237 * For implicit binds we always use minor number as soua_vp part of the address
238 * and avoid any hash table lookups. This saves two hash tables lookups per
239 * anonymous bind.
240 *
241 * For explicit address we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
243 * hashing by the full address.
244 *
245 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
246 * tep structure, so it should be never freed.
247 *
248 * Also for sockets the driver always uses minor number as acceptor id.
249 *
250 * TPI VIOLATIONS
251 * --------------
252 *
253 * This driver violates TPI in several respects for Unix Domain Sockets:
254 *
255 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
256 * is requested and the endpoint is already in use. There is no point in
257 * generating an unused address since this address will be rejected by
258 * sockfs anyway. For implicit binds it always generates a new address
259 * (sets soua_vp to its minor number).
260 *
261 * 2) It always uses minor number as acceptor ID and never uses queue
262 * pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
263 * message and they do not use the queue pointer.
264 *
 * 3) For Listener sockets the usual sequence is to issue bind() with zero
 *	backlog followed by listen(). The listen() should be issued with non-zero
267 * backlog, so sotpi_listen() issues unbind request followed by bind
268 * request to the same address but with a non-zero qlen value. Both
269 * tl_bind() and tl_unbind() require write lock on the hash table to
270 * insert/remove the address. The driver does not remove the address from
271 * the hash for endpoints that are bound to the explicit address and have
272 * backlog of zero. During T_BIND_REQ processing if the address requested
273 * is equal to the address the endpoint already has it updates the backlog
274 * without reinserting the address in the hash table. This optimization
 * avoids two hash table updates for each listener created. It also
276 * avoids the problem of a "stolen" address when another listener may use
277 * the same address between the unbind and bind and suddenly listen() fails
278 * because address is in use even though the bind() succeeded.
279 *
280 *
281 * CONNECTIONLESS TRANSPORTS
282 * =========================
283 *
284 * Connectionless transports all share the same serializer (one for TLI and one
285 * for Sockets). Functions executing behind serializer can check or modify state
286 * of any endpoint.
287 *
288 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
289 * te_lastep field. The next time X talks to some address A it checks whether A
290 * is the same as Y's address and if it is there is no need to lookup Y. If the
291 * address is different or the state of Y is not appropriate (e.g. closed or not
292 * idle) X does a lookup using tl_find_peer() and caches the new address.
293 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
294 * on the endpoint found.
295 *
296 * During close of endpoint Y it doesn't try to remove itself from other
297 * endpoints caches. They will detect that Y is gone and will search the peer
298 * endpoint again.
299 *
300 * Flow Control Handling.
301 * ----------------------
302 *
303 * Each connectionless endpoint keeps a list of endpoints which are
304 * flow-controlled by its queue. It also keeps a pointer to the queue which
305 * flow-controls itself. Whenever flow control releases for endpoint X it
306 * enables all queues from the list. During close it also back-enables everyone
 * in the list. If X is flow-controlled when it is closing it removes itself
 * from the peer's list.
309 *
310 * DATA STRUCTURES
311 * ===============
312 *
313 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 * endpoint state. For connection-oriented transports it keeps a list
315 * of pending connections (tl_icon_t). For connectionless transports it keeps a
316 * list of endpoints flow controlled by this one.
317 *
318 * Each transport type is represented by a per-transport data structure
319 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
320 * endpoint address hash tables for each transport. It also contains pointer to
321 * transport serializer for connectionless transports.
322 *
323 * Each endpoint keeps a link to its transport structure, so the code can find
324 * all per-transport information quickly.
325 */
326
327#include <sys/types.h>
328#include <sys/inttypes.h>
329#include <sys/stream.h>
330#include <sys/stropts.h>
331#define _SUN_TPI_VERSION 2
332#include <sys/tihdr.h>
333#include <sys/strlog.h>
334#include <sys/debug.h>
335#include <sys/cred.h>
336#include <sys/errno.h>
337#include <sys/kmem.h>
338#include <sys/id_space.h>
339#include <sys/modhash.h>
340#include <sys/mkdev.h>
341#include <sys/tl.h>
342#include <sys/stat.h>
343#include <sys/conf.h>
344#include <sys/modctl.h>
345#include <sys/strsun.h>
346#include <sys/socket.h>
347#include <sys/socketvar.h>
348#include <sys/sysmacros.h>
349#include <sys/xti_xtiopt.h>
350#include <sys/ddi.h>
351#include <sys/sunddi.h>
352#include <sys/zone.h>
353#include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */
354#include <inet/optcom.h>
355#include <sys/strsubr.h>
356#include <sys/ucred.h>
357#include <sys/suntpi.h>
358#include <sys/list.h>
359#include <sys/serializer.h>
360
361/*
362 * TBD List
363 * 14 Eliminate state changes through table
364 * 16. AF_UNIX socket options
365 * 17. connect() for ticlts
366 * 18. support for "netstat" to show AF_UNIX plus TLI local
367 * transport connections
368 * 21. sanity check to flushing on sending M_ERROR
369 */
370
371/*
372 * CONSTANT DECLARATIONS
373 * --------------------
374 */
375
376/*
377 * Local declarations
378 */
/* Next TPI state for event EV in state ST, looked up in ti_statetbl. */
#define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]

#define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
#define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
#define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgz" is unlimited (0) */
/*
 * Hash tables size.
 */
#define	TL_HASH_SIZE 311

/*
 * Definitions for module_info
 */
#define	TL_ID		(104)		/* module ID number */
#define	TL_NAME		"tl"		/* module name */
#define	TL_MINPSZ	(0)		/* min packet size */
#define	TL_MAXPSZ	INFPSZ		/* max packet size ZZZ */
#define	TL_HIWAT	(16*1024)	/* hi water mark */
#define	TL_LOWAT	(256)		/* lo water mark */
/*
 * Definition of minor numbers/modes for new transport provider modes.
 * We view the socket use as a separate mode to get a separate name space.
 *
 * The socket modes are the TLI mode values with the TL_SOCKET bit (4) set,
 * so TL_SOCK_COTS == 4, TL_SOCK_COTSORD == 5 and TL_SOCK_CLTS == 6.
 */
#define	TL_TICOTS	0	/* connection oriented transport */
#define	TL_TICOTSORD	1	/* COTS w/ orderly release */
#define	TL_TICLTS	2	/* connectionless transport */
#define	TL_UNUSED	3
#define	TL_SOCKET	4	/* Socket */
#define	TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)
#define	TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)
#define	TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)

/* Mask wide enough for every mode value above (0 through 7). */
#define	TL_MINOR_MASK	0x7
/* Evaluates to 3; NOTE(review): presumably the first dynamic minor - confirm */
#define	TL_MINOR_START	(TL_TICLTS + 1)

/*
 * LOCAL MACROS
 */
/* Round p up to a multiple of sizeof (t_scalar_t). */
#define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
418
419/*
420 * EXTERNAL VARIABLE DECLARATIONS
421 * -----------------------------
422 */
423/*
424 * state table defined in the OS space.c
425 */
426extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
427
428/*
429 * STREAMS DRIVER ENTRY POINTS PROTOTYPES
430 */
431static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
432static int tl_close(queue_t *, int, cred_t *);
433static void tl_wput(queue_t *, mblk_t *);
434static void tl_wsrv(queue_t *);
435static void tl_rsrv(queue_t *);
436
437static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
438static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
439static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
440
441
442/*
443 * GLOBAL DATA STRUCTURES AND VARIABLES
444 * -----------------------------------
445 */
446
447/*
448 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
449 * For now, we only manage the SO_RECVUCRED option but we also have
450 * harmless dummy options to make things work with some common code we access.
451 */
452opdes_t tl_opt_arr[] = {
453 /* The SO_TYPE is needed for the hack below */
454 {
455 SO_TYPE,
456 SOL_SOCKET,
457 OA_R,
458 OA_R,
459 OP_NP,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800460 0,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700461 sizeof (t_scalar_t),
462 0
463 },
464 {
465 SO_RECVUCRED,
466 SOL_SOCKET,
467 OA_RW,
468 OA_RW,
469 OP_NP,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800470 0,
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700471 sizeof (int),
472 0
473 }
474};
475
476/*
477 * Table of all supported levels
478 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
479 * any supported options so we need this info separately.
480 *
481 * This is needed only for topmost tpi providers.
482 */
483optlevel_t tl_valid_levels_arr[] = {
484 XTI_GENERIC,
485 SOL_SOCKET,
486 TL_PROT_LEVEL
487};
488
489#define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr)
/*
 * Current upper bound on the amount of space needed to return all options.
 * Additional options with data size of sizeof(long) are handled automatically.
 * Others need to be accounted for by hand.
 *
 * The bound is the sum of: 4 bytes per tl_opt_arr entry (the "<< 2"), one
 * struct opthdr per entry, a fixed 64 bytes, and one struct T_optmgmt_ack.
 */
#define	TL_MAX_OPT_BUF_LEN	\
		((A_CNT(tl_opt_arr) << 2) +	\
		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +	\
		64 + sizeof (struct T_optmgmt_ack))
499
500#define TL_OPT_ARR_CNT A_CNT(tl_opt_arr)
501
502/*
503 * transport addr structure
504 */
/* Each endpoint embeds one of these as te_ap. */
typedef struct tl_addr {
	zoneid_t	ta_zoneid;	/* Zone scope of address */
	t_scalar_t	ta_alen;	/* length of abuf */
	void		*ta_abuf;	/* the addr itself */
} tl_addr_t;
510
511/*
512 * Refcounted version of serializer.
513 */
/* Several COTS endpoints may share one of these, hence the separate count. */
typedef struct tl_serializer {
	uint_t		ts_refcnt;	/* references to this serializer */
	serializer_t	*ts_serializer;	/* the underlying serializer */
} tl_serializer_t;
518
519/*
520 * Each transport type has a separate state.
521 * Per-transport state.
522 */
typedef struct tl_transport_state {
	char		*tr_name;	/* transport name, e.g. "ticots" */
	minor_t		tr_minor;	/* mode value (TL_TICOTS, ...) */
	uint32_t	tr_defaddr;	/* initialized to TL_DFADDR */
	mod_hash_t	*tr_ai_hash;	/* endpoints by acceptor ID */
	mod_hash_t	*tr_addr_hash;	/* endpoints by address */
	tl_serializer_t	*tr_serializer;	/* for connectionless transports */
} tl_transport_state_t;
531
532#define TL_DFADDR 0x1000
533
534static tl_transport_state_t tl_transports[] = {
535 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
536 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
537 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
538 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
539 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
540 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL },
541 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
542};
543
544#define TL_MAXTRANSPORT A_CNT(tl_transports)
545
/* Forward declaration; the structure itself is defined further down. */
struct tl_endpt;
typedef struct tl_endpt tl_endpt_t;

/*
 * Signature shared by the message handlers declared in this file: they take
 * a message block and the endpoint it is for.
 */
typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
550
551/*
552 * Data structure used to represent pending connects.
553 * Records enough information so that the connecting peer can close
554 * before the connection gets accepted.
555 */
typedef struct tl_icon {
	list_node_t	ti_node;	/* list linkage */
	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
	t_scalar_t	ti_seqno;	/* Sequence number */
} tl_icon_t;
562
/* Short name for the socket address structure embedded in each endpoint. */
typedef struct so_ux_addr soux_addr_t;
#define	TL_SOUX_ADDRLEN		sizeof (soux_addr_t)

/*
 * Maximum number of unaccepted connection indications allowed per listener.
 */
#define	TL_MAXQLEN	4096
/* Non-static variable holding the limit; initialized to TL_MAXQLEN. */
int tl_maxqlen = TL_MAXQLEN;
571
572/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700573 * transport endpoint structure
574 */
struct tl_endpt {
	queue_t		*te_rq;		/* stream read queue */
	queue_t		*te_wq;		/* stream write queue */
	uint32_t	te_refcnt;	/* endpoint reference count */
	int32_t		te_state;	/* TPI state of endpoint */
	minor_t		te_minor;	/* minor number */
#define	te_seqno	te_minor	/* alias: shares storage with te_minor */
	uint_t		te_flag;	/* flag field */
	boolean_t	te_nowsrv;	/* set by TL_PUTBQ; stops wsrv processing */
	tl_serializer_t	*te_ser;	/* Serializer to use */
#define	te_serializer	te_ser->ts_serializer

	soux_addr_t	te_uxaddr;	/* Socket address */
#define	te_magic	te_uxaddr.soua_magic
#define	te_vp		te_uxaddr.soua_vp
	tl_addr_t	te_ap;		/* addr bound to this endpt */
#define	te_zoneid te_ap.ta_zoneid
#define	te_alen	te_ap.ta_alen
#define	te_abuf	te_ap.ta_abuf

	tl_transport_state_t *te_transport;	/* per-transport state */
#define	te_addrhash	te_transport->tr_addr_hash
#define	te_aihash	te_transport->tr_ai_hash
#define	te_defaddr	te_transport->tr_defaddr
	cred_t		*te_credp;	/* endpoint user credentials */
	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */

	/*
	 * State specific for connection-oriented and connectionless transports.
	 * The two variants overlay each other in a union; use the te_* accessor
	 * macros defined after the union rather than the raw member names.
	 */
	union {
		/* Connection-oriented state. */
		struct {
			t_uscalar_t _te_nicon;	/* count of conn requests */
			t_uscalar_t _te_qlen;	/* max conn requests */
			tl_endpt_t  *_te_oconp;	/* conn request pending */
			tl_endpt_t  *_te_conp;	/* connected endpt */
#ifndef _ILP32
			void	    *_te_pad;
#endif
			list_t	_te_iconp;	/* list of conn ind. pending */
		} _te_cots_state;
		/* Connection-less state. */
		struct {
			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
			list_node_t _te_flows;	/* lists of connections */
			list_t	_te_flowlist;	/* Who flowcontrols on me */
		} _te_clts_state;
	} _te_transport_state;
#define	te_nicon	_te_transport_state._te_cots_state._te_nicon
#define	te_qlen		_te_transport_state._te_cots_state._te_qlen
#define	te_oconp	_te_transport_state._te_cots_state._te_oconp
#define	te_conp		_te_transport_state._te_cots_state._te_conp
#define	te_iconp	_te_transport_state._te_cots_state._te_iconp
#define	te_lastep	_te_transport_state._te_clts_state._te_lastep
#define	te_flowq	_te_transport_state._te_clts_state._te_flowq
#define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
#define	te_flows	_te_transport_state._te_clts_state._te_flows

	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
	timeout_id_t	te_timoutid;	/* outstanding timeout id */
	pid_t		te_cpid;	/* cached pid of endpoint */
	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
	/*
	 * Pieces of the endpoint state needed for closing.
	 * The three mblk_t members are embedded in the structure (not
	 * pointers), giving close and the service routines their own message
	 * block for entering the serializer.
	 */
	kmutex_t	te_closelock;
	kcondvar_t	te_closecv;
	uint8_t		te_closing;	/* The endpoint started closing */
	uint8_t		te_closewait;	/* Wait in close until zero */
	mblk_t		te_closemp;	/* for entering serializer on close */
	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
	/* NOTE(review): presumably guard the two *_active flags - confirm */
	kmutex_t	te_srv_lock;
	kcondvar_t	te_srv_cv;
	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
	/*
	 * Pieces of the endpoint state needed for serializer transitions.
	 */
	kmutex_t	te_ser_lock;	/* Protects the count below */
	uint_t		te_ser_count;	/* Number of messages on serializer */
};
659
/*
 * Flag values. Lower 4 bits specify the transport used.
 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
 * they make it easier to identify the endpoint.
 */
#define	TL_LISTENER	0x00010	/* the listener endpoint */
#define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
#define	TL_EAGER	0x00040	/* connecting endpoint */
#define	TL_ACCEPTED	0x00080	/* accepted connection */
#define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
#define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
#define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
#define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
#define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
/*
 * Boolean checks for the endpoint type.
 *
 * These test individual bits of the mode value kept in the low bits of
 * te_flag: TL_TICLTS (2), TL_TICOTSORD (1) and TL_SOCKET (4). IS_COTS is
 * simply the negation of IS_CLTS.
 */
#define	IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
#define	IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
#define	IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
#define	IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)
681
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700682/*
683 * Certain operations are always used together. These macros reduce the chance
684 * of missing a part of a combination.
685 */
/*
 * TL_UNCONNECT calls tl_refrele() on x and then sets x to NULL;
 * TL_REMOVE_PEER does the same only when x is not already NULL.
 *
 * All macros in this group expand to a brace-enclosed block and evaluate
 * their endpoint argument more than once, so pass a side-effect-free lvalue.
 */
#define	TL_UNCONNECT(x)		\
	{ tl_refrele(x); (x) = NULL; }
#define	TL_REMOVE_PEER(x)	\
	{ if ((x) != NULL) TL_UNCONNECT(x) }

/*
 * Put mp back on the write queue of x and set te_nowsrv.
 * Must not be used once close has entered the serializer (asserted).
 */
#define	TL_PUTBQ(x, mp)						\
	{							\
		ASSERT(!((x)->te_flag & TL_CLOSE_SER));		\
		(x)->te_nowsrv = B_TRUE;			\
		(void) putbq((x)->te_wq, mp);			\
	}

/* Both clear te_nowsrv, then enable the write queue / queue mp on it. */
#define	TL_QENABLE(x)		\
	{ (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
#define	TL_PUTQ(x, mp)		\
	{ (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
697
698/*
699 * STREAMS driver glue data structures.
700 */
/* Module information; referenced by both tl_rinit and tl_winit. */
static struct module_info tl_minfo = {
	TL_ID,			/* mi_idnum */
	TL_NAME,		/* mi_idname */
	TL_MINPSZ,		/* mi_minpsz */
	TL_MAXPSZ,		/* mi_maxpsz */
	TL_HIWAT,		/* mi_hiwat */
	TL_LOWAT		/* mi_lowat */
};
709
/*
 * Read-side queue initialization: no put procedure, tl_rsrv() as the service
 * routine, and the open/close entry points. tl_rsrv() is declared as
 * returning void, hence the cast to the int-returning slot type.
 */
static struct qinit tl_rinit = {
	NULL,			/* qi_putp */
	(int (*)())tl_rsrv,	/* qi_srvp */
	tl_open,		/* qi_qopen */
	tl_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};
719
/*
 * Write-side queue initialization: tl_wput() as the put procedure and
 * tl_wsrv() as the service routine. Both are declared as returning void,
 * hence the casts. Open and close are supplied on the read side only.
 */
static struct qinit tl_winit = {
	(int (*)())tl_wput,	/* qi_putp */
	(int (*)())tl_wsrv,	/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};
729
/* Ties the read and write qinit structures together; no multiplexor side. */
static struct streamtab tlinfo = {
	&tl_rinit,		/* st_rdinit */
	&tl_winit,		/* st_wrinit */
	NULL,			/* st_muxrinit */
	NULL			/* st_muxwrinit */
};
736
/*
 * Define tl_devops: tl_attach/tl_detach/tl_info are the driver entry points,
 * the remaining slots are nulldev, the driver is flagged D_MP, tlinfo is its
 * streamtab, and quiesce is declared as not supported.
 */
DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700739
/* Driver linkage: description string plus the tl_devops defined above it. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module -- pseudo driver here */
	"TPI Local Transport (tl)",
	&tl_devops,		/* driver ops */
};
745
746/*
747 * Module linkage information for the kernel.
748 */
/* NULL-terminated list of linkage structures; only modldrv is present. */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
754
755/*
756 * Templates for response to info request
757 * Check sanity of unlimited connect data etc.
758 */
759
760#define TL_CLTS_PROVIDER_FLAG (XPG4_1|SENDZERO)
761#define TL_COTS_PROVIDER_FLAG (XPG4_1|SENDZERO)
762
/*
 * T_INFO_ACK template for connection-oriented endpoints. Every size limit
 * is T_INFINITE; TIDU_size is left 0 here and filled in at run time.
 */
static struct T_info_ack tl_cots_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
		T_INFINITE,	/* TSDU size */
		T_INFINITE,	/* ETSDU size */
		T_INFINITE,	/* CDATA_size */
		T_INFINITE,	/* DDATA_size */
		T_INFINITE,	/* ADDR_size  */
		T_INFINITE,	/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_COTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
	};
777
778static struct T_info_ack tl_clts_info_ack =
779 {
780 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */
781 0, /* TSDU_size - fill at run time */
782 -2, /* ETSDU_size -2 => not supported */
783 -2, /* CDATA_size -2 => not supported */
784 -2, /* DDATA_size -2 => not supported */
Dan Kruchininb2d8fb82011-06-20 06:44:45 -0700785 -1, /* ADDR_size -1 => infinite */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700786 -1, /* OPT_size */
787 0, /* TIDU_size - fill at run time */
788 T_CLTS, /* SERV_type */
789 -1, /* CURRENT_state */
790 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
791 };
792
793/*
794 * private copy of devinfo pointer used in tl_info
795 */
796static dev_info_t *tl_dip;
797
798/*
799 * Endpoints cache.
800 */
801static kmem_cache_t *tl_cache;
802/*
803 * Minor number space.
804 */
805static id_space_t *tl_minors;
806
807/*
808 * Default Data Unit size.
809 */
810static t_scalar_t tl_tidusz;
811
812/*
813 * Size of hash tables.
814 */
815static size_t tl_hash_size = TL_HASH_SIZE;
816
817/*
818 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
819 * for sockets.
820 */
821static int tl_disable_early_connect = 0;
822static int tl_client_closing_when_accepting;
823
824static int tl_serializer_noswitch;
825
826/*
827 * LOCAL FUNCTION PROTOTYPES
828 * -------------------------
829 */
830static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
831static void tl_do_proto(mblk_t *, tl_endpt_t *);
832static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
833static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
834static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
835 t_scalar_t);
836static void tl_bind(mblk_t *, tl_endpt_t *);
837static void tl_bind_ser(mblk_t *, tl_endpt_t *);
838static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t);
839static void tl_unbind(mblk_t *, tl_endpt_t *);
840static void tl_optmgmt(queue_t *, mblk_t *);
841static void tl_conn_req(queue_t *, mblk_t *);
842static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
843static void tl_conn_res(mblk_t *, tl_endpt_t *);
844static void tl_discon_req(mblk_t *, tl_endpt_t *);
845static void tl_capability_req(mblk_t *, tl_endpt_t *);
846static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
Dan Kruchininb2d8fb82011-06-20 06:44:45 -0700847static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700848static void tl_info_req(mblk_t *, tl_endpt_t *);
849static void tl_addr_req(mblk_t *, tl_endpt_t *);
850static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
851static void tl_data(mblk_t *, tl_endpt_t *);
852static void tl_exdata(mblk_t *, tl_endpt_t *);
853static void tl_ordrel(mblk_t *, tl_endpt_t *);
854static void tl_unitdata(mblk_t *, tl_endpt_t *);
855static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
856static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
857static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
858static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
859static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
860static void tl_cl_backenable(tl_endpt_t *);
861static void tl_co_unconnect(tl_endpt_t *);
862static mblk_t *tl_resizemp(mblk_t *, ssize_t);
863static void tl_discon_ind(tl_endpt_t *, uint32_t);
864static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
865static mblk_t *tl_ordrel_ind_alloc(void);
866static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
867static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
868static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
869static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
870static void tl_icon_freemsgs(mblk_t **);
871static void tl_merror(queue_t *, mblk_t *, int);
jpk45916cd2006-03-24 12:29:20 -0800872static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700873static int tl_default_opt(queue_t *, int, int, uchar_t *);
874static int tl_get_opt(queue_t *, int, int, uchar_t *);
875static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
Erik Nordmarkbd670b32009-11-11 11:49:49 -0800876 uchar_t *, void *, cred_t *);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700877static void tl_memrecover(queue_t *, mblk_t *, size_t);
878static void tl_freetip(tl_endpt_t *, tl_icon_t *);
879static void tl_free(tl_endpt_t *);
880static int tl_constructor(void *, void *, int);
881static void tl_destructor(void *, void *);
882static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
883static tl_serializer_t *tl_serializer_alloc(int);
884static void tl_serializer_refhold(tl_serializer_t *);
885static void tl_serializer_refrele(tl_serializer_t *);
886static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
887static void tl_serializer_exit(tl_endpt_t *);
888static boolean_t tl_noclose(tl_endpt_t *);
889static void tl_closeok(tl_endpt_t *);
890static void tl_refhold(tl_endpt_t *);
891static void tl_refrele(tl_endpt_t *);
892static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
893static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
894static void tl_close_ser(mblk_t *, tl_endpt_t *);
895static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
896static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
897static void tl_proto_ser(mblk_t *, tl_endpt_t *);
898static void tl_putq_ser(mblk_t *, tl_endpt_t *);
899static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
900static void tl_wput_ser(mblk_t *, tl_endpt_t *);
901static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
902static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
903static void tl_addr_unbind(tl_endpt_t *);
904
/*
 * Initialize option database object for TL
 */
908
909optdb_obj_t tl_opt_obj = {
910 tl_default_opt, /* TL default value function pointer */
911 tl_get_opt, /* TL get function pointer */
912 tl_set_opt, /* TL set function pointer */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700913 TL_OPT_ARR_CNT, /* TL option database count of entries */
914 tl_opt_arr, /* TL option database */
915 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */
916 tl_valid_levels_arr /* TL valid level array */
917};
918
919/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700920 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
921 * ---------------------------------------
922 */
923
924/*
925 * Loadable module routines
926 */
927int
928_init(void)
929{
930 return (mod_install(&modlinkage));
931}
932
933int
934_fini(void)
935{
936 return (mod_remove(&modlinkage));
937}
938
939int
940_info(struct modinfo *modinfop)
941{
942 return (mod_info(&modlinkage, modinfop));
943}
944
945/*
946 * Driver Entry Points and Other routines
947 */
948static int
949tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
950{
951 int i;
952 char name[32];
953
954 /*
955 * Resume from a checkpoint state.
956 */
957 if (cmd == DDI_RESUME)
958 return (DDI_SUCCESS);
959
960 if (cmd != DDI_ATTACH)
961 return (DDI_FAILURE);
962
963 /*
964 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics that
965 * streams message sizes can be unlimited. We use a defined constant
966 * instead.
967 */
968 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
969
970 /*
971 * Create subdevices for each transport.
972 */
973 for (i = 0; i < TL_UNUSED; i++) {
974 if (ddi_create_minor_node(devi,
nordmarkfc80c0d2007-10-11 22:57:36 -0700975 tl_transports[i].tr_name,
976 S_IFCHR, tl_transports[i].tr_minor,
977 DDI_PSEUDO, NULL) == DDI_FAILURE) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700978 ddi_remove_minor_node(devi, NULL);
979 return (DDI_FAILURE);
980 }
981 }
982
983 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
984 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
985
986 if (tl_cache == NULL) {
987 ddi_remove_minor_node(devi, NULL);
988 return (DDI_FAILURE);
989 }
990
991 tl_minors = id_space_create("tl_minor_space",
992 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
993
994 /*
995 * Create ID space for minor numbers
996 */
997 for (i = 0; i < TL_MAXTRANSPORT; i++) {
998 tl_transport_state_t *t = &tl_transports[i];
999
1000 if (i == TL_UNUSED)
1001 continue;
1002
1003 /* Socket COTSORD shares namespace with COTS */
1004 if (i == TL_SOCK_COTSORD) {
1005 t->tr_ai_hash =
1006 tl_transports[TL_SOCK_COTS].tr_ai_hash;
1007 ASSERT(t->tr_ai_hash != NULL);
1008 t->tr_addr_hash =
1009 tl_transports[TL_SOCK_COTS].tr_addr_hash;
1010 ASSERT(t->tr_addr_hash != NULL);
1011 continue;
1012 }
1013
1014 /*
1015 * Create hash tables.
1016 */
1017 (void) snprintf(name, sizeof (name), "%s_ai_hash",
1018 t->tr_name);
1019#ifdef _ILP32
1020 if (i & TL_SOCKET)
1021 t->tr_ai_hash =
1022 mod_hash_create_idhash(name, tl_hash_size - 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07001023 mod_hash_null_valdtor);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001024 else
1025 t->tr_ai_hash =
1026 mod_hash_create_ptrhash(name, tl_hash_size,
nordmarkfc80c0d2007-10-11 22:57:36 -07001027 mod_hash_null_valdtor, sizeof (queue_t));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001028#else
1029 t->tr_ai_hash =
1030 mod_hash_create_idhash(name, tl_hash_size - 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07001031 mod_hash_null_valdtor);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001032#endif /* _ILP32 */
1033
1034 if (i & TL_SOCKET) {
1035 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1036 t->tr_name);
1037 t->tr_addr_hash = mod_hash_create_ptrhash(name,
1038 tl_hash_size, mod_hash_null_valdtor,
1039 sizeof (uintptr_t));
1040 } else {
1041 (void) snprintf(name, sizeof (name), "%s_addr_hash",
1042 t->tr_name);
1043 t->tr_addr_hash = mod_hash_create_extended(name,
1044 tl_hash_size, mod_hash_null_keydtor,
1045 mod_hash_null_valdtor,
1046 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1047 }
1048
1049 /* Create serializer for connectionless transports. */
1050 if (i & TL_TICLTS)
1051 t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1052 }
1053
1054 tl_dip = devi;
1055
1056 return (DDI_SUCCESS);
1057}
1058
1059static int
1060tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1061{
1062 int i;
1063
1064 if (cmd == DDI_SUSPEND)
1065 return (DDI_SUCCESS);
1066
1067 if (cmd != DDI_DETACH)
1068 return (DDI_FAILURE);
1069
1070 /*
1071 * Destroy arenas and hash tables.
1072 */
1073 for (i = 0; i < TL_MAXTRANSPORT; i++) {
1074 tl_transport_state_t *t = &tl_transports[i];
1075
1076 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1077 continue;
1078
Jonathan Adams56f33202010-01-12 17:06:34 -08001079 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001080 if (t->tr_serializer != NULL) {
1081 tl_serializer_refrele(t->tr_serializer);
1082 t->tr_serializer = NULL;
1083 }
1084
1085#ifdef _ILP32
1086 if (i & TL_SOCKET)
1087 mod_hash_destroy_idhash(t->tr_ai_hash);
1088 else
1089 mod_hash_destroy_ptrhash(t->tr_ai_hash);
1090#else
1091 mod_hash_destroy_idhash(t->tr_ai_hash);
1092#endif /* _ILP32 */
1093 t->tr_ai_hash = NULL;
1094 if (i & TL_SOCKET)
1095 mod_hash_destroy_ptrhash(t->tr_addr_hash);
1096 else
1097 mod_hash_destroy_hash(t->tr_addr_hash);
1098 t->tr_addr_hash = NULL;
1099 }
1100
1101 kmem_cache_destroy(tl_cache);
1102 tl_cache = NULL;
1103 id_space_destroy(tl_minors);
1104 tl_minors = NULL;
1105 ddi_remove_minor_node(devi, NULL);
1106 return (DDI_SUCCESS);
1107}
1108
1109/* ARGSUSED */
1110static int
1111tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1112{
1113
1114 int retcode = DDI_FAILURE;
1115
1116 switch (infocmd) {
1117
1118 case DDI_INFO_DEVT2DEVINFO:
1119 if (tl_dip != NULL) {
1120 *result = (void *)tl_dip;
1121 retcode = DDI_SUCCESS;
1122 }
1123 break;
1124
1125 case DDI_INFO_DEVT2INSTANCE:
1126 *result = (void *)0;
1127 retcode = DDI_SUCCESS;
1128 break;
1129
1130 default:
1131 break;
1132 }
1133 return (retcode);
1134}
1135
1136/*
1137 * Endpoint reference management.
1138 */
1139static void
1140tl_refhold(tl_endpt_t *tep)
1141{
Josef 'Jeff' Sipek1a5e2582014-08-08 10:50:14 -04001142 atomic_inc_32(&tep->te_refcnt);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001143}
1144
1145static void
1146tl_refrele(tl_endpt_t *tep)
1147{
1148 ASSERT(tep->te_refcnt != 0);
1149
Josef 'Jeff' Sipek1a5e2582014-08-08 10:50:14 -04001150 if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001151 tl_free(tep);
1152}
1153
1154/*ARGSUSED*/
1155static int
1156tl_constructor(void *buf, void *cdrarg, int kmflags)
1157{
1158 tl_endpt_t *tep = buf;
1159
1160 bzero(tep, sizeof (tl_endpt_t));
1161 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1162 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1163 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1164 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1165 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1166
1167 return (0);
1168}
1169
1170/*ARGSUSED*/
1171static void
1172tl_destructor(void *buf, void *cdrarg)
1173{
1174 tl_endpt_t *tep = buf;
1175
1176 mutex_destroy(&tep->te_closelock);
1177 cv_destroy(&tep->te_closecv);
1178 mutex_destroy(&tep->te_srv_lock);
1179 cv_destroy(&tep->te_srv_cv);
1180 mutex_destroy(&tep->te_ser_lock);
1181}
1182
1183static void
1184tl_free(tl_endpt_t *tep)
1185{
1186 ASSERT(tep->te_refcnt == 0);
1187 ASSERT(tep->te_transport != NULL);
1188 ASSERT(tep->te_rq == NULL);
1189 ASSERT(tep->te_wq == NULL);
1190 ASSERT(tep->te_ser != NULL);
1191 ASSERT(tep->te_ser_count == 0);
1192 ASSERT(! (tep->te_flag & TL_ADDRHASHED));
1193
1194 if (IS_SOCKET(tep)) {
1195 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1196 ASSERT(tep->te_abuf == &tep->te_uxaddr);
1197 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1198 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1199 } else if (tep->te_abuf != NULL) {
1200 kmem_free(tep->te_abuf, tep->te_alen);
1201 tep->te_alen = -1; /* uninitialized */
1202 tep->te_abuf = NULL;
1203 } else {
1204 ASSERT(tep->te_alen == -1);
1205 }
1206
1207 id_free(tl_minors, tep->te_minor);
1208 ASSERT(tep->te_credp == NULL);
1209
1210 if (tep->te_hash_hndl != NULL)
1211 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1212
1213 if (IS_COTS(tep)) {
1214 TL_REMOVE_PEER(tep->te_conp);
1215 TL_REMOVE_PEER(tep->te_oconp);
1216 tl_serializer_refrele(tep->te_ser);
1217 tep->te_ser = NULL;
1218 ASSERT(tep->te_nicon == 0);
1219 ASSERT(list_head(&tep->te_iconp) == NULL);
1220 } else {
1221 ASSERT(tep->te_lastep == NULL);
1222 ASSERT(list_head(&tep->te_flowlist) == NULL);
1223 ASSERT(tep->te_flowq == NULL);
1224 }
1225
1226 ASSERT(tep->te_bufcid == 0);
1227 ASSERT(tep->te_timoutid == 0);
1228 bzero(&tep->te_ap, sizeof (tep->te_ap));
1229 tep->te_acceptor_id = 0;
1230
1231 ASSERT(tep->te_closewait == 0);
1232 ASSERT(!tep->te_rsrv_active);
1233 ASSERT(!tep->te_wsrv_active);
1234 tep->te_closing = 0;
1235 tep->te_nowsrv = B_FALSE;
1236 tep->te_flag = 0;
1237
1238 kmem_cache_free(tl_cache, tep);
1239}
1240
1241/*
1242 * Allocate/free reference-counted wrappers for serializers.
1243 */
1244static tl_serializer_t *
1245tl_serializer_alloc(int flags)
1246{
1247 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1248 serializer_t *ser;
1249
1250 if (s == NULL)
1251 return (NULL);
1252
1253 ser = serializer_create(flags);
1254
1255 if (ser == NULL) {
1256 kmem_free(s, sizeof (tl_serializer_t));
1257 return (NULL);
1258 }
1259
1260 s->ts_refcnt = 1;
1261 s->ts_serializer = ser;
1262 return (s);
1263}
1264
1265static void
1266tl_serializer_refhold(tl_serializer_t *s)
1267{
Josef 'Jeff' Sipek1a5e2582014-08-08 10:50:14 -04001268 atomic_inc_32(&s->ts_refcnt);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001269}
1270
1271static void
1272tl_serializer_refrele(tl_serializer_t *s)
1273{
Josef 'Jeff' Sipek1a5e2582014-08-08 10:50:14 -04001274 if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001275 serializer_destroy(s->ts_serializer);
1276 kmem_free(s, sizeof (tl_serializer_t));
1277 }
1278}
1279
1280/*
1281 * Post a request on the endpoint serializer. For COTS transports keep track of
1282 * the number of pending requests.
1283 */
1284static void
1285tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1286{
1287 if (IS_COTS(tep)) {
1288 mutex_enter(&tep->te_ser_lock);
1289 tep->te_ser_count++;
1290 mutex_exit(&tep->te_ser_lock);
1291 }
1292 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1293}
1294
1295/*
1296 * Complete processing the request on the serializer. Decrement the counter for
1297 * pending requests for COTS transports.
1298 */
1299static void
1300tl_serializer_exit(tl_endpt_t *tep)
1301{
1302 if (IS_COTS(tep)) {
1303 mutex_enter(&tep->te_ser_lock);
1304 ASSERT(tep->te_ser_count != 0);
1305 tep->te_ser_count--;
1306 mutex_exit(&tep->te_ser_lock);
1307 }
1308}
1309
1310/*
1311 * Hash management functions.
1312 */
1313
1314/*
1315 * Return TRUE if two addresses are equal, false otherwise.
1316 */
1317static boolean_t
1318tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1319{
1320 return ((ap1->ta_alen > 0) &&
1321 (ap1->ta_alen == ap2->ta_alen) &&
1322 (ap1->ta_zoneid == ap2->ta_zoneid) &&
1323 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1324}
1325
1326/*
1327 * This function is called whenever an endpoint is found in the hash table.
1328 */
1329/* ARGSUSED0 */
1330static void
1331tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1332{
1333 tl_refhold((tl_endpt_t *)val);
1334}
1335
1336/*
1337 * Address hash function.
1338 */
1339/* ARGSUSED */
1340static uint_t
1341tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1342{
1343 tl_addr_t *ap = (tl_addr_t *)key;
1344 size_t len = ap->ta_alen;
1345 uchar_t *p = ap->ta_abuf;
1346 uint_t i, g;
1347
1348 ASSERT((len > 0) && (p != NULL));
1349
1350 for (i = ap->ta_zoneid; len -- != 0; p++) {
1351 i = (i << 4) + (*p);
1352 if ((g = (i & 0xf0000000U)) != 0) {
1353 i ^= (g >> 24);
1354 i ^= g;
1355 }
1356 }
1357 return (i);
1358}
1359
1360/*
1361 * This function is used by hash lookups. It compares two generic addresses.
1362 */
1363static int
1364tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1365{
1366#ifdef DEBUG
1367 tl_addr_t *ap1 = (tl_addr_t *)key1;
1368 tl_addr_t *ap2 = (tl_addr_t *)key2;
1369
1370 ASSERT(key1 != NULL);
1371 ASSERT(key2 != NULL);
1372
1373 ASSERT(ap1->ta_abuf != NULL);
1374 ASSERT(ap2->ta_abuf != NULL);
1375 ASSERT(ap1->ta_alen > 0);
1376 ASSERT(ap2->ta_alen > 0);
1377#endif
1378
1379 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1380}
1381
1382/*
1383 * Prevent endpoint from closing if possible.
1384 * Return B_TRUE on success, B_FALSE on failure.
1385 */
1386static boolean_t
1387tl_noclose(tl_endpt_t *tep)
1388{
1389 boolean_t rc = B_FALSE;
1390
1391 mutex_enter(&tep->te_closelock);
1392 if (! tep->te_closing) {
1393 ASSERT(tep->te_closewait == 0);
1394 tep->te_closewait++;
1395 rc = B_TRUE;
1396 }
1397 mutex_exit(&tep->te_closelock);
1398 return (rc);
1399}
1400
1401/*
1402 * Allow endpoint to close if needed.
1403 */
1404static void
1405tl_closeok(tl_endpt_t *tep)
1406{
1407 ASSERT(tep->te_closewait > 0);
1408 mutex_enter(&tep->te_closelock);
1409 ASSERT(tep->te_closewait == 1);
1410 tep->te_closewait--;
1411 cv_signal(&tep->te_closecv);
1412 mutex_exit(&tep->te_closelock);
1413}
1414
1415/*
1416 * STREAMS open entry point.
1417 */
1418/* ARGSUSED */
1419static int
1420tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
1421{
1422 tl_endpt_t *tep;
1423 minor_t minor = getminor(*devp);
1424
1425 /*
1426 * Driver is called directly. Both CLONEOPEN and MODOPEN
1427 * are illegal
1428 */
1429 if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1430 return (ENXIO);
1431
1432 if (rq->q_ptr != NULL)
1433 return (0);
1434
1435 /* Minor number should specify the mode used for the driver. */
1436 if ((minor >= TL_UNUSED))
1437 return (ENXIO);
1438
1439 if (oflag & SO_SOCKSTR) {
1440 minor |= TL_SOCKET;
1441 }
1442
1443 tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1444 tep->te_refcnt = 1;
1445 tep->te_cpid = curproc->p_pid;
1446 rq->q_ptr = WR(rq)->q_ptr = tep;
1447 tep->te_state = TS_UNBND;
1448 tep->te_credp = credp;
1449 crhold(credp);
1450 tep->te_zoneid = getzoneid();
1451
1452 tep->te_flag = minor & TL_MINOR_MASK;
1453 tep->te_transport = &tl_transports[minor];
1454
1455 /* Allocate a unique minor number for this instance. */
1456 tep->te_minor = (minor_t)id_alloc(tl_minors);
1457
1458 /* Reserve hash handle for bind(). */
1459 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1460
1461 /* Transport-specific initialization */
1462 if (IS_COTS(tep)) {
1463 /* Use private serializer */
1464 tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1465
1466 /* Create list for pending connections */
1467 list_create(&tep->te_iconp, sizeof (tl_icon_t),
1468 offsetof(tl_icon_t, ti_node));
1469 tep->te_qlen = 0;
1470 tep->te_nicon = 0;
1471 tep->te_oconp = NULL;
1472 tep->te_conp = NULL;
1473 } else {
1474 /* Use shared serializer */
1475 tep->te_ser = tep->te_transport->tr_serializer;
1476 bzero(&tep->te_flows, sizeof (list_node_t));
1477 /* Create list for flow control */
1478 list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1479 offsetof(tl_endpt_t, te_flows));
1480 tep->te_flowq = NULL;
1481 tep->te_lastep = NULL;
1482
1483 }
1484
1485 /* Initialize endpoint address */
1486 if (IS_SOCKET(tep)) {
1487 /* Socket-specific address handling. */
1488 tep->te_alen = TL_SOUX_ADDRLEN;
1489 tep->te_abuf = &tep->te_uxaddr;
1490 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1491 tep->te_magic = SOU_MAGIC_IMPLICIT;
1492 } else {
1493 tep->te_alen = -1;
1494 tep->te_abuf = NULL;
1495 }
1496
1497 /* clone the driver */
1498 *devp = makedevice(getmajor(*devp), tep->te_minor);
1499
1500 tep->te_rq = rq;
1501 tep->te_wq = WR(rq);
1502
1503#ifdef _ILP32
1504 if (IS_SOCKET(tep))
1505 tep->te_acceptor_id = tep->te_minor;
1506 else
1507 tep->te_acceptor_id = (t_uscalar_t)rq;
1508#else
1509 tep->te_acceptor_id = tep->te_minor;
1510#endif /* _ILP32 */
1511
1512
1513 qprocson(rq);
1514
1515 /*
1516 * Insert acceptor ID in the hash. The AI hash always sleeps on
1517 * insertion so insertion can't fail.
1518 */
1519 (void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1520 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1521 (mod_hash_val_t)tep);
1522
1523 return (0);
1524}
1525
1526/* ARGSUSED1 */
1527static int
1528tl_close(queue_t *rq, int flag, cred_t *credp)
1529{
1530 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1531 tl_endpt_t *elp = NULL;
1532 queue_t *wq = tep->te_wq;
1533 int rc;
1534
1535 ASSERT(wq == WR(rq));
1536
1537 /*
1538 * Remove the endpoint from acceptor hash.
1539 */
1540 rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1541 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1542 (mod_hash_val_t *)&elp);
1543 ASSERT(rc == 0 && tep == elp);
1544 if ((rc != 0) || (tep != elp)) {
1545 (void) (STRLOG(TL_ID, tep->te_minor, 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07001546 SL_TRACE|SL_ERROR,
1547 "tl_close:inconsistency in AI hash"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001548 }
1549
1550 /*
1551 * Wait till close is safe, then mark endpoint as closing.
1552 */
1553 mutex_enter(&tep->te_closelock);
1554 while (tep->te_closewait)
1555 cv_wait(&tep->te_closecv, &tep->te_closelock);
1556 tep->te_closing = B_TRUE;
1557 /*
1558 * Will wait for the serializer part of the close to finish, so set
1559 * te_closewait now.
1560 */
1561 tep->te_closewait = 1;
1562 tep->te_nowsrv = B_FALSE;
1563 mutex_exit(&tep->te_closelock);
1564
1565 /*
1566 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1567 * It is safe because close will wait for tl_close_ser to finish.
1568 */
1569 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1570
1571 /*
1572 * Wait for the first phase of close to complete before qprocsoff().
1573 */
1574 mutex_enter(&tep->te_closelock);
1575 while (tep->te_closewait)
1576 cv_wait(&tep->te_closecv, &tep->te_closelock);
1577 mutex_exit(&tep->te_closelock);
1578
1579 qprocsoff(rq);
1580
1581 if (tep->te_bufcid) {
1582 qunbufcall(rq, tep->te_bufcid);
1583 tep->te_bufcid = 0;
1584 }
1585 if (tep->te_timoutid) {
1586 (void) quntimeout(rq, tep->te_timoutid);
1587 tep->te_timoutid = 0;
1588 }
1589
1590 /*
1591 * Finish close behind serializer.
1592 *
1593 * For a CLTS endpoint increase a refcount and continue close processing
1594 * with serializer protection. This processing may happen asynchronously
1595 * with the completion of tl_close().
1596 *
1597 * Fot a COTS endpoint wait before destroying tep since the serializer
1598 * may go away together with tep and we need to destroy serializer
1599 * outside of serializer context.
1600 */
1601 ASSERT(tep->te_closewait == 0);
1602 if (IS_COTS(tep))
1603 tep->te_closewait = 1;
1604 else
1605 tl_refhold(tep);
1606
1607 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1608
1609 /*
1610 * For connection-oriented transports wait for all serializer activity
1611 * to settle down.
1612 */
1613 if (IS_COTS(tep)) {
1614 mutex_enter(&tep->te_closelock);
1615 while (tep->te_closewait)
1616 cv_wait(&tep->te_closecv, &tep->te_closelock);
1617 mutex_exit(&tep->te_closelock);
1618 }
1619
1620 crfree(tep->te_credp);
1621 tep->te_credp = NULL;
1622 tep->te_wq = NULL;
1623 tl_refrele(tep);
1624 /*
1625 * tep is likely to be destroyed now, so can't reference it any more.
1626 */
1627
1628 rq->q_ptr = wq->q_ptr = NULL;
1629 return (0);
1630}
1631
1632/*
1633 * First phase of close processing done behind the serializer.
1634 *
1635 * Do not drop the reference in the end - tl_close() wants this reference to
1636 * stay.
1637 */
1638/* ARGSUSED0 */
1639static void
1640tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1641{
1642 ASSERT(tep->te_closing);
1643 ASSERT(tep->te_closewait == 1);
1644 ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1645
1646 tep->te_flag |= TL_CLOSE_SER;
1647
1648 /*
1649 * Drain out all messages on queue except for TL_TICOTS where the
1650 * abortive release semantics permit discarding of data on close
1651 */
1652 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1653 tl_wsrv_ser(NULL, tep);
1654 }
1655
1656 /* Remove address from hash table. */
1657 tl_addr_unbind(tep);
1658 /*
1659 * qprocsoff() gets confused when q->q_next is not NULL on the write
1660 * queue of the driver, so clear these before qprocsoff() is called.
1661 * Also clear q_next for the peer since this queue is going away.
1662 */
1663 if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1664 tl_endpt_t *peer_tep = tep->te_conp;
1665
1666 tep->te_wq->q_next = NULL;
1667 if ((peer_tep != NULL) && !peer_tep->te_closing)
1668 peer_tep->te_wq->q_next = NULL;
1669 }
1670
1671 tep->te_rq = NULL;
1672
1673 /* wake up tl_close() */
1674 tl_closeok(tep);
1675 tl_serializer_exit(tep);
1676}
1677
1678/*
1679 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1680 * the reference for CLTS.
1681 *
1682 * Called from serializer. Should drop reference count for CLTS only.
1683 */
1684/* ARGSUSED0 */
1685static void
1686tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1687{
1688 ASSERT(tep->te_closing);
Jonathan Adams56f33202010-01-12 17:06:34 -08001689 IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1690 IMPLY(IS_COTS(tep), tep->te_closewait == 1);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001691
1692 tep->te_state = -1; /* Uninitialized */
1693 if (IS_COTS(tep)) {
1694 tl_co_unconnect(tep);
1695 } else {
1696 /* Connectionless specific cleanup */
1697 TL_REMOVE_PEER(tep->te_lastep);
1698 /*
1699 * Backenable anybody that is flow controlled waiting for
1700 * this endpoint.
1701 */
1702 tl_cl_backenable(tep);
1703 if (tep->te_flowq != NULL) {
1704 list_remove(&(tep->te_flowq->te_flowlist), tep);
1705 tep->te_flowq = NULL;
1706 }
1707 }
1708
1709 tl_serializer_exit(tep);
1710 if (IS_COTS(tep))
1711 tl_closeok(tep);
1712 else
1713 tl_refrele(tep);
1714}
1715
1716/*
1717 * STREAMS write-side put procedure.
1718 * Enter serializer for most of the processing.
1719 *
1720 * The T_CONN_REQ is processed outside of serializer.
1721 */
1722static void
1723tl_wput(queue_t *wq, mblk_t *mp)
1724{
1725 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1726 ssize_t msz = MBLKL(mp);
1727 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
1728 tlproc_t *tl_proc = NULL;
1729
1730 switch (DB_TYPE(mp)) {
1731 case M_DATA:
1732 /* Only valid for connection-oriented transports */
1733 if (IS_CLTS(tep)) {
1734 (void) (STRLOG(TL_ID, tep->te_minor, 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07001735 SL_TRACE|SL_ERROR,
1736 "tl_wput:M_DATA invalid for ticlts driver"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001737 tl_merror(wq, mp, EPROTO);
xy158873049304f2005-07-14 02:05:20 -07001738 return;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001739 }
1740 tl_proc = tl_wput_data_ser;
1741 break;
1742
1743 case M_IOCTL:
1744 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1745 case TL_IOC_CREDOPT:
1746 /* FALLTHROUGH */
1747 case TL_IOC_UCREDOPT:
1748 /*
1749 * Serialize endpoint state change.
1750 */
1751 tl_proc = tl_do_ioctl_ser;
1752 break;
1753
1754 default:
1755 miocnak(wq, mp, 0, EINVAL);
1756 return;
1757 }
1758 break;
1759
1760 case M_FLUSH:
1761 /*
1762 * do canonical M_FLUSH processing
1763 */
1764 if (*mp->b_rptr & FLUSHW) {
1765 flushq(wq, FLUSHALL);
1766 *mp->b_rptr &= ~FLUSHW;
1767 }
1768 if (*mp->b_rptr & FLUSHR) {
1769 flushq(RD(wq), FLUSHALL);
1770 qreply(wq, mp);
1771 } else {
1772 freemsg(mp);
1773 }
1774 return;
1775
1776 case M_PROTO:
1777 if (msz < sizeof (prim->type)) {
1778 (void) (STRLOG(TL_ID, tep->te_minor, 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07001779 SL_TRACE|SL_ERROR,
1780 "tl_wput:M_PROTO data too short"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001781 tl_merror(wq, mp, EPROTO);
1782 return;
1783 }
1784 switch (prim->type) {
1785 case T_OPTMGMT_REQ:
1786 case T_SVR4_OPTMGMT_REQ:
1787 /*
1788 * Process TPI option management requests immediately
1789 * in put procedure regardless of in-order processing
1790 * of already queued messages.
1791 * (Note: This driver supports AF_UNIX socket
1792 * implementation. Unless we implement this processing,
1793 * setsockopt() on socket endpoint will block on flow
1794 * controlled endpoints which it should not. That is
1795 * required for successful execution of VSU socket tests
1796 * and is consistent with BSD socket behavior).
1797 */
1798 tl_optmgmt(wq, mp);
1799 return;
1800 case O_T_BIND_REQ:
1801 case T_BIND_REQ:
1802 tl_proc = tl_bind_ser;
1803 break;
1804 case T_CONN_REQ:
1805 if (IS_CLTS(tep)) {
1806 tl_merror(wq, mp, EPROTO);
1807 return;
1808 }
1809 tl_conn_req(wq, mp);
1810 return;
1811 case T_DATA_REQ:
1812 case T_OPTDATA_REQ:
1813 case T_EXDATA_REQ:
1814 case T_ORDREL_REQ:
1815 tl_proc = tl_putq_ser;
1816 break;
1817 case T_UNITDATA_REQ:
1818 if (IS_COTS(tep) ||
1819 (msz < sizeof (struct T_unitdata_req))) {
1820 tl_merror(wq, mp, EPROTO);
1821 return;
1822 }
1823 if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1824 tl_proc = tl_unitdata_ser;
1825 } else {
1826 tl_proc = tl_putq_ser;
1827 }
1828 break;
1829 default:
1830 /*
1831 * process in service procedure if message already
1832 * queued (maintain in-order processing)
1833 */
1834 if (wq->q_first != NULL) {
1835 tl_proc = tl_putq_ser;
1836 } else {
1837 tl_proc = tl_wput_ser;
1838 }
1839 break;
1840 }
1841 break;
1842
1843 case M_PCPROTO:
1844 /*
1845 * Check that the message has enough data to figure out TPI
1846 * primitive.
1847 */
1848 if (msz < sizeof (prim->type)) {
1849 (void) (STRLOG(TL_ID, tep->te_minor, 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07001850 SL_TRACE|SL_ERROR,
1851 "tl_wput:M_PCROTO data too short"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001852 tl_merror(wq, mp, EPROTO);
1853 return;
1854 }
1855 switch (prim->type) {
1856 case T_CAPABILITY_REQ:
1857 tl_capability_req(mp, tep);
1858 return;
1859 case T_INFO_REQ:
1860 tl_proc = tl_info_req_ser;
1861 break;
Dan Kruchininb2d8fb82011-06-20 06:44:45 -07001862 case T_ADDR_REQ:
1863 tl_proc = tl_addr_req_ser;
1864 break;
1865
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001866 default:
1867 (void) (STRLOG(TL_ID, tep->te_minor, 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07001868 SL_TRACE|SL_ERROR,
1869 "tl_wput:unknown TPI msg primitive"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001870 tl_merror(wq, mp, EPROTO);
1871 return;
1872 }
1873 break;
1874 default:
1875 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
nordmarkfc80c0d2007-10-11 22:57:36 -07001876 "tl_wput:default:unexpected Streams message"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001877 freemsg(mp);
1878 return;
1879 }
1880
1881 /*
1882 * Continue processing via serializer.
1883 */
1884 ASSERT(tl_proc != NULL);
1885 tl_refhold(tep);
1886 tl_serializer_enter(tep, tl_proc, mp);
1887}
1888
1889/*
1890 * Place message on the queue while preserving order.
1891 */
1892static void
1893tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1894{
1895 if (tep->te_closing) {
1896 tl_wput_ser(mp, tep);
1897 } else {
1898 TL_PUTQ(tep, mp);
1899 tl_serializer_exit(tep);
1900 tl_refrele(tep);
1901 }
1902
1903}
1904
1905static void
1906tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1907{
1908 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1909
1910 switch (DB_TYPE(mp)) {
1911 case M_DATA:
1912 tl_data(mp, tep);
1913 break;
1914 case M_PROTO:
1915 tl_do_proto(mp, tep);
1916 break;
1917 default:
1918 freemsg(mp);
1919 break;
1920 }
1921}
1922
1923/*
1924 * Write side put procedure called from serializer.
1925 */
1926static void
1927tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1928{
1929 tl_wput_common_ser(mp, tep);
1930 tl_serializer_exit(tep);
1931 tl_refrele(tep);
1932}
1933
1934/*
1935 * M_DATA processing. Called from serializer.
1936 */
1937static void
1938tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1939{
1940 tl_endpt_t *peer_tep = tep->te_conp;
1941 queue_t *peer_rq;
1942
1943 ASSERT(DB_TYPE(mp) == M_DATA);
1944 ASSERT(IS_COTS(tep));
1945
Jonathan Adams56f33202010-01-12 17:06:34 -08001946 IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001947
1948 /*
1949 * fastpath for data. Ignore flow control if tep is closing.
1950 */
1951 if ((peer_tep != NULL) &&
1952 !peer_tep->te_closing &&
1953 ((tep->te_state == TS_DATA_XFER) ||
nordmarkfc80c0d2007-10-11 22:57:36 -07001954 (tep->te_state == TS_WREQ_ORDREL)) &&
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001955 (tep->te_wq != NULL) &&
1956 (tep->te_wq->q_first == NULL) &&
1957 ((peer_tep->te_state == TS_DATA_XFER) ||
nordmarkfc80c0d2007-10-11 22:57:36 -07001958 (peer_tep->te_state == TS_WREQ_ORDREL)) &&
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001959 ((peer_rq = peer_tep->te_rq) != NULL) &&
1960 (canputnext(peer_rq) || tep->te_closing)) {
1961 putnext(peer_rq, mp);
1962 } else if (tep->te_closing) {
1963 /*
1964 * It is possible that by the time we got here tep started to
1965 * close. If the write queue is not empty, and the state is
1966 * TS_DATA_XFER the data should be delivered in order, so we
1967 * call putq() instead of freeing the data.
1968 */
1969 if ((tep->te_wq != NULL) &&
1970 ((tep->te_state == TS_DATA_XFER) ||
nordmarkfc80c0d2007-10-11 22:57:36 -07001971 (tep->te_state == TS_WREQ_ORDREL))) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001972 TL_PUTQ(tep, mp);
1973 } else {
1974 freemsg(mp);
1975 }
1976 } else {
1977 TL_PUTQ(tep, mp);
1978 }
1979
1980 tl_serializer_exit(tep);
1981 tl_refrele(tep);
1982}
1983
1984/*
1985 * Write side service routine.
1986 *
1987 * All actual processing happens within serializer which is entered
1988 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1989 * messages that need processing may have arrived, so tl_wsrv repeats until
1990 * queue is empty or te_nowsrv is set.
1991 */
1992static void
1993tl_wsrv(queue_t *wq)
1994{
1995 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1996
1997 while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1998 mutex_enter(&tep->te_srv_lock);
1999 ASSERT(tep->te_wsrv_active == B_FALSE);
2000 tep->te_wsrv_active = B_TRUE;
2001 mutex_exit(&tep->te_srv_lock);
2002
2003 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2004
2005 /*
2006 * Wait for serializer job to complete.
2007 */
2008 mutex_enter(&tep->te_srv_lock);
2009 while (tep->te_wsrv_active) {
2010 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2011 }
2012 cv_signal(&tep->te_srv_cv);
2013 mutex_exit(&tep->te_srv_lock);
2014 }
2015}
2016
2017/*
2018 * Serialized write side processing of the STREAMS queue.
2019 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2020 * is NULL.
2021 */
2022static void
2023tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2024{
2025 mblk_t *mp;
2026 queue_t *wq = tep->te_wq;
2027
2028 ASSERT(wq != NULL);
2029 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2030 tl_wput_common_ser(mp, tep);
2031 }
2032
2033 /*
2034 * Wakeup service routine unless called from close.
2035 * If ser_mp is specified, the caller is tl_wsrv().
2036 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2037 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2038 * be no matching tl_serializer_exit() in this case.
2039 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2040 * waiting on te_srv_cv.
2041 */
2042 if (ser_mp != NULL) {
2043 /*
2044 * We are called from tl_wsrv.
2045 */
2046 mutex_enter(&tep->te_srv_lock);
2047 ASSERT(tep->te_wsrv_active);
2048 tep->te_wsrv_active = B_FALSE;
2049 cv_signal(&tep->te_srv_cv);
2050 mutex_exit(&tep->te_srv_lock);
2051 tl_serializer_exit(tep);
2052 }
2053}
2054
2055/*
2056 * Called when the stream is backenabled. Enter serializer and qenable everyone
2057 * flow controlled by tep.
2058 *
2059 * NOTE: The service routine should enter serializer synchronously. Otherwise it
2060 * is possible that two instances of tl_rsrv will be running reusing the same
2061 * rsrv mblk.
2062 */
2063static void
2064tl_rsrv(queue_t *rq)
2065{
2066 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2067
2068 ASSERT(rq->q_first == NULL);
2069 ASSERT(tep->te_rsrv_active == 0);
2070
2071 tep->te_rsrv_active = B_TRUE;
2072 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2073 /*
2074 * Wait for serializer job to complete.
2075 */
2076 mutex_enter(&tep->te_srv_lock);
2077 while (tep->te_rsrv_active) {
2078 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2079 }
2080 cv_signal(&tep->te_srv_cv);
2081 mutex_exit(&tep->te_srv_lock);
2082}
2083
2084/* ARGSUSED */
2085static void
2086tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2087{
2088 tl_endpt_t *peer_tep;
2089
2090 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2091 tl_cl_backenable(tep);
2092 } else if (
nordmarkfc80c0d2007-10-11 22:57:36 -07002093 IS_COTS(tep) &&
2094 ((peer_tep = tep->te_conp) != NULL) &&
2095 !peer_tep->te_closing &&
2096 ((tep->te_state == TS_DATA_XFER) ||
2097 (tep->te_state == TS_WIND_ORDREL)||
2098 (tep->te_state == TS_WREQ_ORDREL))) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002099 TL_QENABLE(peer_tep);
2100 }
2101
2102 /*
2103 * Wakeup read side service routine.
2104 */
2105 mutex_enter(&tep->te_srv_lock);
2106 ASSERT(tep->te_rsrv_active);
2107 tep->te_rsrv_active = B_FALSE;
2108 cv_signal(&tep->te_srv_cv);
2109 mutex_exit(&tep->te_srv_lock);
2110 tl_serializer_exit(tep);
2111}
2112
2113/*
2114 * process M_PROTO messages. Always called from serializer.
2115 */
2116static void
2117tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2118{
2119 ssize_t msz = MBLKL(mp);
2120 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
2121
2122 /* Message size was validated by tl_wput(). */
2123 ASSERT(msz >= sizeof (prim->type));
2124
2125 switch (prim->type) {
2126 case T_UNBIND_REQ:
2127 tl_unbind(mp, tep);
2128 break;
2129
2130 case T_ADDR_REQ:
2131 tl_addr_req(mp, tep);
2132 break;
2133
2134 case O_T_CONN_RES:
2135 case T_CONN_RES:
2136 if (IS_CLTS(tep)) {
2137 tl_merror(tep->te_wq, mp, EPROTO);
2138 break;
2139 }
2140 tl_conn_res(mp, tep);
2141 break;
2142
2143 case T_DISCON_REQ:
2144 if (IS_CLTS(tep)) {
2145 tl_merror(tep->te_wq, mp, EPROTO);
2146 break;
2147 }
2148 tl_discon_req(mp, tep);
2149 break;
2150
2151 case T_DATA_REQ:
2152 if (IS_CLTS(tep)) {
2153 tl_merror(tep->te_wq, mp, EPROTO);
2154 break;
2155 }
2156 tl_data(mp, tep);
2157 break;
2158
2159 case T_OPTDATA_REQ:
2160 if (IS_CLTS(tep)) {
2161 tl_merror(tep->te_wq, mp, EPROTO);
2162 break;
2163 }
2164 tl_data(mp, tep);
2165 break;
2166
2167 case T_EXDATA_REQ:
2168 if (IS_CLTS(tep)) {
2169 tl_merror(tep->te_wq, mp, EPROTO);
2170 break;
2171 }
2172 tl_exdata(mp, tep);
2173 break;
2174
2175 case T_ORDREL_REQ:
2176 if (! IS_COTSORD(tep)) {
2177 tl_merror(tep->te_wq, mp, EPROTO);
2178 break;
2179 }
2180 tl_ordrel(mp, tep);
2181 break;
2182
2183 case T_UNITDATA_REQ:
2184 if (IS_COTS(tep)) {
2185 tl_merror(tep->te_wq, mp, EPROTO);
2186 break;
2187 }
2188 tl_unitdata(mp, tep);
2189 break;
2190
2191 default:
2192 tl_merror(tep->te_wq, mp, EPROTO);
2193 break;
2194 }
2195}
2196
2197/*
2198 * Process ioctl from serializer.
2199 * This is a wrapper around tl_do_ioctl().
2200 */
2201static void
2202tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2203{
2204 if (! tep->te_closing)
2205 tl_do_ioctl(mp, tep);
2206 else
2207 freemsg(mp);
2208
2209 tl_serializer_exit(tep);
2210 tl_refrele(tep);
2211}
2212
2213static void
2214tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2215{
2216 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2217 int cmd = iocbp->ioc_cmd;
2218 queue_t *wq = tep->te_wq;
2219 int error;
2220 int thisopt, otheropt;
2221
2222 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2223
2224 switch (cmd) {
2225 case TL_IOC_CREDOPT:
2226 if (cmd == TL_IOC_CREDOPT) {
2227 thisopt = TL_SETCRED;
2228 otheropt = TL_SETUCRED;
2229 } else {
2230 /* FALLTHROUGH */
2231 case TL_IOC_UCREDOPT:
2232 thisopt = TL_SETUCRED;
2233 otheropt = TL_SETCRED;
2234 }
2235 /*
2236 * The credentials passing does not apply to sockets.
2237 * Only one of the cred options can be set at a given time.
2238 */
2239 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2240 miocnak(wq, mp, 0, EINVAL);
2241 return;
2242 }
2243
2244 /*
2245 * Turn on generation of credential options for
2246 * T_conn_req, T_conn_con, T_unidata_ind.
2247 */
2248 error = miocpullup(mp, sizeof (uint32_t));
2249 if (error != 0) {
2250 miocnak(wq, mp, 0, error);
2251 return;
2252 }
2253 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2254 miocnak(wq, mp, 0, EINVAL);
2255 return;
2256 }
2257
2258 if (*(uint32_t *)mp->b_cont->b_rptr)
2259 tep->te_flag |= thisopt;
2260 else
2261 tep->te_flag &= ~thisopt;
2262
2263 miocack(wq, mp, 0, 0);
2264 break;
2265
2266 default:
2267 /* Should not be here */
2268 miocnak(wq, mp, 0, EINVAL);
2269 break;
2270 }
2271}
2272
2273
2274/*
2275 * send T_ERROR_ACK
2276 * Note: assumes enough memory or caller passed big enough mp
2277 * - no recovery from allocb failures
2278 */
2279
2280static void
2281tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2282 t_scalar_t unix_err, t_scalar_t type)
2283{
2284 struct T_error_ack *err_ack;
2285 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2286 M_PCPROTO, T_ERROR_ACK);
2287
2288 if (ackmp == NULL) {
2289 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
nordmarkfc80c0d2007-10-11 22:57:36 -07002290 "tl_error_ack:out of mblk memory"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002291 tl_merror(wq, NULL, ENOSR);
2292 return;
2293 }
2294 err_ack = (struct T_error_ack *)ackmp->b_rptr;
2295 err_ack->ERROR_prim = type;
2296 err_ack->TLI_error = tli_err;
2297 err_ack->UNIX_error = unix_err;
2298
2299 /*
2300 * send error ack message
2301 */
2302 qreply(wq, ackmp);
2303}
2304
2305
2306
2307/*
2308 * send T_OK_ACK
2309 * Note: assumes enough memory or caller passed big enough mp
2310 * - no recovery from allocb failures
2311 */
2312static void
2313tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2314{
2315 struct T_ok_ack *ok_ack;
2316 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2317 M_PCPROTO, T_OK_ACK);
2318
2319 if (ackmp == NULL) {
2320 tl_merror(wq, NULL, ENOMEM);
2321 return;
2322 }
2323
2324 ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2325 ok_ack->CORRECT_prim = type;
2326
2327 (void) qreply(wq, ackmp);
2328}
2329
2330/*
2331 * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2332 * This is a wrapper around tl_bind().
2333 */
2334static void
2335tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2336{
2337 if (! tep->te_closing)
2338 tl_bind(mp, tep);
2339 else
2340 freemsg(mp);
2341
2342 tl_serializer_exit(tep);
2343 tl_refrele(tep);
2344}
2345
2346/*
2347 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2348 * Assumes that the endpoint is in the unbound.
2349 */
2350static void
2351tl_bind(mblk_t *mp, tl_endpt_t *tep)
2352{
2353 queue_t *wq = tep->te_wq;
2354 struct T_bind_ack *b_ack;
2355 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr;
2356 mblk_t *ackmp, *bamp;
2357 soux_addr_t ux_addr;
2358 t_uscalar_t qlen = 0;
2359 t_scalar_t alen, aoff;
2360 tl_addr_t addr_req;
2361 void *addr_startp;
2362 ssize_t msz = MBLKL(mp), basize;
2363 t_scalar_t tli_err = 0, unix_err = 0;
2364 t_scalar_t save_prim_type = bind->PRIM_type;
2365 t_scalar_t save_state = tep->te_state;
2366
2367 if (tep->te_state != TS_UNBND) {
2368 (void) (STRLOG(TL_ID, tep->te_minor, 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07002369 SL_TRACE|SL_ERROR,
2370 "tl_wput:bind_request:out of state, state=%d",
2371 tep->te_state));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002372 tli_err = TOUTSTATE;
2373 goto error;
2374 }
2375
2376 if (msz < sizeof (struct T_bind_req)) {
2377 tli_err = TSYSERR; unix_err = EINVAL;
2378 goto error;
2379 }
2380
2381 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2382
2383 ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2384 (bind->PRIM_type == T_BIND_REQ));
2385
2386 alen = bind->ADDR_length;
2387 aoff = bind->ADDR_offset;
2388
2389 /* negotiate max conn req pending */
2390 if (IS_COTS(tep)) {
2391 qlen = bind->CONIND_number;
akolbd87b1fa2006-08-02 17:08:49 -07002392 if (qlen > tl_maxqlen)
2393 qlen = tl_maxqlen;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002394 }
2395
2396 /*
2397 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2398 * and bound again.
2399 */
2400 if ((tep->te_hash_hndl == NULL) &&
2401 ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2402 mod_hash_reserve_nosleep(tep->te_addrhash,
nordmarkfc80c0d2007-10-11 22:57:36 -07002403 &tep->te_hash_hndl) != 0) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002404 tli_err = TSYSERR; unix_err = ENOSR;
2405 goto error;
2406 }
2407
2408 /*
2409 * Verify address correctness.
2410 */
2411 if (IS_SOCKET(tep)) {
2412 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2413
2414 if ((alen != TL_SOUX_ADDRLEN) ||
2415 (aoff < 0) ||
2416 (aoff + alen > msz)) {
2417 (void) (STRLOG(TL_ID, tep->te_minor,
nordmarkfc80c0d2007-10-11 22:57:36 -07002418 1, SL_TRACE|SL_ERROR,
2419 "tl_bind: invalid socket addr"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002420 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2421 tli_err = TSYSERR; unix_err = EINVAL;
2422 goto error;
2423 }
2424 /* Copy address from message to local buffer. */
2425 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2426 /*
2427 * Check that we got correct address from sockets
2428 */
2429 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2430 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2431 (void) (STRLOG(TL_ID, tep->te_minor,
nordmarkfc80c0d2007-10-11 22:57:36 -07002432 1, SL_TRACE|SL_ERROR,
2433 "tl_bind: invalid socket magic"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002434 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2435 tli_err = TSYSERR; unix_err = EINVAL;
2436 goto error;
2437 }
2438 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2439 (ux_addr.soua_vp != NULL)) {
2440 (void) (STRLOG(TL_ID, tep->te_minor,
nordmarkfc80c0d2007-10-11 22:57:36 -07002441 1, SL_TRACE|SL_ERROR,
2442 "tl_bind: implicit addr non-empty"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002443 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2444 tli_err = TSYSERR; unix_err = EINVAL;
2445 goto error;
2446 }
2447 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2448 (ux_addr.soua_vp == NULL)) {
2449 (void) (STRLOG(TL_ID, tep->te_minor,
nordmarkfc80c0d2007-10-11 22:57:36 -07002450 1, SL_TRACE|SL_ERROR,
2451 "tl_bind: explicit addr empty"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002452 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2453 tli_err = TSYSERR; unix_err = EINVAL;
2454 goto error;
2455 }
2456 } else {
2457 if ((alen > 0) && ((aoff < 0) ||
nordmarkfc80c0d2007-10-11 22:57:36 -07002458 ((ssize_t)(aoff + alen) > msz) ||
2459 ((aoff + alen) < 0))) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002460 (void) (STRLOG(TL_ID, tep->te_minor,
nordmarkfc80c0d2007-10-11 22:57:36 -07002461 1, SL_TRACE|SL_ERROR,
2462 "tl_bind: invalid message"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002463 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2464 tli_err = TSYSERR; unix_err = EINVAL;
2465 goto error;
2466 }
2467 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2468 (void) (STRLOG(TL_ID, tep->te_minor,
nordmarkfc80c0d2007-10-11 22:57:36 -07002469 1, SL_TRACE|SL_ERROR,
2470 "tl_bind: bad addr in message"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002471 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2472 tli_err = TBADADDR;
2473 goto error;
2474 }
2475#ifdef DEBUG
2476 /*
2477 * Mild form of ASSERT()ion to detect broken TPI apps.
2478 * if (! assertion)
2479 * log warning;
2480 */
2481 if (! ((alen == 0 && aoff == 0) ||
2482 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2483 (void) (STRLOG(TL_ID, tep->te_minor,
2484 3, SL_TRACE|SL_ERROR,
2485 "tl_bind: addr overlaps TPI message"));
2486 }
2487#endif
2488 }
2489
2490 /*
2491 * Bind the address provided or allocate one if requested.
2492 * Allow rebinds with a new qlen value.
2493 */
2494 if (IS_SOCKET(tep)) {
2495 /*
2496 * For anonymous requests the te_ap is already set up properly
2497 * so use minor number as an address.
2498 * For explicit requests need to check whether the address is
2499 * already in use.
2500 */
2501 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2502 int rc;
2503
2504 if (tep->te_flag & TL_ADDRHASHED) {
2505 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2506 if (tep->te_vp == ux_addr.soua_vp)
2507 goto skip_addr_bind;
2508 else /* Rebind to a new address. */
2509 tl_addr_unbind(tep);
2510 }
2511 /*
2512 * Insert address in the hash if it is not already
2513 * there. Since we use preallocated handle, the insert
2514 * can fail only if the key is already present.
2515 */
2516 rc = mod_hash_insert_reserve(tep->te_addrhash,
2517 (mod_hash_key_t)ux_addr.soua_vp,
2518 (mod_hash_val_t)tep, tep->te_hash_hndl);
2519
2520 if (rc != 0) {
2521 ASSERT(rc == MH_ERR_DUPLICATE);
2522 /*
2523 * Violate O_T_BIND_REQ semantics and fail with
2524 * TADDRBUSY - sockets will not use any address
2525 * other than supplied one for explicit binds.
2526 */
2527 (void) (STRLOG(TL_ID, tep->te_minor, 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07002528 SL_TRACE|SL_ERROR,
2529 "tl_bind:requested addr %p is busy",
2530 ux_addr.soua_vp));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002531 tli_err = TADDRBUSY; unix_err = 0;
2532 goto error;
2533 }
2534 tep->te_uxaddr = ux_addr;
2535 tep->te_flag |= TL_ADDRHASHED;
2536 tep->te_hash_hndl = NULL;
2537 }
2538 } else if (alen == 0) {
2539 /*
2540 * assign any free address
2541 */
2542 if (! tl_get_any_addr(tep, NULL)) {
2543 (void) (STRLOG(TL_ID, tep->te_minor,
nordmarkfc80c0d2007-10-11 22:57:36 -07002544 1, SL_TRACE|SL_ERROR,
2545 "tl_bind:failed to get buffer for any "
2546 "address"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002547 tli_err = TSYSERR; unix_err = ENOSR;
2548 goto error;
2549 }
2550 } else {
2551 addr_req.ta_alen = alen;
2552 addr_req.ta_abuf = (mp->b_rptr + aoff);
2553 addr_req.ta_zoneid = tep->te_zoneid;
2554
2555 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2556 if (tep->te_abuf == NULL) {
2557 tli_err = TSYSERR; unix_err = ENOSR;
2558 goto error;
2559 }
2560 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2561 tep->te_alen = alen;
2562
2563 if (mod_hash_insert_reserve(tep->te_addrhash,
nordmarkfc80c0d2007-10-11 22:57:36 -07002564 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2565 tep->te_hash_hndl) != 0) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002566 if (save_prim_type == T_BIND_REQ) {
2567 /*
2568 * The bind semantics for this primitive
2569 * require a failure if the exact address
2570 * requested is busy
2571 */
2572 (void) (STRLOG(TL_ID, tep->te_minor, 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07002573 SL_TRACE|SL_ERROR,
2574 "tl_bind:requested addr is busy"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002575 tli_err = TADDRBUSY; unix_err = 0;
2576 goto error;
2577 }
2578
2579 /*
2580 * O_T_BIND_REQ semantics say if address if requested
2581 * address is busy, bind to any available free address
2582 */
2583 if (! tl_get_any_addr(tep, &addr_req)) {
2584 (void) (STRLOG(TL_ID, tep->te_minor, 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07002585 SL_TRACE|SL_ERROR,
2586 "tl_bind:unable to get any addr buf"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002587 tli_err = TSYSERR; unix_err = ENOMEM;
2588 goto error;
2589 }
2590 } else {
2591 tep->te_flag |= TL_ADDRHASHED;
2592 tep->te_hash_hndl = NULL;
2593 }
2594 }
2595
2596 ASSERT(tep->te_alen >= 0);
2597
2598skip_addr_bind:
2599 /*
2600 * prepare T_BIND_ACK TPI message
2601 */
2602 basize = sizeof (struct T_bind_ack) + tep->te_alen;
2603 bamp = reallocb(mp, basize, 0);
2604 if (bamp == NULL) {
2605 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
nordmarkfc80c0d2007-10-11 22:57:36 -07002606 "tl_wput:tl_bind: allocb failed"));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002607 /*
2608 * roll back state changes
2609 */
2610 tl_addr_unbind(tep);
2611 tep->te_state = TS_UNBND;
2612 tl_memrecover(wq, mp, basize);
2613 return;
2614 }
2615
2616 DB_TYPE(bamp) = M_PCPROTO;
2617 bamp->b_wptr = bamp->b_rptr + basize;
2618 b_ack = (struct T_bind_ack *)bamp->b_rptr;
2619 b_ack->PRIM_type = T_BIND_ACK;
2620 b_ack->CONIND_number = qlen;
2621 b_ack->ADDR_length = tep->te_alen;
2622 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2623 addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2624 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2625
2626 if (IS_COTS(tep)) {
2627 tep->te_qlen = qlen;
2628 if (qlen > 0)
2629 tep->te_flag |= TL_LISTENER;
2630 }
2631
2632 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2633 /*
2634 * send T_BIND_ACK message
2635 */
2636 (void) qreply(wq, bamp);
2637 return;
2638
2639error:
2640 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2641 if (ackmp == NULL) {
2642 /*
2643 * roll back state changes
2644 */
2645 tep->te_state = save_state;
2646 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2647 return;
2648 }
2649 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2650 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2651}
2652
2653/*
2654 * Process T_UNBIND_REQ.
2655 * Called from serializer.
2656 */
2657static void
2658tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2659{
2660 queue_t *wq;
2661 mblk_t *ackmp;
2662
2663 if (tep->te_closing) {
2664 freemsg(mp);
2665 return;
2666 }
2667
2668 wq = tep->te_wq;
2669
2670 /*
2671 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2672 * ==> allocate for T_ERROR_ACK (known max)
2673 */
2674 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2675 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2676 return;
2677 }
2678 /*
2679 * memory resources committed
2680 * Note: no message validation. T_UNBIND_REQ message is
2681 * same size as PRIM_type field so already verified earlier.
2682 */
2683
2684 /*
2685 * validate state
2686 */
2687 if (tep->te_state != TS_IDLE) {
2688 (void) (STRLOG(TL_ID, tep->te_minor, 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07002689 SL_TRACE|SL_ERROR,
2690 "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2691 tep->te_state));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002692 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2693 return;
2694 }
2695 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2696
2697 /*
2698 * TPI says on T_UNBIND_REQ:
2699 * send up a M_FLUSH to flush both
2700 * read and write queues
2701 */
2702 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2703
2704 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2705 tep->te_magic != SOU_MAGIC_EXPLICIT) {
2706
2707 /*
2708 * Sockets use bind with qlen==0 followed by bind() to
2709 * the same address with qlen > 0 for listeners.
2710 * We allow rebind with a new qlen value.
2711 */
2712 tl_addr_unbind(tep);
2713 }
2714
2715 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2716 /*
2717 * send T_OK_ACK
2718 */
2719 tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2720}
2721
2722
2723/*
2724 * Option management code from drv/ip is used here
2725 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2726 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2727 * However, that is what we want as that option is 'unorthodox'
2728 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND
2729 * and not in T_SVR4_OPTMGMT_REQ/ACK
2730 * Note2: use of optcom_req means this routine is an exception to
2731 * recovery from allocb() failures.
2732 */
2733
2734static void
2735tl_optmgmt(queue_t *wq, mblk_t *mp)
2736{
2737 tl_endpt_t *tep;
2738 mblk_t *ackmp;
2739 union T_primitives *prim;
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08002740 cred_t *cr;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002741
2742 tep = (tl_endpt_t *)wq->q_ptr;
2743 prim = (union T_primitives *)mp->b_rptr;
2744
Erik Nordmarkde8c4a12009-02-12 08:42:06 -08002745 /*
2746 * All Solaris components should pass a db_credp
2747 * for this TPI message, hence we ASSERT.
2748 * But in case there is some other M_PROTO that looks
2749 * like a TPI message sent by some other kernel
2750 * component, we check and return an error.
2751 */
2752 cr = msg_getcred(mp, NULL);
2753 ASSERT(cr != NULL);
2754 if (cr == NULL) {
2755 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2756 return;
2757 }
2758
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002759 /* all states OK for AF_UNIX options ? */
2760 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2761 prim->type == T_SVR4_OPTMGMT_REQ) {
2762 /*
2763 * Broken TLI semantics that options can only be managed
2764 * in TS_IDLE state. Needed for Sparc ABI test suite that
2765 * tests this TLI (mis)feature using this device driver.
2766 */
2767 (void) (STRLOG(TL_ID, tep->te_minor, 1,
nordmarkfc80c0d2007-10-11 22:57:36 -07002768 SL_TRACE|SL_ERROR,
2769 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2770 tep->te_state));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002771 /*
2772 * preallocate memory for T_ERROR_ACK
2773 */
2774 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2775 if (! ackmp) {
2776 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2777