blob: 419432ac7db7271d1f4726e2a4ac486c23766022 [file] [log] [blame]
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
yz1470640ef0bcf2006-03-01 20:41:12 -08005 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07007 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
Sowmini Varadhan550b6e42010-07-01 17:10:52 -040022 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
Luqman Aden86e5bb72023-01-25 16:12:10 -080023 * Copyright 2023 Oxide Computer Company
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070024 */
25
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070026/*
27 * Data-Link Driver
28 */
29
Eric Chengda14ceb2008-12-04 18:16:10 -080030#include <inet/common.h>
31#include <sys/strsubr.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070032#include <sys/stropts.h>
33#include <sys/strsun.h>
ericheng210db222005-08-01 12:58:24 -070034#include <sys/vlan.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070035#include <sys/dld_impl.h>
Eric Chengda14ceb2008-12-04 18:16:10 -080036#include <sys/cpuvar.h>
37#include <sys/callb.h>
38#include <sys/list.h>
39#include <sys/mac_client.h>
40#include <sys/mac_client_priv.h>
Sowmini Varadhan550b6e42010-07-01 17:10:52 -040041#include <sys/mac_flow.h>
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -070042
/*
 * Forward declarations for file-local helpers.
 */
static int	str_constructor(void *, void *, int);
static void	str_destructor(void *, void *);
static mblk_t	*str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t);
static void	str_notify_promisc_on_phys(dld_str_t *);
static void	str_notify_promisc_off_phys(dld_str_t *);
static void	str_notify_phys_addr(dld_str_t *, uint_t, const uint8_t *);
static void	str_notify_link_up(dld_str_t *);
static void	str_notify_link_down(dld_str_t *);
static void	str_notify_capab_reneg(dld_str_t *);
static void	str_notify_speed(dld_str_t *, uint32_t);

static void	ioc_native(dld_str_t *, mblk_t *);
static void	ioc_margin(dld_str_t *, mblk_t *);
static void	ioc_raw(dld_str_t *, mblk_t *);
static void	ioc_fast(dld_str_t *, mblk_t *);
static void	ioc_lowlink(dld_str_t *, mblk_t *);
static void	ioc(dld_str_t *, mblk_t *);
static void	dld_ioc(dld_str_t *, mblk_t *);
static void	dld_wput_nondata(dld_str_t *, mblk_t *);

static void	str_mdata_raw_put(dld_str_t *, mblk_t *);
static mblk_t	*i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t,
    link_tagmode_t);
static mblk_t	*i_dld_ether_header_strip_tag(mblk_t *, boolean_t);

/*
 * str_count is the number of dld_str_t objects currently allocated (bumped in
 * dld_str_create(), dropped in dld_str_destroy()); dld_str_fini() refuses to
 * tear down while it is non-zero.  str_cachep is the kmem cache the objects
 * come from and str_hashp is the hash of live objects keyed by ds_minor.
 */
static uint32_t		str_count;
static kmem_cache_t	*str_cachep;
static mod_hash_t	*str_hashp;

/* Size passed to mod_hash_create_idhash(), and the minor -> hash key cast. */
#define	STR_HASHSZ		64
#define	STR_HASH_KEY(key)	((mod_hash_key_t)(uintptr_t)(key))

/* dld does not own a taskq; the name is an alias for system_taskq. */
#define	dld_taskq	system_taskq

/*
 * State shared with the dld_taskq_dispatch() thread started by dld_str_init().
 * dld_str_fini() sets dld_taskq_quit under dld_taskq_lock, signals
 * dld_taskq_cv, and then waits on the same cv until dld_taskq_done is set
 * (presumably by the dispatch thread as it exits -- not visible here).
 */
static kmutex_t		dld_taskq_lock;
static kcondvar_t	dld_taskq_cv;
static list_t		dld_taskq_list;		/* List of dld_str_t */
boolean_t		dld_taskq_quit;
boolean_t		dld_taskq_done;

static void	dld_taskq_dispatch(void);
yz147064d62bc4b2008-01-23 18:09:15 -080084
85/*
Eric Chengda14ceb2008-12-04 18:16:10 -080086 * Some notes on entry points, flow-control, queueing.
ericheng210db222005-08-01 12:58:24 -070087 *
88 * This driver exports the traditional STREAMS put entry point as well as
89 * the non-STREAMS fast-path transmit routine which is provided to IP via
90 * the DL_CAPAB_POLL negotiation. The put procedure handles all control
91 * and data operations, while the fast-path routine deals only with M_DATA
92 * fast-path packets. Regardless of the entry point, all outbound packets
Eric Chengda14ceb2008-12-04 18:16:10 -080093 * will end up in DLD_TX(), where they will be delivered to the MAC layer.
ericheng210db222005-08-01 12:58:24 -070094 *
Eric Chengda14ceb2008-12-04 18:16:10 -080095 * The transmit logic operates in the following way: All packets coming
96 * into DLD will be sent to the MAC layer through DLD_TX(). Flow-control
97 * happens when the MAC layer indicates the packets couldn't be
98 * transmitted due to 1) lack of resources (e.g. running out of
99 * descriptors), or 2) reaching the allowed bandwidth limit for this
100 * particular flow. The indication comes in the form of a Tx cookie that
101 * identifies the blocked ring. In such case, DLD will place a
102 * dummy message on its write-side STREAMS queue so that the queue is
103 * marked as "full". Any subsequent packets arriving at the driver will
104 * still be sent to the MAC layer where it either gets queued in the Tx
 * SRS or discarded if the queue limit is exceeded. The write-side STREAMS
106 * queue gets enabled when MAC layer notifies DLD through MAC_NOTE_TX.
107 * When the write service procedure runs, it will remove the dummy
108 * message from the write-side STREAMS queue; in effect this will trigger
109 * backenabling. The sizes of q_hiwat and q_lowat are set to 1 and 0,
110 * respectively, due to the above reasons.
ericheng210db222005-08-01 12:58:24 -0700111 *
Eric Chengda14ceb2008-12-04 18:16:10 -0800112 * All non-data operations, both DLPI and ioctls are single threaded on a per
113 * dld_str_t endpoint. This is done using a taskq so that the control operation
114 * has kernel context and can cv_wait for resources. In addition all set type
115 * operations that involve mac level state modification are serialized on a
116 * per mac end point using the perimeter mechanism provided by the mac layer.
117 * This serializes all mac clients trying to modify a single mac end point over
118 * the entire sequence of mac calls made by that client as an atomic unit. The
119 * mac framework locking is described in mac.c. A critical element is that
120 * DLD/DLS does not hold any locks across the mac perimeter.
ericheng210db222005-08-01 12:58:24 -0700121 *
erichengc0192a52006-04-13 11:40:49 -0700122 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular
123 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that
124 * match dev_t. If a stream is found and it is attached, its dev_info_t *
Eric Chengda14ceb2008-12-04 18:16:10 -0800125 * is returned. If the mac handle is non-null, it can be safely accessed
126 * below. The mac handle won't be freed until the mac_unregister which
127 * won't happen until the driver detaches. The DDI framework ensures that
128 * the detach won't happen while a getinfo is in progress.
erichengc0192a52006-04-13 11:40:49 -0700129 */
/*
 * Search state handed to i_dld_str_walker() through mod_hash_walk().
 * The caller fills in ds_major/ds_minor as the key and pre-sets the two
 * result fields; the walker overwrites the results only when it finds a
 * matching stream that has a mac handle.
 */
typedef struct i_dld_str_state_s {
	major_t		ds_major;	/* in: major number to match */
	minor_t		ds_minor;	/* in: minor number to match */
	int		ds_instance;	/* out: DLS_MINOR2INST(mac_minor()) */
	dev_info_t	*ds_dip;	/* out: mac_devinfo_get() result */
} i_dld_str_state_t;
136
137/* ARGSUSED */
138static uint_t
139i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
140{
141 i_dld_str_state_t *statep = arg;
142 dld_str_t *dsp = (dld_str_t *)val;
Eric Chengda14ceb2008-12-04 18:16:10 -0800143 mac_handle_t mh;
erichengc0192a52006-04-13 11:40:49 -0700144
145 if (statep->ds_major != dsp->ds_major)
146 return (MH_WALK_CONTINUE);
147
148 ASSERT(statep->ds_minor != 0);
Eric Chengda14ceb2008-12-04 18:16:10 -0800149 mh = dsp->ds_mh;
erichengc0192a52006-04-13 11:40:49 -0700150
yz147064d62bc4b2008-01-23 18:09:15 -0800151 if (statep->ds_minor == dsp->ds_minor) {
erichengc0192a52006-04-13 11:40:49 -0700152 /*
153 * Clone: a clone minor is unique. we can terminate the
154 * walk if we find a matching stream -- even if we fail
155 * to obtain the devinfo.
156 */
Garrett D'Amore61af1952009-09-25 19:43:05 -0700157 if (mh != NULL) {
Eric Chengda14ceb2008-12-04 18:16:10 -0800158 statep->ds_dip = mac_devinfo_get(mh);
Garrett D'Amore3ade6e82009-10-07 07:12:17 -0700159 statep->ds_instance = DLS_MINOR2INST(mac_minor(mh));
Garrett D'Amore61af1952009-09-25 19:43:05 -0700160 }
yz147064d62bc4b2008-01-23 18:09:15 -0800161 return (MH_WALK_TERMINATE);
erichengc0192a52006-04-13 11:40:49 -0700162 }
erichengc0192a52006-04-13 11:40:49 -0700163 return (MH_WALK_CONTINUE);
164}
165
ericheng210db222005-08-01 12:58:24 -0700166static dev_info_t *
167dld_finddevinfo(dev_t dev)
168{
Eric Chengda14ceb2008-12-04 18:16:10 -0800169 dev_info_t *dip;
erichengc0192a52006-04-13 11:40:49 -0700170 i_dld_str_state_t state;
ericheng210db222005-08-01 12:58:24 -0700171
yz147064d62bc4b2008-01-23 18:09:15 -0800172 if (getminor(dev) == 0)
173 return (NULL);
174
175 /*
176 * See if it's a minor node of a link
177 */
Eric Chengda14ceb2008-12-04 18:16:10 -0800178 if ((dip = dls_link_devinfo(dev)) != NULL)
yz147064d62bc4b2008-01-23 18:09:15 -0800179 return (dip);
180
erichengc0192a52006-04-13 11:40:49 -0700181 state.ds_minor = getminor(dev);
182 state.ds_major = getmajor(dev);
183 state.ds_dip = NULL;
Garrett D'Amore61af1952009-09-25 19:43:05 -0700184 state.ds_instance = -1;
erichengc0192a52006-04-13 11:40:49 -0700185
erichengc0192a52006-04-13 11:40:49 -0700186 mod_hash_walk(str_hashp, i_dld_str_walker, &state);
yz147064d62bc4b2008-01-23 18:09:15 -0800187 return (state.ds_dip);
dh155122f4b3ec62007-01-19 16:59:38 -0800188}
erichengc0192a52006-04-13 11:40:49 -0700189
Garrett D'Amore61af1952009-09-25 19:43:05 -0700190int
191dld_devt_to_instance(dev_t dev)
192{
193 minor_t minor;
194 i_dld_str_state_t state;
195
196 /*
197 * GLDv3 numbers DLPI style 1 node as the instance number + 1.
198 * Minor number 0 is reserved for the DLPI style 2 unattached
199 * node.
200 */
201
202 if ((minor = getminor(dev)) == 0)
203 return (-1);
204
205 /*
Garrett D'Amore3ade6e82009-10-07 07:12:17 -0700206 * Check for unopened style 1 node.
207 * Note that this doesn't *necessarily* work for legacy
Garrett D'Amore61af1952009-09-25 19:43:05 -0700208 * devices, but this code is only called within the
209 * getinfo(9e) implementation for true GLDv3 devices, so it
210 * doesn't matter.
211 */
212 if (minor > 0 && minor <= DLS_MAX_MINOR) {
213 return (DLS_MINOR2INST(minor));
214 }
215
216 state.ds_minor = getminor(dev);
217 state.ds_major = getmajor(dev);
218 state.ds_dip = NULL;
219 state.ds_instance = -1;
220
221 mod_hash_walk(str_hashp, i_dld_str_walker, &state);
222 return (state.ds_instance);
223}
224
ericheng210db222005-08-01 12:58:24 -0700225/*
226 * devo_getinfo: getinfo(9e)
Garrett D'Amore61af1952009-09-25 19:43:05 -0700227 *
228 * NB: This may be called for a provider before the provider's
229 * instances are attached. Hence, if a particular provider needs a
230 * special mapping (the mac instance != ddi_get_instance()), then it
Luqman Aden86e5bb72023-01-25 16:12:10 -0800231 * may need to provide its own implementation using the
Garrett D'Amore3ade6e82009-10-07 07:12:17 -0700232 * mac_devt_to_instance() function, and translating the returned mac
Garrett D'Amore61af1952009-09-25 19:43:05 -0700233 * instance to a devinfo instance. For dev_t's where the minor number
234 * is too large (i.e. > MAC_MAX_MINOR), the provider can call this
235 * function indirectly via the mac_getinfo() function.
ericheng210db222005-08-01 12:58:24 -0700236 */
237/*ARGSUSED*/
238int
239dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp)
240{
241 dev_info_t *devinfo;
242 minor_t minor = getminor((dev_t)arg);
243 int rc = DDI_FAILURE;
244
245 switch (cmd) {
246 case DDI_INFO_DEVT2DEVINFO:
247 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) {
248 *(dev_info_t **)resp = devinfo;
249 rc = DDI_SUCCESS;
250 }
251 break;
252 case DDI_INFO_DEVT2INSTANCE:
yz147064d62bc4b2008-01-23 18:09:15 -0800253 if (minor > 0 && minor <= DLS_MAX_MINOR) {
erichengc0192a52006-04-13 11:40:49 -0700254 *resp = (void *)(uintptr_t)DLS_MINOR2INST(minor);
255 rc = DDI_SUCCESS;
yz147064d62bc4b2008-01-23 18:09:15 -0800256 } else if (minor > DLS_MAX_MINOR &&
erichengc0192a52006-04-13 11:40:49 -0700257 (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) {
258 *resp = (void *)(uintptr_t)ddi_get_instance(devinfo);
ericheng210db222005-08-01 12:58:24 -0700259 rc = DDI_SUCCESS;
260 }
261 break;
262 }
263 return (rc);
264}
265
Cathy Zhou5d460ea2009-03-17 20:14:50 -0700266void *
267dld_str_private(queue_t *q)
268{
269 return (((dld_str_t *)(q->q_ptr))->ds_private);
270}
271
ericheng210db222005-08-01 12:58:24 -0700272int
Cathy Zhou5d460ea2009-03-17 20:14:50 -0700273dld_str_open(queue_t *rq, dev_t *devp, void *private)
ericheng210db222005-08-01 12:58:24 -0700274{
275 dld_str_t *dsp;
276 major_t major;
277 minor_t minor;
278 int err;
279
ericheng210db222005-08-01 12:58:24 -0700280 major = getmajor(*devp);
281 minor = getminor(*devp);
ericheng210db222005-08-01 12:58:24 -0700282
283 /*
Luqman Aden86e5bb72023-01-25 16:12:10 -0800284 * Half the 32-bit minor space is reserved for private use by the driver
285 * so we bail out here with `ENOSTR` to indicate specfs should retry the
286 * open with the driver's character based `open(9E)`. For a typical
287 * STREAMS driver, that would just be `nodev` which would simply return
288 * `ENODEV`. But a dual-personality device can choose to implement the
289 * character based `open(9E)` for some minor nodes. A driver wanting a
290 * separate STREAMS interface altogether would presumably have already
291 * provided its own `streamtab`.
292 */
293 if (minor >= mac_private_minor())
294 return (ENOSTR);
295
296 /*
ericheng210db222005-08-01 12:58:24 -0700297 * Create a new dld_str_t for the stream. This will grab a new minor
298 * number that will be handed back in the cloned dev_t. Creation may
299 * fail if we can't allocate the dummy mblk used for flow-control.
300 */
301 dsp = dld_str_create(rq, DLD_DLPI, major,
302 ((minor == 0) ? DL_STYLE2 : DL_STYLE1));
303 if (dsp == NULL)
304 return (ENOSR);
305
306 ASSERT(dsp->ds_dlstate == DL_UNATTACHED);
Cathy Zhou5d460ea2009-03-17 20:14:50 -0700307 dsp->ds_private = private;
ericheng210db222005-08-01 12:58:24 -0700308 if (minor != 0) {
309 /*
310 * Style 1 open
311 */
yz147064d62bc4b2008-01-23 18:09:15 -0800312 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0)
ericheng210db222005-08-01 12:58:24 -0700313 goto failed;
Cathy Zhou5d460ea2009-03-17 20:14:50 -0700314
ericheng210db222005-08-01 12:58:24 -0700315 ASSERT(dsp->ds_dlstate == DL_UNBOUND);
erichengcd930902005-08-29 18:49:03 -0700316 } else {
317 (void) qassociate(rq, -1);
ericheng210db222005-08-01 12:58:24 -0700318 }
319
320 /*
321 * Enable the queue srv(9e) routine.
322 */
323 qprocson(rq);
324
325 /*
326 * Construct a cloned dev_t to hand back.
327 */
328 *devp = makedevice(getmajor(*devp), dsp->ds_minor);
329 return (0);
330
331failed:
332 dld_str_destroy(dsp);
333 return (err);
334}
335
ericheng210db222005-08-01 12:58:24 -0700336int
Cathy Zhou5d460ea2009-03-17 20:14:50 -0700337dld_str_close(queue_t *rq)
ericheng210db222005-08-01 12:58:24 -0700338{
339 dld_str_t *dsp = rq->q_ptr;
340
ericheng6a0b2ba2006-01-30 18:57:13 -0800341 /*
Eric Chengda14ceb2008-12-04 18:16:10 -0800342 * All modules on top have been popped off. So there can't be any
343 * threads from the top.
344 */
345 ASSERT(dsp->ds_datathr_cnt == 0);
346
347 /*
348 * Wait until pending DLPI requests are processed.
349 */
350 mutex_enter(&dsp->ds_lock);
351 while (dsp->ds_dlpi_pending)
352 cv_wait(&dsp->ds_dlpi_pending_cv, &dsp->ds_lock);
353 mutex_exit(&dsp->ds_lock);
354
ericheng210db222005-08-01 12:58:24 -0700355
356 /*
357 * This stream was open to a provider node. Check to see
358 * if it has been cleanly shut down.
359 */
360 if (dsp->ds_dlstate != DL_UNATTACHED) {
361 /*
362 * The stream is either open to a style 1 provider or
363 * this is not clean shutdown. Detach from the PPA.
364 * (This is still ok even in the style 1 case).
365 */
366 dld_str_detach(dsp);
367 }
368
369 dld_str_destroy(dsp);
370 return (0);
371}
372
373/*
Cathy Zhou5d460ea2009-03-17 20:14:50 -0700374 * qi_qopen: open(9e)
375 */
376/*ARGSUSED*/
377int
378dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
379{
380 if (sflag == MODOPEN)
381 return (ENOTSUP);
382
383 /*
384 * This is a cloning driver and therefore each queue should only
385 * ever get opened once.
386 */
387 if (rq->q_ptr != NULL)
388 return (EBUSY);
389
390 return (dld_str_open(rq, devp, NULL));
391}
392
393/*
394 * qi_qclose: close(9e)
395 */
Toomas Soome5e1743f2018-10-15 22:13:49 +0300396/* ARGSUSED */
Cathy Zhou5d460ea2009-03-17 20:14:50 -0700397int
Toomas Soome5e1743f2018-10-15 22:13:49 +0300398dld_close(queue_t *rq, int flags __unused, cred_t *credp __unused)
Cathy Zhou5d460ea2009-03-17 20:14:50 -0700399{
400 /*
401 * Disable the queue srv(9e) routine.
402 */
403 qprocsoff(rq);
404
405 return (dld_str_close(rq));
406}
407
408/*
ericheng210db222005-08-01 12:58:24 -0700409 * qi_qputp: put(9e)
410 */
Toomas Soomef1ccfd82018-10-19 23:35:32 +0300411int
ericheng210db222005-08-01 12:58:24 -0700412dld_wput(queue_t *wq, mblk_t *mp)
413{
Eric Chengda14ceb2008-12-04 18:16:10 -0800414 dld_str_t *dsp = (dld_str_t *)wq->q_ptr;
415 dld_str_mode_t mode;
ericheng210db222005-08-01 12:58:24 -0700416
417 switch (DB_TYPE(mp)) {
Eric Chengda14ceb2008-12-04 18:16:10 -0800418 case M_DATA:
419 mutex_enter(&dsp->ds_lock);
Cathy Zhoub53ab682009-02-25 13:11:34 -0800420 mode = dsp->ds_mode;
421 if ((dsp->ds_dlstate != DL_IDLE) ||
422 (mode != DLD_FASTPATH && mode != DLD_RAW)) {
423 mutex_exit(&dsp->ds_lock);
424 freemsg(mp);
425 break;
Eric Chengda14ceb2008-12-04 18:16:10 -0800426 }
Eric Chengda14ceb2008-12-04 18:16:10 -0800427
Cathy Zhoub53ab682009-02-25 13:11:34 -0800428 DLD_DATATHR_INC(dsp);
429 mutex_exit(&dsp->ds_lock);
430 if (mode == DLD_FASTPATH) {
431 if (dsp->ds_mip->mi_media == DL_ETHER &&
432 (MBLKL(mp) < sizeof (struct ether_header))) {
433 freemsg(mp);
434 } else {
435 (void) str_mdata_fastpath_put(dsp, mp, 0, 0);
436 }
437 } else {
438 str_mdata_raw_put(dsp, mp);
439 }
440 DLD_DATATHR_DCR(dsp);
441 break;
ericheng210db222005-08-01 12:58:24 -0700442 case M_PROTO:
yz147064d62bc4b2008-01-23 18:09:15 -0800443 case M_PCPROTO: {
444 t_uscalar_t prim;
yz147064d62bc4b2008-01-23 18:09:15 -0800445
Eric Chengda14ceb2008-12-04 18:16:10 -0800446 if (MBLKL(mp) < sizeof (t_uscalar_t))
447 break;
yz147064d62bc4b2008-01-23 18:09:15 -0800448
449 prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
yz147064d62bc4b2008-01-23 18:09:15 -0800450
Eric Chengda14ceb2008-12-04 18:16:10 -0800451 if (prim == DL_UNITDATA_REQ) {
452 proto_unitdata_req(dsp, mp);
453 } else {
454 dld_wput_nondata(dsp, mp);
455 }
ericheng210db222005-08-01 12:58:24 -0700456 break;
yz147064d62bc4b2008-01-23 18:09:15 -0800457 }
Eric Chengda14ceb2008-12-04 18:16:10 -0800458
ericheng210db222005-08-01 12:58:24 -0700459 case M_IOCTL:
yz147064d62bc4b2008-01-23 18:09:15 -0800460 dld_wput_nondata(dsp, mp);
ericheng210db222005-08-01 12:58:24 -0700461 break;
Eric Chengda14ceb2008-12-04 18:16:10 -0800462
ericheng210db222005-08-01 12:58:24 -0700463 case M_FLUSH:
464 if (*mp->b_rptr & FLUSHW) {
Eric Chengda14ceb2008-12-04 18:16:10 -0800465 DLD_CLRQFULL(dsp);
ericheng210db222005-08-01 12:58:24 -0700466 *mp->b_rptr &= ~FLUSHW;
467 }
468
469 if (*mp->b_rptr & FLUSHR) {
470 qreply(wq, mp);
471 } else {
472 freemsg(mp);
473 }
474 break;
Eric Chengda14ceb2008-12-04 18:16:10 -0800475
ericheng210db222005-08-01 12:58:24 -0700476 default:
477 freemsg(mp);
478 break;
479 }
Toomas Soomef1ccfd82018-10-19 23:35:32 +0300480 return (0);
yz147064d62bc4b2008-01-23 18:09:15 -0800481}
ericheng210db222005-08-01 12:58:24 -0700482
yz147064d62bc4b2008-01-23 18:09:15 -0800483/*
ericheng210db222005-08-01 12:58:24 -0700484 * qi_srvp: srv(9e)
485 */
Toomas Soomef1ccfd82018-10-19 23:35:32 +0300486int
ericheng210db222005-08-01 12:58:24 -0700487dld_wsrv(queue_t *wq)
488{
ericheng210db222005-08-01 12:58:24 -0700489 dld_str_t *dsp = wq->q_ptr;
490
Eric Chengda14ceb2008-12-04 18:16:10 -0800491 DLD_CLRQFULL(dsp);
Toomas Soomef1ccfd82018-10-19 23:35:32 +0300492 return (0);
ericheng210db222005-08-01 12:58:24 -0700493}
494
495void
496dld_init_ops(struct dev_ops *ops, const char *name)
497{
498 struct streamtab *stream;
499 struct qinit *rq, *wq;
500 struct module_info *modinfo;
501
502 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP);
503 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP);
504 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name);
505 modinfo->mi_minpsz = 0;
506 modinfo->mi_maxpsz = 64*1024;
507 modinfo->mi_hiwat = 1;
508 modinfo->mi_lowat = 0;
509
510 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP);
511 rq->qi_qopen = dld_open;
512 rq->qi_qclose = dld_close;
513 rq->qi_minfo = modinfo;
514
515 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP);
516 wq->qi_putp = (pfi_t)dld_wput;
517 wq->qi_srvp = (pfi_t)dld_wsrv;
518 wq->qi_minfo = modinfo;
519
520 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP);
521 stream->st_rdinit = rq;
522 stream->st_wrinit = wq;
523 ops->devo_cb_ops->cb_str = stream;
524
Sebastien Royeae72b52008-08-26 19:16:34 -0400525 if (ops->devo_getinfo == NULL)
526 ops->devo_getinfo = &dld_getinfo;
ericheng210db222005-08-01 12:58:24 -0700527}
528
529void
530dld_fini_ops(struct dev_ops *ops)
531{
532 struct streamtab *stream;
533 struct qinit *rq, *wq;
534 struct module_info *modinfo;
535
536 stream = ops->devo_cb_ops->cb_str;
537 rq = stream->st_rdinit;
538 wq = stream->st_wrinit;
539 modinfo = rq->qi_minfo;
540 ASSERT(wq->qi_minfo == modinfo);
541
542 kmem_free(stream, sizeof (struct streamtab));
543 kmem_free(wq, sizeof (struct qinit));
544 kmem_free(rq, sizeof (struct qinit));
545 kmem_free(modinfo->mi_idname, FMNAMESZ);
546 kmem_free(modinfo, sizeof (struct module_info));
547}
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700548
549/*
550 * Initialize this module's data structures.
551 */
552void
553dld_str_init(void)
554{
555 /*
556 * Create dld_str_t object cache.
557 */
558 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t),
559 0, str_constructor, str_destructor, NULL, NULL, NULL, 0);
560 ASSERT(str_cachep != NULL);
ericheng210db222005-08-01 12:58:24 -0700561
562 /*
erichengc0192a52006-04-13 11:40:49 -0700563 * Create a hash table for maintaining dld_str_t's.
564 * The ds_minor field (the clone minor number) of a dld_str_t
565 * is used as a key for this hash table because this number is
dh155122f4b3ec62007-01-19 16:59:38 -0800566 * globally unique (allocated from "dls_minor_arena").
erichengc0192a52006-04-13 11:40:49 -0700567 */
568 str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ,
569 mod_hash_null_valdtor);
Eric Chengda14ceb2008-12-04 18:16:10 -0800570
571 mutex_init(&dld_taskq_lock, NULL, MUTEX_DRIVER, NULL);
572 cv_init(&dld_taskq_cv, NULL, CV_DRIVER, NULL);
573
574 dld_taskq_quit = B_FALSE;
575 dld_taskq_done = B_FALSE;
576 list_create(&dld_taskq_list, sizeof (dld_str_t),
577 offsetof(dld_str_t, ds_tqlist));
578 (void) thread_create(NULL, 0, dld_taskq_dispatch, NULL, 0,
579 &p0, TS_RUN, minclsyspri);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700580}
581
582/*
583 * Tear down this module's data structures.
584 */
585int
586dld_str_fini(void)
587{
588 /*
589 * Make sure that there are no objects in use.
590 */
591 if (str_count != 0)
592 return (EBUSY);
593
Eric Chengda14ceb2008-12-04 18:16:10 -0800594 /*
595 * Ask the dld_taskq thread to quit and wait for it to be done
596 */
597 mutex_enter(&dld_taskq_lock);
598 dld_taskq_quit = B_TRUE;
599 cv_signal(&dld_taskq_cv);
600 while (!dld_taskq_done)
601 cv_wait(&dld_taskq_cv, &dld_taskq_lock);
602 mutex_exit(&dld_taskq_lock);
603 list_destroy(&dld_taskq_list);
ericheng210db222005-08-01 12:58:24 -0700604 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700605 * Destroy object cache.
606 */
607 kmem_cache_destroy(str_cachep);
erichengc0192a52006-04-13 11:40:49 -0700608 mod_hash_destroy_idhash(str_hashp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700609 return (0);
610}
611
612/*
613 * Create a new dld_str_t object.
614 */
615dld_str_t *
ericheng210db222005-08-01 12:58:24 -0700616dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700617{
618 dld_str_t *dsp;
erichengc0192a52006-04-13 11:40:49 -0700619 int err;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700620
621 /*
622 * Allocate an object from the cache.
623 */
Josef 'Jeff' Sipek1a5e2582014-08-08 10:50:14 -0400624 atomic_inc_32(&str_count);
ericheng210db222005-08-01 12:58:24 -0700625 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP);
626
627 /*
628 * Allocate the dummy mblk for flow-control.
629 */
630 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI);
631 if (dsp->ds_tx_flow_mp == NULL) {
632 kmem_cache_free(str_cachep, dsp);
Josef 'Jeff' Sipek1a5e2582014-08-08 10:50:14 -0400633 atomic_dec_32(&str_count);
ericheng210db222005-08-01 12:58:24 -0700634 return (NULL);
635 }
636 dsp->ds_type = type;
637 dsp->ds_major = major;
638 dsp->ds_style = style;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700639
640 /*
641 * Initialize the queue pointers.
642 */
643 ASSERT(RD(rq) == rq);
644 dsp->ds_rq = rq;
645 dsp->ds_wq = WR(rq);
646 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp;
647
ericheng210db222005-08-01 12:58:24 -0700648 /*
649 * We want explicit control over our write-side STREAMS queue
650 * where the dummy mblk gets added/removed for flow-control.
651 */
652 noenable(WR(rq));
653
erichengc0192a52006-04-13 11:40:49 -0700654 err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor),
655 (mod_hash_val_t)dsp);
656 ASSERT(err == 0);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700657 return (dsp);
658}
659
660/*
661 * Destroy a dld_str_t object.
662 */
663void
664dld_str_destroy(dld_str_t *dsp)
665{
666 queue_t *rq;
667 queue_t *wq;
erichengc0192a52006-04-13 11:40:49 -0700668 mod_hash_val_t val;
Eric Chengda14ceb2008-12-04 18:16:10 -0800669
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700670 /*
671 * Clear the queue pointers.
672 */
673 rq = dsp->ds_rq;
674 wq = dsp->ds_wq;
675 ASSERT(wq == WR(rq));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700676 rq->q_ptr = wq->q_ptr = NULL;
677 dsp->ds_rq = dsp->ds_wq = NULL;
678
Eric Chengda14ceb2008-12-04 18:16:10 -0800679 ASSERT(dsp->ds_dlstate == DL_UNATTACHED);
680 ASSERT(dsp->ds_sap == 0);
681 ASSERT(dsp->ds_mh == NULL);
682 ASSERT(dsp->ds_mch == NULL);
683 ASSERT(dsp->ds_promisc == 0);
684 ASSERT(dsp->ds_mph == NULL);
685 ASSERT(dsp->ds_mip == NULL);
686 ASSERT(dsp->ds_mnh == NULL);
ericheng210db222005-08-01 12:58:24 -0700687
Eric Chengda14ceb2008-12-04 18:16:10 -0800688 ASSERT(dsp->ds_polling == B_FALSE);
689 ASSERT(dsp->ds_direct == B_FALSE);
690 ASSERT(dsp->ds_lso == B_FALSE);
691 ASSERT(dsp->ds_lso_max == 0);
Cathy Zhou5d460ea2009-03-17 20:14:50 -0700692 ASSERT(dsp->ds_passivestate != DLD_ACTIVE);
ericheng210db222005-08-01 12:58:24 -0700693
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700694 /*
yz147064fd69bb12005-06-17 02:25:54 -0700695 * Reinitialize all the flags.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700696 */
697 dsp->ds_notifications = 0;
yz147064fd69bb12005-06-17 02:25:54 -0700698 dsp->ds_passivestate = DLD_UNINITIALIZED;
699 dsp->ds_mode = DLD_UNITDATA;
xc1513550ba2cbe2006-11-20 22:51:46 -0800700 dsp->ds_native = B_FALSE;
8d4cf8d2009-11-09 20:01:32 -0800701 dsp->ds_nonip = B_FALSE;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700702
Eric Chengda14ceb2008-12-04 18:16:10 -0800703 ASSERT(dsp->ds_datathr_cnt == 0);
704 ASSERT(dsp->ds_pending_head == NULL);
705 ASSERT(dsp->ds_pending_tail == NULL);
706 ASSERT(!dsp->ds_dlpi_pending);
707
708 ASSERT(dsp->ds_dlp == NULL);
709 ASSERT(dsp->ds_dmap == NULL);
710 ASSERT(dsp->ds_rx == NULL);
711 ASSERT(dsp->ds_rx_arg == NULL);
712 ASSERT(dsp->ds_next == NULL);
713 ASSERT(dsp->ds_head == NULL);
714
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700715 /*
ericheng210db222005-08-01 12:58:24 -0700716 * Free the dummy mblk if exists.
717 */
718 if (dsp->ds_tx_flow_mp != NULL) {
719 freeb(dsp->ds_tx_flow_mp);
720 dsp->ds_tx_flow_mp = NULL;
721 }
erichengc0192a52006-04-13 11:40:49 -0700722
723 (void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val);
724 ASSERT(dsp == (dld_str_t *)val);
725
ericheng210db222005-08-01 12:58:24 -0700726 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700727 * Free the object back to the cache.
728 */
729 kmem_cache_free(str_cachep, dsp);
Josef 'Jeff' Sipek1a5e2582014-08-08 10:50:14 -0400730 atomic_dec_32(&str_count);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700731}
732
733/*
734 * kmem_cache contructor function: see kmem_cache_create(9f).
735 */
736/*ARGSUSED*/
737static int
738str_constructor(void *buf, void *cdrarg, int kmflags)
739{
740 dld_str_t *dsp = buf;
741
742 bzero(buf, sizeof (dld_str_t));
743
744 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700745 * Allocate a new minor number.
746 */
yz147064d62bc4b2008-01-23 18:09:15 -0800747 if ((dsp->ds_minor = mac_minor_hold(kmflags == KM_SLEEP)) == 0)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700748 return (-1);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700749
750 /*
751 * Initialize the DLPI state machine.
752 */
753 dsp->ds_dlstate = DL_UNATTACHED;
754
Eric Chengda14ceb2008-12-04 18:16:10 -0800755 mutex_init(&dsp->ds_lock, NULL, MUTEX_DRIVER, NULL);
756 cv_init(&dsp->ds_datathr_cv, NULL, CV_DRIVER, NULL);
757 cv_init(&dsp->ds_dlpi_pending_cv, NULL, CV_DRIVER, NULL);
ericheng210db222005-08-01 12:58:24 -0700758
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700759 return (0);
760}
761
762/*
763 * kmem_cache destructor function.
764 */
765/*ARGSUSED*/
766static void
767str_destructor(void *buf, void *cdrarg)
768{
769 dld_str_t *dsp = buf;
770
771 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700772 * Release the minor number.
773 */
yz147064d62bc4b2008-01-23 18:09:15 -0800774 mac_minor_rele(dsp->ds_minor);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700775
ericheng210db222005-08-01 12:58:24 -0700776 ASSERT(dsp->ds_tx_flow_mp == NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700777
Eric Chengda14ceb2008-12-04 18:16:10 -0800778 mutex_destroy(&dsp->ds_lock);
779 cv_destroy(&dsp->ds_datathr_cv);
780 cv_destroy(&dsp->ds_dlpi_pending_cv);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700781}
782
783/*
dg199075605445d2006-09-19 11:16:27 -0700784 * Update the priority bits and VID (may need to insert tag if mp points
Eric Chengda14ceb2008-12-04 18:16:10 -0800785 * to an untagged packet.
dg199075605445d2006-09-19 11:16:27 -0700786 * If vid is VLAN_ID_NONE, use the VID encoded in the packet.
787 */
788static mblk_t *
Sebastien Roye75f0912009-02-20 21:38:08 -0500789i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid,
790 link_tagmode_t tagmode)
dg199075605445d2006-09-19 11:16:27 -0700791{
792 mblk_t *hmp;
793 struct ether_vlan_header *evhp;
794 struct ether_header *ehp;
795 uint16_t old_tci = 0;
796 size_t len;
797
798 ASSERT(pri != 0 || vid != VLAN_ID_NONE);
799
800 evhp = (struct ether_vlan_header *)mp->b_rptr;
801 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) {
802 /*
803 * Tagged packet, update the priority bits.
804 */
dg199075605445d2006-09-19 11:16:27 -0700805 len = sizeof (struct ether_vlan_header);
806
807 if ((DB_REF(mp) > 1) || (MBLKL(mp) < len)) {
808 /*
809 * In case some drivers only check the db_ref
810 * count of the first mblk, we pullup the
811 * message into a single mblk.
812 */
813 hmp = msgpullup(mp, -1);
814 if ((hmp == NULL) || (MBLKL(hmp) < len)) {
815 freemsg(hmp);
816 return (NULL);
817 } else {
818 freemsg(mp);
819 mp = hmp;
820 }
821 }
822
823 evhp = (struct ether_vlan_header *)mp->b_rptr;
Cathy Zhoub53ab682009-02-25 13:11:34 -0800824 old_tci = ntohs(evhp->ether_tci);
dg199075605445d2006-09-19 11:16:27 -0700825 } else {
826 /*
Sebastien Roye75f0912009-02-20 21:38:08 -0500827 * Untagged packet. Two factors will cause us to insert a
828 * VLAN header:
829 * - This is a VLAN link (vid is specified)
830 * - The link supports user priority tagging and the priority
831 * is non-zero.
dg199075605445d2006-09-19 11:16:27 -0700832 */
Sebastien Roye75f0912009-02-20 21:38:08 -0500833 if (vid == VLAN_ID_NONE && tagmode == LINK_TAGMODE_VLANONLY)
834 return (mp);
835
dg199075605445d2006-09-19 11:16:27 -0700836 hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED);
837 if (hmp == NULL)
838 return (NULL);
839
840 evhp = (struct ether_vlan_header *)hmp->b_rptr;
841 ehp = (struct ether_header *)mp->b_rptr;
842
843 /*
844 * Copy the MAC addresses and typelen
845 */
846 bcopy(ehp, evhp, (ETHERADDRL * 2));
847 evhp->ether_type = ehp->ether_type;
848 evhp->ether_tpid = htons(ETHERTYPE_VLAN);
849
850 hmp->b_wptr += sizeof (struct ether_vlan_header);
851 mp->b_rptr += sizeof (struct ether_header);
852
853 /*
854 * Free the original message if it's now empty. Link the
yz147064d62bc4b2008-01-23 18:09:15 -0800855 * rest of the messages to the header message.
dg199075605445d2006-09-19 11:16:27 -0700856 */
857 if (MBLKL(mp) == 0) {
858 hmp->b_cont = mp->b_cont;
859 freeb(mp);
860 } else {
861 hmp->b_cont = mp;
862 }
863 mp = hmp;
864 }
865
866 if (pri == 0)
867 pri = VLAN_PRI(old_tci);
868 if (vid == VLAN_ID_NONE)
869 vid = VLAN_ID(old_tci);
870 evhp->ether_tci = htons(VLAN_TCI(pri, VLAN_CFI(old_tci), vid));
871 return (mp);
872}
873
874/*
Eric Chengda14ceb2008-12-04 18:16:10 -0800875 * M_DATA put (IP fast-path mode)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700876 */
Eric Chengda14ceb2008-12-04 18:16:10 -0800877mac_tx_cookie_t
878str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp, uintptr_t f_hint,
879 uint16_t flag)
dg199075605445d2006-09-19 11:16:27 -0700880{
881 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
882 mblk_t *newmp;
883 uint_t pri;
Eric Chengda14ceb2008-12-04 18:16:10 -0800884 mac_tx_cookie_t cookie;
dg199075605445d2006-09-19 11:16:27 -0700885
886 if (is_ethernet) {
887 /*
888 * Update the priority bits to the assigned priority.
889 */
890 pri = (VLAN_MBLKPRI(mp) == 0) ? dsp->ds_pri : VLAN_MBLKPRI(mp);
891
892 if (pri != 0) {
893 newmp = i_dld_ether_header_update_tag(mp, pri,
Sebastien Roye75f0912009-02-20 21:38:08 -0500894 VLAN_ID_NONE, dsp->ds_dlp->dl_tagmode);
dg199075605445d2006-09-19 11:16:27 -0700895 if (newmp == NULL)
896 goto discard;
897 mp = newmp;
898 }
899 }
900
Toomas Soome8648b7d2019-01-18 13:50:43 +0200901 if ((cookie = DLD_TX(dsp, mp, f_hint, flag)) != 0) {
Eric Chengda14ceb2008-12-04 18:16:10 -0800902 DLD_SETQFULL(dsp);
903 }
904 return (cookie);
dg199075605445d2006-09-19 11:16:27 -0700905
906discard:
907 /* TODO: bump kstat? */
908 freemsg(mp);
Toomas Soome8648b7d2019-01-18 13:50:43 +0200909 return (0);
dg199075605445d2006-09-19 11:16:27 -0700910}
911
912/*
Eric Chengda14ceb2008-12-04 18:16:10 -0800913 * M_DATA put (DLIOCRAW mode)
dg199075605445d2006-09-19 11:16:27 -0700914 */
Eric Chengda14ceb2008-12-04 18:16:10 -0800915static void
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700916str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp)
917{
dg199075605445d2006-09-19 11:16:27 -0700918 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
919 mblk_t *bp, *newmp;
920 size_t size;
921 mac_header_info_t mhi;
Eric Chengda14ceb2008-12-04 18:16:10 -0800922 uint_t pri, vid, dvid;
sowminie7801d52008-01-24 14:49:21 -0800923 uint_t max_sdu;
sebba2e4442006-06-30 12:39:57 -0700924
925 /*
926 * Certain MAC type plugins provide an illusion for raw DLPI
927 * consumers. They pretend that the MAC layer is something that
xc1513550ba2cbe2006-11-20 22:51:46 -0800928 * it's not for the benefit of observability tools. For example,
929 * mac_wifi pretends that it's Ethernet for such consumers.
930 * Here, unless native mode is enabled, we call into the MAC layer so
931 * that this illusion can be maintained. The plugin will optionally
932 * transform the MAC header here into something that can be passed
933 * down. The header goes from raw mode to "cooked" mode.
sebba2e4442006-06-30 12:39:57 -0700934 */
xc1513550ba2cbe2006-11-20 22:51:46 -0800935 if (!dsp->ds_native) {
936 if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL)
937 goto discard;
938 mp = newmp;
939 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700940
941 size = MBLKL(mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700942
943 /*
944 * Check the packet is not too big and that any remaining
945 * fragment list is composed entirely of M_DATA messages. (We
946 * know the first fragment was M_DATA otherwise we could not
947 * have got here).
948 */
ericheng210db222005-08-01 12:58:24 -0700949 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700950 if (DB_TYPE(bp) != M_DATA)
951 goto discard;
952 size += MBLKL(bp);
953 }
954
Eric Cheng25ec3e32009-10-07 14:39:04 -0700955 if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -0700956 goto discard;
957
sowminie7801d52008-01-24 14:49:21 -0800958 mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
yl15005183476012006-11-13 20:44:19 -0800959 /*
960 * If LSO is enabled, check the size against lso_max. Otherwise,
sowminie7801d52008-01-24 14:49:21 -0800961 * compare the packet size with max_sdu.
yl15005183476012006-11-13 20:44:19 -0800962 */
sowminie7801d52008-01-24 14:49:21 -0800963 max_sdu = dsp->ds_lso ? dsp->ds_lso_max : max_sdu;
964 if (size > max_sdu + mhi.mhi_hdrsize)
sebba2e4442006-06-30 12:39:57 -0700965 goto discard;
966
dg199075605445d2006-09-19 11:16:27 -0700967 if (is_ethernet) {
Eric Chengda14ceb2008-12-04 18:16:10 -0800968 dvid = mac_client_vid(dsp->ds_mch);
969
sebba2e4442006-06-30 12:39:57 -0700970 /*
dg199075605445d2006-09-19 11:16:27 -0700971 * Discard the packet if this is a VLAN stream but the VID in
972 * the packet is not correct.
sebba2e4442006-06-30 12:39:57 -0700973 */
dg199075605445d2006-09-19 11:16:27 -0700974 vid = VLAN_ID(mhi.mhi_tci);
Eric Chengda14ceb2008-12-04 18:16:10 -0800975 if ((dvid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE))
dg199075605445d2006-09-19 11:16:27 -0700976 goto discard;
977
978 /*
979 * Discard the packet if this packet is a tagged packet
980 * but both pri and VID are 0.
981 */
982 pri = VLAN_PRI(mhi.mhi_tci);
Rishi Srivatsavai4eaa4712009-09-10 15:11:49 -0400983 if (mhi.mhi_istagged && !mhi.mhi_ispvid && pri == 0 &&
984 vid == VLAN_ID_NONE)
dg199075605445d2006-09-19 11:16:27 -0700985 goto discard;
986
987 /*
988 * Update the priority bits to the per-stream priority if
989 * priority is not set in the packet. Update the VID for
990 * packets on a VLAN stream.
991 */
992 pri = (pri == 0) ? dsp->ds_pri : 0;
Eric Chengda14ceb2008-12-04 18:16:10 -0800993 if ((pri != 0) || (dvid != VLAN_ID_NONE)) {
Sebastien Roye75f0912009-02-20 21:38:08 -0500994 if ((newmp = i_dld_ether_header_update_tag(mp, pri,
995 dvid, dsp->ds_dlp->dl_tagmode)) == NULL) {
dg199075605445d2006-09-19 11:16:27 -0700996 goto discard;
997 }
998 mp = newmp;
999 }
sebba2e4442006-06-30 12:39:57 -07001000 }
1001
Toomas Soome8648b7d2019-01-18 13:50:43 +02001002 if (DLD_TX(dsp, mp, 0, 0) != 0) {
Eric Chengda14ceb2008-12-04 18:16:10 -08001003 /* Turn on flow-control for dld */
1004 DLD_SETQFULL(dsp);
1005 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001006 return;
1007
1008discard:
dg199075605445d2006-09-19 11:16:27 -07001009 /* TODO: bump kstat? */
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001010 freemsg(mp);
1011}
1012
1013/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001014 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1).
1015 */
1016int
ericheng210db222005-08-01 12:58:24 -07001017dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001018{
Eric Chengda14ceb2008-12-04 18:16:10 -08001019 dev_t dev;
1020 int err;
1021 const char *drvname;
Cathy Zhou5d460ea2009-03-17 20:14:50 -07001022 mac_perim_handle_t mph = NULL;
Eric Chengda14ceb2008-12-04 18:16:10 -08001023 boolean_t qassociated = B_FALSE;
1024 dls_link_t *dlp = NULL;
1025 dls_dl_handle_t ddp = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001026
ericheng210db222005-08-01 12:58:24 -07001027 if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL)
1028 return (EINVAL);
1029
Eric Chengda14ceb2008-12-04 18:16:10 -08001030 if (dsp->ds_style == DL_STYLE2 && ppa > DLS_MAX_PPA)
1031 return (ENOTSUP);
1032
yz147064d62bc4b2008-01-23 18:09:15 -08001033 /*
1034 * /dev node access. This will still be supported for backward
1035 * compatibility reason.
1036 */
1037 if ((dsp->ds_style == DL_STYLE2) && (strcmp(drvname, "aggr") != 0) &&
1038 (strcmp(drvname, "vnic") != 0)) {
1039 if (qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0)
1040 return (EINVAL);
1041 qassociated = B_TRUE;
1042 }
ericheng210db222005-08-01 12:58:24 -07001043
Eric Chengda14ceb2008-12-04 18:16:10 -08001044 dev = makedevice(dsp->ds_major, (minor_t)ppa + 1);
1045 if ((err = dls_devnet_hold_by_dev(dev, &ddp)) != 0)
1046 goto failed;
1047
1048 if ((err = mac_perim_enter_by_macname(dls_devnet_mac(ddp), &mph)) != 0)
1049 goto failed;
Eric Chengda14ceb2008-12-04 18:16:10 -08001050
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001051 /*
1052 * Open a channel.
1053 */
Eric Chengda14ceb2008-12-04 18:16:10 -08001054 if ((err = dls_link_hold(dls_devnet_mac(ddp), &dlp)) != 0)
1055 goto failed;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001056
Eric Chengda14ceb2008-12-04 18:16:10 -08001057 if ((err = dls_open(dlp, ddp, dsp)) != 0)
1058 goto failed;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001059
1060 /*
1061 * Set the default packet priority.
1062 */
1063 dsp->ds_pri = 0;
1064
1065 /*
1066 * Add a notify function so that the we get updates from the MAC.
1067 */
Eric Chengda14ceb2008-12-04 18:16:10 -08001068 dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, dsp);
ericheng210db222005-08-01 12:58:24 -07001069 dsp->ds_dlstate = DL_UNBOUND;
Eric Chengda14ceb2008-12-04 18:16:10 -08001070 mac_perim_exit(mph);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001071 return (0);
Eric Chengda14ceb2008-12-04 18:16:10 -08001072
1073failed:
1074 if (dlp != NULL)
1075 dls_link_rele(dlp);
Cathy Zhou5d460ea2009-03-17 20:14:50 -07001076 if (mph != NULL)
Eric Chengda14ceb2008-12-04 18:16:10 -08001077 mac_perim_exit(mph);
1078 if (ddp != NULL)
1079 dls_devnet_rele(ddp);
1080 if (qassociated)
1081 (void) qassociate(dsp->ds_wq, -1);
1082
1083 return (err);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001084}
1085
1086/*
1087 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called
1088 * from close(2) for style 2.
1089 */
1090void
1091dld_str_detach(dld_str_t *dsp)
1092{
Eric Chengda14ceb2008-12-04 18:16:10 -08001093 mac_perim_handle_t mph;
1094 int err;
1095
1096 ASSERT(dsp->ds_datathr_cnt == 0);
1097
1098 mac_perim_enter_by_mh(dsp->ds_mh, &mph);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001099 /*
1100 * Remove the notify function.
Eric Chengda14ceb2008-12-04 18:16:10 -08001101 *
1102 * Note that we cannot wait for the notification callback to be removed
1103 * since it could cause the deadlock with str_notify() since they both
1104 * need the mac perimeter. Continue if we cannot remove the
1105 * notification callback right now and wait after we leave the
1106 * perimeter.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001107 */
Eric Chengda14ceb2008-12-04 18:16:10 -08001108 err = mac_notify_remove(dsp->ds_mnh, B_FALSE);
1109 dsp->ds_mnh = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001110
1111 /*
Eric Chengda14ceb2008-12-04 18:16:10 -08001112 * Disable the capabilities
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001113 */
yz1470648fb46f22007-09-21 07:56:36 -07001114 dld_capabilities_disable(dsp);
yz147064d62bc4b2008-01-23 18:09:15 -08001115
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001116 /*
yl15005183476012006-11-13 20:44:19 -08001117 * Clear LSO flags.
1118 */
1119 dsp->ds_lso = B_FALSE;
1120 dsp->ds_lso_max = 0;
1121
Eric Chengda14ceb2008-12-04 18:16:10 -08001122 dls_close(dsp);
1123 mac_perim_exit(mph);
1124
1125 /*
1126 * Now we leave the mac perimeter. If mac_notify_remove() failed
1127 * because the notification callback was in progress, wait for
1128 * it to finish before we proceed.
1129 */
1130 if (err != 0)
1131 mac_notify_remove_wait(dsp->ds_mh);
1132
1133 /*
1134 * An unreferenced tagged (non-persistent) vlan gets destroyed
1135 * automatically in the call to dls_devnet_rele.
1136 */
1137 dls_devnet_rele(dsp->ds_ddh);
1138
1139 dsp->ds_sap = 0;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001140 dsp->ds_mh = NULL;
Eric Chengda14ceb2008-12-04 18:16:10 -08001141 dsp->ds_mch = NULL;
1142 dsp->ds_mip = NULL;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001143
yz147064d62bc4b2008-01-23 18:09:15 -08001144 if (dsp->ds_style == DL_STYLE2)
1145 (void) qassociate(dsp->ds_wq, -1);
ericheng6a0b2ba2006-01-30 18:57:13 -08001146
1147 /*
1148 * Re-initialize the DLPI state machine.
1149 */
1150 dsp->ds_dlstate = DL_UNATTACHED;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001151}
1152
1153/*
dg199075605445d2006-09-19 11:16:27 -07001154 * This function is only called for VLAN streams. In raw mode, we strip VLAN
1155 * tags before sending packets up to the DLS clients, with the exception of
1156 * special priority tagged packets, in that case, we set the VID to 0.
1157 * mp must be a VLAN tagged packet.
1158 */
1159static mblk_t *
Nicolas Droux72782352009-05-26 15:03:51 -07001160i_dld_ether_header_strip_tag(mblk_t *mp, boolean_t keep_pri)
dg199075605445d2006-09-19 11:16:27 -07001161{
1162 mblk_t *newmp;
1163 struct ether_vlan_header *evhp;
1164 uint16_t tci, new_tci;
1165
1166 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
1167 if (DB_REF(mp) > 1) {
1168 newmp = copymsg(mp);
1169 if (newmp == NULL)
1170 return (NULL);
1171 freemsg(mp);
1172 mp = newmp;
1173 }
1174 evhp = (struct ether_vlan_header *)mp->b_rptr;
1175
1176 tci = ntohs(evhp->ether_tci);
Nicolas Droux72782352009-05-26 15:03:51 -07001177 if (VLAN_PRI(tci) == 0 || !keep_pri) {
dg199075605445d2006-09-19 11:16:27 -07001178 /*
1179 * Priority is 0, strip the tag.
1180 */
1181 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL);
1182 mp->b_rptr += VLAN_TAGSZ;
1183 } else {
1184 /*
1185 * Priority is not 0, update the VID to 0.
1186 */
1187 new_tci = VLAN_TCI(VLAN_PRI(tci), VLAN_CFI(tci), VLAN_ID_NONE);
1188 evhp->ether_tci = htons(new_tci);
1189 }
1190 return (mp);
1191}
1192
1193/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001194 * Raw mode receive function.
1195 */
1196/*ARGSUSED*/
1197void
1198dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
dg199075605445d2006-09-19 11:16:27 -07001199 mac_header_info_t *mhip)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001200{
dg199075605445d2006-09-19 11:16:27 -07001201 dld_str_t *dsp = (dld_str_t *)arg;
1202 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
1203 mblk_t *next, *newmp;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001204
1205 ASSERT(mp != NULL);
1206 do {
1207 /*
1208 * Get the pointer to the next packet in the chain and then
1209 * clear b_next before the packet gets passed on.
1210 */
1211 next = mp->b_next;
1212 mp->b_next = NULL;
1213
1214 /*
1215 * Wind back b_rptr to point at the MAC header.
1216 */
dg199075605445d2006-09-19 11:16:27 -07001217 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
1218 mp->b_rptr -= mhip->mhi_hdrsize;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001219
1220 /*
sebba2e4442006-06-30 12:39:57 -07001221 * Certain MAC type plugins provide an illusion for raw
1222 * DLPI consumers. They pretend that the MAC layer is
1223 * something that it's not for the benefit of observability
xc1513550ba2cbe2006-11-20 22:51:46 -08001224 * tools. For example, mac_wifi pretends that it's Ethernet
1225 * for such consumers. Here, unless native mode is enabled,
1226 * we call into the MAC layer so that this illusion can be
1227 * maintained. The plugin will optionally transform the MAC
1228 * header here into something that can be passed up to raw
1229 * consumers. The header goes from "cooked" mode to raw mode.
sebba2e4442006-06-30 12:39:57 -07001230 */
xc1513550ba2cbe2006-11-20 22:51:46 -08001231 if (!dsp->ds_native) {
1232 newmp = mac_header_uncook(dsp->ds_mh, mp);
1233 if (newmp == NULL) {
1234 freemsg(mp);
1235 goto next;
1236 }
1237 mp = newmp;
sebba2e4442006-06-30 12:39:57 -07001238 }
sebba2e4442006-06-30 12:39:57 -07001239
dg199075605445d2006-09-19 11:16:27 -07001240 /*
1241 * Strip the VLAN tag for VLAN streams.
1242 */
Eric Chengda14ceb2008-12-04 18:16:10 -08001243 if (is_ethernet &&
1244 mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) {
Nicolas Droux72782352009-05-26 15:03:51 -07001245 /*
1246 * The priority should be kept only for VLAN
1247 * data-links.
1248 */
1249 newmp = i_dld_ether_header_strip_tag(mp,
1250 mac_client_is_vlan_vnic(dsp->ds_mch));
dg199075605445d2006-09-19 11:16:27 -07001251 if (newmp == NULL) {
1252 freemsg(mp);
1253 goto next;
sebba2e4442006-06-30 12:39:57 -07001254 }
dg199075605445d2006-09-19 11:16:27 -07001255 mp = newmp;
sebba2e4442006-06-30 12:39:57 -07001256 }
dg199075605445d2006-09-19 11:16:27 -07001257
sebba2e4442006-06-30 12:39:57 -07001258 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001259 * Pass the packet on.
1260 */
erichengc0192a52006-04-13 11:40:49 -07001261 if (canputnext(dsp->ds_rq))
1262 putnext(dsp->ds_rq, mp);
1263 else
1264 freemsg(mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001265
dg199075605445d2006-09-19 11:16:27 -07001266next:
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001267 /*
1268 * Move on to the next packet in the chain.
1269 */
1270 mp = next;
1271 } while (mp != NULL);
1272}
1273
1274/*
1275 * Fast-path receive function.
1276 */
1277/*ARGSUSED*/
1278void
1279dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
dg199075605445d2006-09-19 11:16:27 -07001280 mac_header_info_t *mhip)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001281{
dg199075605445d2006-09-19 11:16:27 -07001282 dld_str_t *dsp = (dld_str_t *)arg;
1283 mblk_t *next;
1284 size_t offset = 0;
1285
1286 /*
1287 * MAC header stripping rules:
1288 * - Tagged packets:
1289 * a. VLAN streams. Strip the whole VLAN header including the tag.
1290 * b. Physical streams
1291 * - VLAN packets (non-zero VID). The stream must be either a
1292 * DL_PROMISC_SAP listener or a ETHERTYPE_VLAN listener.
1293 * Strip the Ethernet header but keep the VLAN header.
1294 * - Special tagged packets (zero VID)
1295 * * The stream is either a DL_PROMISC_SAP listener or a
1296 * ETHERTYPE_VLAN listener, strip the Ethernet header but
1297 * keep the VLAN header.
1298 * * Otherwise, strip the whole VLAN header.
1299 * - Untagged packets. Strip the whole MAC header.
1300 */
Eric Chengda14ceb2008-12-04 18:16:10 -08001301 if (mhip->mhi_istagged &&
1302 (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) &&
dg199075605445d2006-09-19 11:16:27 -07001303 ((dsp->ds_sap == ETHERTYPE_VLAN) ||
1304 (dsp->ds_promisc & DLS_PROMISC_SAP))) {
1305 offset = VLAN_TAGSZ;
1306 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001307
1308 ASSERT(mp != NULL);
1309 do {
1310 /*
1311 * Get the pointer to the next packet in the chain and then
1312 * clear b_next before the packet gets passed on.
1313 */
1314 next = mp->b_next;
1315 mp->b_next = NULL;
1316
1317 /*
dg199075605445d2006-09-19 11:16:27 -07001318 * Wind back b_rptr to point at the VLAN header.
1319 */
1320 ASSERT(mp->b_rptr >= DB_BASE(mp) + offset);
1321 mp->b_rptr -= offset;
1322
1323 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001324 * Pass the packet on.
1325 */
erichengc0192a52006-04-13 11:40:49 -07001326 if (canputnext(dsp->ds_rq))
1327 putnext(dsp->ds_rq, mp);
1328 else
1329 freemsg(mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001330 /*
1331 * Move on to the next packet in the chain.
1332 */
1333 mp = next;
1334 } while (mp != NULL);
1335}
1336
1337/*
1338 * Default receive function (send DL_UNITDATA_IND messages).
1339 */
1340/*ARGSUSED*/
1341void
1342dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
dg199075605445d2006-09-19 11:16:27 -07001343 mac_header_info_t *mhip)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001344{
1345 dld_str_t *dsp = (dld_str_t *)arg;
1346 mblk_t *ud_mp;
1347 mblk_t *next;
dg199075605445d2006-09-19 11:16:27 -07001348 size_t offset = 0;
1349 boolean_t strip_vlan = B_TRUE;
1350
1351 /*
1352 * See MAC header stripping rules in the dld_str_rx_fastpath() function.
1353 */
Eric Chengda14ceb2008-12-04 18:16:10 -08001354 if (mhip->mhi_istagged &&
1355 (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) &&
dg199075605445d2006-09-19 11:16:27 -07001356 ((dsp->ds_sap == ETHERTYPE_VLAN) ||
1357 (dsp->ds_promisc & DLS_PROMISC_SAP))) {
1358 offset = VLAN_TAGSZ;
1359 strip_vlan = B_FALSE;
1360 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001361
1362 ASSERT(mp != NULL);
1363 do {
1364 /*
1365 * Get the pointer to the next packet in the chain and then
1366 * clear b_next before the packet gets passed on.
1367 */
1368 next = mp->b_next;
1369 mp->b_next = NULL;
1370
1371 /*
1372 * Wind back b_rptr to point at the MAC header.
1373 */
dg199075605445d2006-09-19 11:16:27 -07001374 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
1375 mp->b_rptr -= mhip->mhi_hdrsize;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001376
1377 /*
1378 * Create the DL_UNITDATA_IND M_PROTO.
1379 */
dg199075605445d2006-09-19 11:16:27 -07001380 if ((ud_mp = str_unitdata_ind(dsp, mp, strip_vlan)) == NULL) {
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001381 freemsgchain(mp);
1382 return;
1383 }
1384
1385 /*
dg199075605445d2006-09-19 11:16:27 -07001386 * Advance b_rptr to point at the payload (or the VLAN header).
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001387 */
dg199075605445d2006-09-19 11:16:27 -07001388 mp->b_rptr += (mhip->mhi_hdrsize - offset);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001389
1390 /*
1391 * Prepend the DL_UNITDATA_IND.
1392 */
1393 ud_mp->b_cont = mp;
1394
1395 /*
1396 * Send the message.
1397 */
erichengc0192a52006-04-13 11:40:49 -07001398 if (canputnext(dsp->ds_rq))
1399 putnext(dsp->ds_rq, ud_mp);
1400 else
1401 freemsg(ud_mp);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001402
1403 /*
1404 * Move on to the next packet in the chain.
1405 */
1406 mp = next;
1407 } while (mp != NULL);
1408}
1409
1410/*
sowminie7801d52008-01-24 14:49:21 -08001411 * DL_NOTIFY_IND: DL_NOTE_SDU_SIZE
1412 */
1413static void
Erik Nordmark1eee1702010-08-16 15:30:54 -07001414str_notify_sdu_size(dld_str_t *dsp, uint_t max_sdu, uint_t multicast_sdu)
sowminie7801d52008-01-24 14:49:21 -08001415{
1416 mblk_t *mp;
1417 dl_notify_ind_t *dlip;
1418
Erik Nordmark1eee1702010-08-16 15:30:54 -07001419 if (!(dsp->ds_notifications & (DL_NOTE_SDU_SIZE|DL_NOTE_SDU_SIZE2)))
sowminie7801d52008-01-24 14:49:21 -08001420 return;
1421
1422 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1423 M_PROTO, 0)) == NULL)
1424 return;
1425
1426 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1427 dlip = (dl_notify_ind_t *)mp->b_rptr;
1428 dlip->dl_primitive = DL_NOTIFY_IND;
Erik Nordmark1eee1702010-08-16 15:30:54 -07001429 if (dsp->ds_notifications & DL_NOTE_SDU_SIZE2) {
1430 dlip->dl_notification = DL_NOTE_SDU_SIZE2;
1431 dlip->dl_data1 = max_sdu;
1432 dlip->dl_data2 = multicast_sdu;
1433 } else {
1434 dlip->dl_notification = DL_NOTE_SDU_SIZE;
1435 dlip->dl_data = max_sdu;
1436 }
sowminie7801d52008-01-24 14:49:21 -08001437
1438 qreply(dsp->ds_wq, mp);
1439}
1440
1441/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001442 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the
1443 * current state of the interface.
1444 */
1445void
1446dld_str_notify_ind(dld_str_t *dsp)
1447{
1448 mac_notify_type_t type;
1449
1450 for (type = 0; type < MAC_NNOTE; type++)
1451 str_notify(dsp, type);
1452}
1453
1454typedef struct dl_unitdata_ind_wrapper {
1455 dl_unitdata_ind_t dl_unitdata;
sebba2e4442006-06-30 12:39:57 -07001456 uint8_t dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)];
1457 uint8_t dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)];
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001458} dl_unitdata_ind_wrapper_t;
1459
1460/*
1461 * Create a DL_UNITDATA_IND M_PROTO message.
1462 */
1463static mblk_t *
dg199075605445d2006-09-19 11:16:27 -07001464str_unitdata_ind(dld_str_t *dsp, mblk_t *mp, boolean_t strip_vlan)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001465{
1466 mblk_t *nmp;
1467 dl_unitdata_ind_wrapper_t *dlwp;
1468 dl_unitdata_ind_t *dlp;
sebba2e4442006-06-30 12:39:57 -07001469 mac_header_info_t mhi;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001470 uint_t addr_length;
1471 uint8_t *daddr;
1472 uint8_t *saddr;
1473
1474 /*
1475 * Get the packet header information.
1476 */
Eric Cheng25ec3e32009-10-07 14:39:04 -07001477 if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0)
sebba2e4442006-06-30 12:39:57 -07001478 return (NULL);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001479
1480 /*
1481 * Allocate a message large enough to contain the wrapper structure
1482 * defined above.
1483 */
1484 if ((nmp = mexchange(dsp->ds_wq, NULL,
1485 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO,
1486 DL_UNITDATA_IND)) == NULL)
1487 return (NULL);
1488
1489 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr;
1490
1491 dlp = &(dlwp->dl_unitdata);
1492 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr);
1493 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND);
1494
1495 /*
1496 * Copy in the destination address.
1497 */
1498 addr_length = dsp->ds_mip->mi_addr_length;
1499 daddr = dlwp->dl_dest_addr;
1500 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp;
sebba2e4442006-06-30 12:39:57 -07001501 bcopy(mhi.mhi_daddr, daddr, addr_length);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001502
1503 /*
dg199075605445d2006-09-19 11:16:27 -07001504 * Set the destination DLSAP to the SAP value encoded in the packet.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001505 */
dg199075605445d2006-09-19 11:16:27 -07001506 if (mhi.mhi_istagged && !strip_vlan)
1507 *(uint16_t *)(daddr + addr_length) = ETHERTYPE_VLAN;
1508 else
1509 *(uint16_t *)(daddr + addr_length) = mhi.mhi_bindsap;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001510 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t);
1511
1512 /*
sebba2e4442006-06-30 12:39:57 -07001513 * If the destination address was multicast or broadcast then the
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001514 * dl_group_address field should be non-zero.
1515 */
sebba2e4442006-06-30 12:39:57 -07001516 dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) ||
1517 (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001518
1519 /*
sebba2e4442006-06-30 12:39:57 -07001520 * Copy in the source address if one exists. Some MAC types (DL_IB
1521 * for example) may not have access to source information.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001522 */
sebba2e4442006-06-30 12:39:57 -07001523 if (mhi.mhi_saddr == NULL) {
1524 dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0;
1525 } else {
1526 saddr = dlwp->dl_src_addr;
1527 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp;
1528 bcopy(mhi.mhi_saddr, saddr, addr_length);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001529
sebba2e4442006-06-30 12:39:57 -07001530 /*
1531 * Set the source DLSAP to the packet ethertype.
1532 */
1533 *(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap;
1534 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t);
1535 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001536
1537 return (nmp);
1538}
1539
1540/*
1541 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS
1542 */
1543static void
1544str_notify_promisc_on_phys(dld_str_t *dsp)
1545{
1546 mblk_t *mp;
1547 dl_notify_ind_t *dlip;
1548
1549 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS))
1550 return;
1551
1552 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1553 M_PROTO, 0)) == NULL)
1554 return;
1555
1556 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1557 dlip = (dl_notify_ind_t *)mp->b_rptr;
1558 dlip->dl_primitive = DL_NOTIFY_IND;
1559 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS;
1560
1561 qreply(dsp->ds_wq, mp);
1562}
1563
1564/*
1565 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS
1566 */
1567static void
1568str_notify_promisc_off_phys(dld_str_t *dsp)
1569{
1570 mblk_t *mp;
1571 dl_notify_ind_t *dlip;
1572
1573 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS))
1574 return;
1575
1576 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1577 M_PROTO, 0)) == NULL)
1578 return;
1579
1580 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1581 dlip = (dl_notify_ind_t *)mp->b_rptr;
1582 dlip->dl_primitive = DL_NOTIFY_IND;
1583 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS;
1584
1585 qreply(dsp->ds_wq, mp);
1586}
1587
1588/*
1589 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR
1590 */
1591static void
Sebastien Roy2b24ab62009-09-22 22:04:45 -04001592str_notify_phys_addr(dld_str_t *dsp, uint_t addr_type, const uint8_t *addr)
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001593{
1594 mblk_t *mp;
1595 dl_notify_ind_t *dlip;
1596 uint_t addr_length;
1597 uint16_t ethertype;
1598
1599 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR))
1600 return;
1601
1602 addr_length = dsp->ds_mip->mi_addr_length;
1603 if ((mp = mexchange(dsp->ds_wq, NULL,
1604 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t),
1605 M_PROTO, 0)) == NULL)
1606 return;
1607
1608 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1609 dlip = (dl_notify_ind_t *)mp->b_rptr;
1610 dlip->dl_primitive = DL_NOTIFY_IND;
1611 dlip->dl_notification = DL_NOTE_PHYS_ADDR;
Sebastien Roy2b24ab62009-09-22 22:04:45 -04001612 dlip->dl_data = addr_type;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001613 dlip->dl_addr_offset = sizeof (dl_notify_ind_t);
1614 dlip->dl_addr_length = addr_length + sizeof (uint16_t);
1615
1616 bcopy(addr, &dlip[1], addr_length);
1617
1618 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap;
johnlev843e1982007-09-18 15:46:43 -07001619 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = ethertype;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001620
1621 qreply(dsp->ds_wq, mp);
1622}
1623
1624/*
1625 * DL_NOTIFY_IND: DL_NOTE_LINK_UP
1626 */
1627static void
1628str_notify_link_up(dld_str_t *dsp)
1629{
1630 mblk_t *mp;
1631 dl_notify_ind_t *dlip;
1632
1633 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP))
1634 return;
1635
1636 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1637 M_PROTO, 0)) == NULL)
1638 return;
1639
1640 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1641 dlip = (dl_notify_ind_t *)mp->b_rptr;
1642 dlip->dl_primitive = DL_NOTIFY_IND;
1643 dlip->dl_notification = DL_NOTE_LINK_UP;
1644
1645 qreply(dsp->ds_wq, mp);
1646}
1647
1648/*
1649 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN
1650 */
1651static void
1652str_notify_link_down(dld_str_t *dsp)
1653{
1654 mblk_t *mp;
1655 dl_notify_ind_t *dlip;
1656
1657 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN))
1658 return;
1659
1660 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1661 M_PROTO, 0)) == NULL)
1662 return;
1663
1664 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1665 dlip = (dl_notify_ind_t *)mp->b_rptr;
1666 dlip->dl_primitive = DL_NOTIFY_IND;
1667 dlip->dl_notification = DL_NOTE_LINK_DOWN;
1668
1669 qreply(dsp->ds_wq, mp);
1670}
1671
1672/*
1673 * DL_NOTIFY_IND: DL_NOTE_SPEED
1674 */
1675static void
1676str_notify_speed(dld_str_t *dsp, uint32_t speed)
1677{
1678 mblk_t *mp;
1679 dl_notify_ind_t *dlip;
1680
1681 if (!(dsp->ds_notifications & DL_NOTE_SPEED))
1682 return;
1683
1684 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1685 M_PROTO, 0)) == NULL)
1686 return;
1687
1688 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1689 dlip = (dl_notify_ind_t *)mp->b_rptr;
1690 dlip->dl_primitive = DL_NOTIFY_IND;
1691 dlip->dl_notification = DL_NOTE_SPEED;
1692 dlip->dl_data = speed;
1693
1694 qreply(dsp->ds_wq, mp);
1695}
1696
1697/*
1698 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG
1699 */
1700static void
1701str_notify_capab_reneg(dld_str_t *dsp)
1702{
1703 mblk_t *mp;
1704 dl_notify_ind_t *dlip;
1705
1706 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG))
1707 return;
1708
1709 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1710 M_PROTO, 0)) == NULL)
1711 return;
1712
1713 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1714 dlip = (dl_notify_ind_t *)mp->b_rptr;
1715 dlip->dl_primitive = DL_NOTIFY_IND;
1716 dlip->dl_notification = DL_NOTE_CAPAB_RENEG;
1717
1718 qreply(dsp->ds_wq, mp);
1719}
1720
1721/*
sebba2e4442006-06-30 12:39:57 -07001722 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH
1723 */
1724static void
1725str_notify_fastpath_flush(dld_str_t *dsp)
1726{
1727 mblk_t *mp;
1728 dl_notify_ind_t *dlip;
1729
1730 if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH))
1731 return;
1732
1733 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1734 M_PROTO, 0)) == NULL)
1735 return;
1736
1737 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1738 dlip = (dl_notify_ind_t *)mp->b_rptr;
1739 dlip->dl_primitive = DL_NOTIFY_IND;
1740 dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH;
1741
1742 qreply(dsp->ds_wq, mp);
1743}
1744
Sowmini Varadhan550b6e42010-07-01 17:10:52 -04001745static void
1746str_notify_allowed_ips(dld_str_t *dsp)
1747{
1748 mblk_t *mp;
1749 dl_notify_ind_t *dlip;
1750 size_t mp_size;
Sowmini Varadhan89c61302010-07-13 15:48:42 -04001751 mac_protect_t *mrp;
Sowmini Varadhan550b6e42010-07-01 17:10:52 -04001752
1753 if (!(dsp->ds_notifications & DL_NOTE_ALLOWED_IPS))
1754 return;
1755
1756 mp_size = sizeof (mac_protect_t) + sizeof (dl_notify_ind_t);
1757 if ((mp = mexchange(dsp->ds_wq, NULL, mp_size, M_PROTO, 0)) == NULL)
1758 return;
1759
Sowmini Varadhan89c61302010-07-13 15:48:42 -04001760 mrp = mac_protect_get(dsp->ds_mh);
Sowmini Varadhan550b6e42010-07-01 17:10:52 -04001761 bzero(mp->b_rptr, mp_size);
1762 dlip = (dl_notify_ind_t *)mp->b_rptr;
1763 dlip->dl_primitive = DL_NOTIFY_IND;
1764 dlip->dl_notification = DL_NOTE_ALLOWED_IPS;
1765 dlip->dl_data = 0;
1766 dlip->dl_addr_offset = sizeof (dl_notify_ind_t);
1767 dlip->dl_addr_length = sizeof (mac_protect_t);
Sowmini Varadhan89c61302010-07-13 15:48:42 -04001768 bcopy(mrp, mp->b_rptr + sizeof (dl_notify_ind_t),
Sowmini Varadhan550b6e42010-07-01 17:10:52 -04001769 sizeof (mac_protect_t));
1770
1771 qreply(dsp->ds_wq, mp);
1772}
1773
sebba2e4442006-06-30 12:39:57 -07001774/*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001775 * MAC notification callback.
1776 */
Eric Chengda14ceb2008-12-04 18:16:10 -08001777void
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001778str_notify(void *arg, mac_notify_type_t type)
1779{
1780 dld_str_t *dsp = (dld_str_t *)arg;
1781 queue_t *q = dsp->ds_wq;
Eric Chengda14ceb2008-12-04 18:16:10 -08001782 mac_handle_t mh = dsp->ds_mh;
1783 mac_client_handle_t mch = dsp->ds_mch;
1784 uint8_t addr[MAXMACADDRLEN];
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001785
1786 switch (type) {
1787 case MAC_NOTE_TX:
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001788 qenable(q);
1789 break;
1790
1791 case MAC_NOTE_DEVPROMISC:
1792 /*
1793 * Send the appropriate DL_NOTIFY_IND.
1794 */
Girish Moodalbaild91a22b2009-05-18 10:01:40 -04001795 if (mac_promisc_get(mh))
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001796 str_notify_promisc_on_phys(dsp);
1797 else
1798 str_notify_promisc_off_phys(dsp);
1799 break;
1800
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001801 case MAC_NOTE_UNICST:
1802 /*
Eric Chengda14ceb2008-12-04 18:16:10 -08001803 * This notification is sent whenever the MAC unicast
1804 * address changes.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001805 */
Eric Chengda14ceb2008-12-04 18:16:10 -08001806 mac_unicast_primary_get(mh, addr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001807
1808 /*
1809 * Send the appropriate DL_NOTIFY_IND.
1810 */
Sebastien Roy2b24ab62009-09-22 22:04:45 -04001811 str_notify_phys_addr(dsp, DL_CURR_PHYS_ADDR, addr);
1812 break;
1813
1814 case MAC_NOTE_DEST:
1815 /*
1816 * Only send up DL_NOTE_DEST_ADDR if the link has a
1817 * destination address.
1818 */
1819 if (mac_dst_get(dsp->ds_mh, addr))
1820 str_notify_phys_addr(dsp, DL_CURR_DEST_ADDR, addr);
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001821 break;
1822
Rishi Srivatsavai4eaa4712009-09-10 15:11:49 -04001823 case MAC_NOTE_LOWLINK:
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001824 case MAC_NOTE_LINK:
1825 /*
Rishi Srivatsavai4eaa4712009-09-10 15:11:49 -04001826 * LOWLINK refers to the actual link status. For links that
1827 * are not part of a bridge instance LOWLINK and LINK state
1828 * are the same. But for a link part of a bridge instance
1829 * LINK state refers to the aggregate link status: "up" when
1830 * at least one link part of the bridge is up and is "down"
1831 * when all links part of the bridge are down.
1832 *
1833 * Clients can request to be notified of the LOWLINK state
1834 * using the DLIOCLOWLINK ioctl. Clients such as the bridge
1835 * daemon request lowlink state changes and upper layer clients
1836 * receive notifications of the aggregate link state changes
1837 * which is the default when requesting LINK UP/DOWN state
1838 * notifications.
1839 */
1840
1841 /*
1842 * Check that the notification type matches the one that we
1843 * want. If we want lower-level link notifications, and this
1844 * is upper, or if we want upper and this is lower, then
1845 * ignore.
1846 */
1847 if ((type == MAC_NOTE_LOWLINK) != dsp->ds_lowlink)
1848 break;
1849 /*
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001850 * This notification is sent every time the MAC driver
1851 * updates the link state.
1852 */
Rishi Srivatsavai4eaa4712009-09-10 15:11:49 -04001853 switch (mac_client_stat_get(mch, dsp->ds_lowlink ?
1854 MAC_STAT_LOWLINK_STATE : MAC_STAT_LINK_STATE)) {
sebba2e4442006-06-30 12:39:57 -07001855 case LINK_STATE_UP: {
1856 uint64_t speed;
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001857 /*
1858 * The link is up so send the appropriate
1859 * DL_NOTIFY_IND.
1860 */
1861 str_notify_link_up(dsp);
1862
Eric Chengda14ceb2008-12-04 18:16:10 -08001863 speed = mac_stat_get(mh, MAC_STAT_IFSPEED);
sebba2e4442006-06-30 12:39:57 -07001864 str_notify_speed(dsp, (uint32_t)(speed / 1000ull));
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001865 break;
sebba2e4442006-06-30 12:39:57 -07001866 }
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001867 case LINK_STATE_DOWN:
1868 /*
1869 * The link is down so send the appropriate
1870 * DL_NOTIFY_IND.
1871 */
1872 str_notify_link_down(dsp);
1873 break;
1874
1875 default:
1876 break;
1877 }
1878 break;
1879
Eric Chengda14ceb2008-12-04 18:16:10 -08001880 case MAC_NOTE_CAPAB_CHG:
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001881 /*
1882 * This notification is sent whenever the MAC resources
johnlev843e1982007-09-18 15:46:43 -07001883 * change or capabilities change. We need to renegotiate
1884 * the capabilities. Send the appropriate DL_NOTIFY_IND.
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001885 */
1886 str_notify_capab_reneg(dsp);
1887 break;
1888
sowminie7801d52008-01-24 14:49:21 -08001889 case MAC_NOTE_SDU_SIZE: {
1890 uint_t max_sdu;
Erik Nordmark1eee1702010-08-16 15:30:54 -07001891 uint_t multicast_sdu;
1892 mac_sdu_get2(dsp->ds_mh, NULL, &max_sdu, &multicast_sdu);
1893 str_notify_sdu_size(dsp, max_sdu, multicast_sdu);
sowminie7801d52008-01-24 14:49:21 -08001894 break;
1895 }
1896
sebba2e4442006-06-30 12:39:57 -07001897 case MAC_NOTE_FASTPATH_FLUSH:
1898 str_notify_fastpath_flush(dsp);
1899 break;
1900
Rishi Srivatsavai4eaa4712009-09-10 15:11:49 -04001901 /* Unused notifications */
yz147064d62bc4b2008-01-23 18:09:15 -08001902 case MAC_NOTE_MARGIN:
1903 break;
sowminie7801d52008-01-24 14:49:21 -08001904
Sowmini Varadhan550b6e42010-07-01 17:10:52 -04001905 case MAC_NOTE_ALLOWED_IPS:
1906 str_notify_allowed_ips(dsp);
1907 break;
1908
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07001909 default:
1910 ASSERT(B_FALSE);
1911 break;
1912 }
1913}
1914
yz147064d62bc4b2008-01-23 18:09:15 -08001915/*
Eric Chengda14ceb2008-12-04 18:16:10 -08001916 * This function is called via a taskq mechansim to process all control
1917 * messages on a per 'dsp' end point.
yz147064d62bc4b2008-01-23 18:09:15 -08001918 */
1919static void
1920dld_wput_nondata_task(void *arg)
1921{
Eric Chengda14ceb2008-12-04 18:16:10 -08001922 dld_str_t *dsp = arg;
yz147064d62bc4b2008-01-23 18:09:15 -08001923 mblk_t *mp;
1924
Eric Chengda14ceb2008-12-04 18:16:10 -08001925 mutex_enter(&dsp->ds_lock);
1926 while (dsp->ds_pending_head != NULL) {
1927 mp = dsp->ds_pending_head;
1928 dsp->ds_pending_head = mp->b_next;
1929 mp->b_next = NULL;
1930 if (dsp->ds_pending_head == NULL)
1931 dsp->ds_pending_tail = NULL;
1932 mutex_exit(&dsp->ds_lock);
yz147064d62bc4b2008-01-23 18:09:15 -08001933
Eric Chengda14ceb2008-12-04 18:16:10 -08001934 switch (DB_TYPE(mp)) {
1935 case M_PROTO:
1936 case M_PCPROTO:
1937 dld_proto(dsp, mp);
yz147064d62bc4b2008-01-23 18:09:15 -08001938 break;
Eric Chengda14ceb2008-12-04 18:16:10 -08001939 case M_IOCTL:
1940 dld_ioc(dsp, mp);
yz147064d62bc4b2008-01-23 18:09:15 -08001941 break;
1942 default:
Eric Chengda14ceb2008-12-04 18:16:10 -08001943 ASSERT(0);
yz147064d62bc4b2008-01-23 18:09:15 -08001944 }
Eric Chengda14ceb2008-12-04 18:16:10 -08001945
1946 mutex_enter(&dsp->ds_lock);
yz147064d62bc4b2008-01-23 18:09:15 -08001947 }
Eric Chengda14ceb2008-12-04 18:16:10 -08001948 ASSERT(dsp->ds_pending_tail == NULL);
1949 dsp->ds_dlpi_pending = 0;
1950 cv_broadcast(&dsp->ds_dlpi_pending_cv);
1951 mutex_exit(&dsp->ds_lock);
yz147064d62bc4b2008-01-23 18:09:15 -08001952}
1953
1954/*
Eric Chengda14ceb2008-12-04 18:16:10 -08001955 * Kernel thread to handle taskq dispatch failures in dld_wput_data. This
1956 * thread is started at boot time.
yz147064d62bc4b2008-01-23 18:09:15 -08001957 */
1958static void
Eric Chengda14ceb2008-12-04 18:16:10 -08001959dld_taskq_dispatch(void)
yz147064d62bc4b2008-01-23 18:09:15 -08001960{
Eric Chengda14ceb2008-12-04 18:16:10 -08001961 callb_cpr_t cprinfo;
1962 dld_str_t *dsp;
1963
1964 CALLB_CPR_INIT(&cprinfo, &dld_taskq_lock, callb_generic_cpr,
1965 "dld_taskq_dispatch");
1966 mutex_enter(&dld_taskq_lock);
1967
1968 while (!dld_taskq_quit) {
1969 dsp = list_head(&dld_taskq_list);
1970 while (dsp != NULL) {
1971 list_remove(&dld_taskq_list, dsp);
1972 mutex_exit(&dld_taskq_lock);
1973 VERIFY(taskq_dispatch(dld_taskq, dld_wput_nondata_task,
Toomas Soomefc8ae2e2019-03-20 17:29:38 +02001974 dsp, TQ_SLEEP) != TASKQID_INVALID);
Eric Chengda14ceb2008-12-04 18:16:10 -08001975 mutex_enter(&dld_taskq_lock);
1976 dsp = list_head(&dld_taskq_list);
1977 }
1978
1979 CALLB_CPR_SAFE_BEGIN(&cprinfo);
1980 cv_wait(&dld_taskq_cv, &dld_taskq_lock);
1981 CALLB_CPR_SAFE_END(&cprinfo, &dld_taskq_lock);
1982 }
1983
1984 dld_taskq_done = B_TRUE;
1985 cv_signal(&dld_taskq_cv);
1986 CALLB_CPR_EXIT(&cprinfo);
1987 thread_exit();
1988}
1989
1990/*
1991 * All control operations are serialized on the 'dsp' and are also funneled
1992 * through a taskq mechanism to ensure that subsequent processing has kernel
1993 * context and can safely use cv_wait.
1994 *
1995 * Mechanisms to handle taskq dispatch failures
1996 *
1997 * The only way to be sure that taskq dispatch does not fail is to either
1998 * specify TQ_SLEEP or to use a static taskq and prepopulate it with
1999 * some number of entries and make sure that the number of outstanding requests
2000 * are less than that number. We can't use TQ_SLEEP since we don't know the
2001 * context. Nor can we bound the total number of 'dsp' end points. So we are
2002 * unable to use either of the above schemes, and are forced to deal with
2003 * taskq dispatch failures. Note that even dynamic taskq could fail in
2004 * dispatch if TQ_NOSLEEP is specified, since this flag is translated
2005 * eventually to KM_NOSLEEP and kmem allocations could fail in the taskq
2006 * framework.
2007 *
2008 * We maintain a queue of 'dsp's that encountered taskq dispatch failure.
2009 * We also have a single global thread to retry the taskq dispatch. This
2010 * thread loops in 'dld_taskq_dispatch' and retries the taskq dispatch, but
2011 * uses TQ_SLEEP to ensure eventual success of the dispatch operation.
2012 */
2013static void
2014dld_wput_nondata(dld_str_t *dsp, mblk_t *mp)
2015{
2016 ASSERT(mp->b_next == NULL);
2017 mutex_enter(&dsp->ds_lock);
2018 if (dsp->ds_pending_head != NULL) {
2019 ASSERT(dsp->ds_dlpi_pending);
2020 dsp->ds_pending_tail->b_next = mp;
2021 dsp->ds_pending_tail = mp;
2022 mutex_exit(&dsp->ds_lock);
2023 return;
2024 }
2025 ASSERT(dsp->ds_pending_tail == NULL);
2026 dsp->ds_pending_head = dsp->ds_pending_tail = mp;
2027 /*
2028 * At this point if ds_dlpi_pending is set, it implies that the taskq
2029 * thread is still active and is processing the last message, though
2030 * the pending queue has been emptied.
2031 */
2032 if (dsp->ds_dlpi_pending) {
2033 mutex_exit(&dsp->ds_lock);
2034 return;
2035 }
2036
2037 dsp->ds_dlpi_pending = 1;
2038 mutex_exit(&dsp->ds_lock);
2039
2040 if (taskq_dispatch(dld_taskq, dld_wput_nondata_task, dsp,
Toomas Soomefc8ae2e2019-03-20 17:29:38 +02002041 TQ_NOSLEEP) != TASKQID_INVALID)
Eric Chengda14ceb2008-12-04 18:16:10 -08002042 return;
2043
2044 mutex_enter(&dld_taskq_lock);
2045 list_insert_tail(&dld_taskq_list, dsp);
2046 cv_signal(&dld_taskq_cv);
2047 mutex_exit(&dld_taskq_lock);
2048}
2049
2050/*
2051 * Process an M_IOCTL message.
2052 */
2053static void
2054dld_ioc(dld_str_t *dsp, mblk_t *mp)
2055{
2056 uint_t cmd;
2057
2058 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd;
2059 ASSERT(dsp->ds_type == DLD_DLPI);
2060
2061 switch (cmd) {
2062 case DLIOCNATIVE:
2063 ioc_native(dsp, mp);
2064 break;
2065 case DLIOCMARGININFO:
2066 ioc_margin(dsp, mp);
2067 break;
2068 case DLIOCRAW:
2069 ioc_raw(dsp, mp);
2070 break;
2071 case DLIOCHDRINFO:
2072 ioc_fast(dsp, mp);
2073 break;
Rishi Srivatsavai4eaa4712009-09-10 15:11:49 -04002074 case DLIOCLOWLINK:
2075 ioc_lowlink(dsp, mp);
2076 break;
Eric Chengda14ceb2008-12-04 18:16:10 -08002077 default:
2078 ioc(dsp, mp);
2079 }
ericheng210db222005-08-01 12:58:24 -07002080}
stevel@tonic-gate7c478bd2005-06-14 00:00:00 -07002081
ericheng210db222005-08-01 12:58:24 -07002082/*
xc1513550ba2cbe2006-11-20 22:51:46 -08002083 * DLIOCNATIVE
2084 */
2085static void
2086ioc_native(dld_str_t *dsp, mblk_t *mp)
2087{
2088 queue_t *q = dsp->ds_wq;
2089 const mac_info_t *mip = dsp->ds_mip;
2090
xc1513550ba2cbe2006-11-20 22:51:46 -08002091 /*
2092 * Native mode can be enabled if it's disabled and if the
2093 * native media type is different.
2094 */
2095 if (!dsp->ds_native && mip->mi_media != mip->mi_nativemedia)
2096 dsp->ds_native = B_TRUE;
2097
xc1513550ba2cbe2006-11-20 22:51:46 -08002098 if (dsp->ds_native)
2099 miocack(q, mp, 0, mip->mi_nativemedia);
2100 else
2101 miocnak(q, mp, 0, ENOTSUP);
2102}
2103
2104/*
yz147064d62bc4b2008-01-23 18:09:15 -08002105 * DLIOCMARGININFO
2106 */
2107static void
2108ioc_margin(dld_str_t *dsp, mblk_t *mp)
2109{
2110 queue_t *q = dsp->ds_wq;
2111 uint32_t margin;
2112 int err;
2113
2114 if (dsp->ds_dlstate == DL_UNATTACHED) {
2115 err = EINVAL;
2116 goto failed;
2117 }
2118 if ((err = miocpullup(mp, sizeof (uint32_t))) != 0)
2119 goto failed;
2120
2121 mac_margin_get(dsp->ds_mh, &margin);
2122 *((uint32_t *)mp->b_cont->b_rptr) = margin;
2123 miocack(q, mp, sizeof (uint32_t), 0);
2124 return;
2125
2126failed:
2127 miocnak(q, mp, 0, err);
2128}
2129
2130/*
ericheng210db222005-08-01 12:58:24 -07002131 * DLIOCRAW
2132 */
2133static void
2134ioc_raw(dld_str_t *dsp, mblk_t *mp)
2135{
2136 queue_t *q = dsp->ds_wq;
Eric Chengda14ceb2008-12-04 18:16:10 -08002137 mac_perim_handle_t mph;
ericheng210db222005-08-01 12:58:24 -07002138
Eric Chengda14ceb2008-12-04 18:16:10 -08002139 if (dsp->ds_mh == NULL) {
2140 dsp->ds_mode = DLD_RAW;
2141 miocack(q, mp, 0, 0);
2142 return;
2143 }
2144
2145 mac_perim_enter_by_mh(dsp->ds_mh, &mph);
2146 if (dsp->ds_polling || dsp->ds_direct) {
2147 mac_perim_exit(mph);
ericheng210db222005-08-01 12:58:24 -07002148 miocnak(q, mp, 0, EPROTO);
2149 return;
2150 }
2151
Eric Chengda14ceb2008-12-04 18:16:10 -08002152 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) {
ericheng210db222005-08-01 12:58:24 -07002153 /*
2154 * Set the receive callback.
2155 */
Eric Chengda14ceb2008-12-04 18:16:10 -08002156 dls_rx_set(dsp, dld_str_rx_raw, dsp);
ericheng210db222005-08-01 12:58:24 -07002157 }
Eric Chengda14ceb2008-12-04 18:16:10 -08002158
2159 /*
2160 * Note that raw mode is enabled.
2161 */
yz147064490ed222006-04-20 23:53:19 -07002162 dsp->ds_mode = DLD_RAW;
Eric Chengda14ceb2008-12-04 18:16:10 -08002163 mac_perim_exit(mph);
2164
ericheng210db222005-08-01 12:58:24 -07002165 miocack(q, mp, 0, 0);
2166}
2167
2168/*
2169 * DLIOCHDRINFO
2170 */
2171static void
2172ioc_fast(dld_str_t *dsp, mblk_t *mp)
2173{
2174 dl_unitdata_req_t *dlp;
2175 off_t off;
2176 size_t len;
2177 const uint8_t *addr;
2178 uint16_t sap;
2179 mblk_t *nmp;
2180</