1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 */
27
28/*
29 * Multidata, as described in the following papers:
30 *
31 * Adi Masputra,
32 * Multidata V.2: VA-Disjoint Packet Extents Framework Interface
33 * Design Specification. August 2004.
34 * Available as http://sac.sfbay/PSARC/2004/594/materials/mmd2.pdf.
35 *
36 * Adi Masputra,
37 * Multidata Interface Design Specification. Sep 2002.
38 * Available as http://sac.sfbay/PSARC/2002/276/materials/mmd.pdf.
39 *
40 * Adi Masputra, Frank DiMambro, Kacheong Poon,
41 * An Efficient Networking Transmit Mechanism for Solaris:
42 * Multidata Transmit (MDT). May 2002.
43 * Available as http://sac.sfbay/PSARC/2002/276/materials/mdt.pdf.
44 */
45
46#include <sys/types.h>
47#include <sys/stream.h>
48#include <sys/dlpi.h>
49#include <sys/stropts.h>
50#include <sys/strsun.h>
51#include <sys/strlog.h>
52#include <sys/strsubr.h>
53#include <sys/sysmacros.h>
54#include <sys/cmn_err.h>
55#include <sys/debug.h>
56#include <sys/kmem.h>
57#include <sys/atomic.h>
58
59#include <sys/multidata.h>
60#include <sys/multidata_impl.h>
61
62static int mmd_constructor(void *, void *, int);
63static void mmd_destructor(void *, void *);
64static int pdslab_constructor(void *, void *, int);
65static void pdslab_destructor(void *, void *);
66static int pattbl_constructor(void *, void *, int);
67static void pattbl_destructor(void *, void *);
68static void mmd_esballoc_free(caddr_t);
69static int mmd_copy_pattbl(patbkt_t *, multidata_t *, pdesc_t *, int);
70
71static boolean_t pbuf_ref_valid(multidata_t *, pdescinfo_t *);
72#pragma inline(pbuf_ref_valid)
73
74static boolean_t pdi_in_range(pdescinfo_t *, pdescinfo_t *);
75#pragma inline(pdi_in_range)
76
77static pdesc_t *mmd_addpdesc_int(multidata_t *, pdescinfo_t *, int *, int);
78#pragma inline(mmd_addpdesc_int)
79
80static void mmd_destroy_pattbl(patbkt_t **);
81#pragma inline(mmd_destroy_pattbl)
82
83static pattr_t *mmd_find_pattr(patbkt_t *, uint_t);
84#pragma inline(mmd_find_pattr)
85
86static pdesc_t *mmd_destroy_pdesc(multidata_t *, pdesc_t *);
87#pragma inline(mmd_destroy_pdesc)
88
89static pdesc_t *mmd_getpdesc(multidata_t *, pdesc_t *, pdescinfo_t *, uint_t,
90 boolean_t);
91#pragma inline(mmd_getpdesc)
92
93static struct kmem_cache *mmd_cache;
94static struct kmem_cache *pd_slab_cache;
95static struct kmem_cache *pattbl_cache;
96
97int mmd_debug = 1;
98#define MMD_DEBUG(s) if (mmd_debug > 0) cmn_err s
99
100/*
101 * Set this to B_TRUE to bypass pdesc bounds checking.
102 */
103boolean_t mmd_speed_over_safety = B_FALSE;
104
105/*
106 * Patchable kmem_cache flags.
107 */
108int mmd_kmem_flags = 0;
109int pdslab_kmem_flags = 0;
110int pattbl_kmem_flags = 0;
111
112/*
113 * Alignment (in bytes) of our kmem caches.
114 */
115#define MULTIDATA_CACHE_ALIGN 64
116
117/*
118 * Default number of packet descriptors per descriptor slab. Making
119 * this too small will trigger more descriptor slab allocation; making
120 * it too large will create too many unclaimed descriptors.
121 */
122#define PDSLAB_SZ 15
123uint_t pdslab_sz = PDSLAB_SZ;
124
125/*
126 * Default attribute hash table size. It's okay to set this to a small
127 * value (even to 1) because there aren't that many attributes currently
128 * defined, and because we assume there won't be many attributes associated
129 * with a Multidata at a given time. Increasing the size will reduce
130 * attribute search time (given a large number of attributes in a Multidata),
131 * and decreasing it will reduce the memory footprint and the overhead
132 * associated with managing the table.
133 */
134#define PATTBL_SZ 1
135uint_t pattbl_sz = PATTBL_SZ;
136
137/*
138 * Attribute hash key.
139 */
140#define PATTBL_HASH(x, sz) ((x) % (sz))
141
142/*
143 * Structure that precedes each Multidata metadata buffer.
144 */
145struct mmd_buf_info {
146 frtn_t frp; /* free routine */
147 uint_t buf_len; /* length of kmem buffer */
148};
149
150/*
151 * The size of each metadata buffer.
152 */
153#define MMD_CACHE_SIZE \
154 (sizeof (struct mmd_buf_info) + sizeof (multidata_t))
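
/*
 * For reference, each buffer handed out by mmd_cache is laid out as
 * follows (see mmd_constructor below); the free-routine information
 * precedes the metadata proper:
 *
 *	+----------------------+---------------+
 *	| struct mmd_buf_info  |  multidata_t  |
 *	+----------------------+---------------+
 *	|<----------- MMD_CACHE_SIZE --------->|
 */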
155
156/*
157 * Called during startup in order to create the Multidata kmem caches.
158 */
159void
160mmd_init(void)
161{
162 pdslab_sz = MAX(1, pdslab_sz); /* at least 1 descriptor */
163 pattbl_sz = MAX(1, pattbl_sz); /* at least 1 bucket */
164
165 mmd_cache = kmem_cache_create("multidata", MMD_CACHE_SIZE,
166 MULTIDATA_CACHE_ALIGN, mmd_constructor, mmd_destructor,
167 NULL, NULL, NULL, mmd_kmem_flags);
168
169 pd_slab_cache = kmem_cache_create("multidata_pdslab",
170 PDESC_SLAB_SIZE(pdslab_sz), MULTIDATA_CACHE_ALIGN,
171 pdslab_constructor, pdslab_destructor, NULL,
172 (void *)(uintptr_t)pdslab_sz, NULL, pdslab_kmem_flags);
173
174 pattbl_cache = kmem_cache_create("multidata_pattbl",
175 sizeof (patbkt_t) * pattbl_sz, MULTIDATA_CACHE_ALIGN,
176 pattbl_constructor, pattbl_destructor, NULL,
177 (void *)(uintptr_t)pattbl_sz, NULL, pattbl_kmem_flags);
178}
179
180/*
181 * Create a Multidata message block.
182 */
183multidata_t *
184mmd_alloc(mblk_t *hdr_mp, mblk_t **mmd_mp, int kmflags)
185{
186 uchar_t *buf;
187 multidata_t *mmd;
188 uint_t mmd_mplen;
189 struct mmd_buf_info *buf_info;
190
191 ASSERT(hdr_mp != NULL);
192 ASSERT(mmd_mp != NULL);
193
194 /*
195	 * The caller should never pass in a chain of mblks, since we
196	 * only care about the first one; hence the assertion.
197 */
198 ASSERT(hdr_mp->b_cont == NULL);
199
200 if ((buf = kmem_cache_alloc(mmd_cache, kmflags)) == NULL)
201 return (NULL);
202
203 buf_info = (struct mmd_buf_info *)buf;
204 buf_info->frp.free_arg = (caddr_t)buf;
205
206 mmd = (multidata_t *)(buf_info + 1);
207 mmd_mplen = sizeof (*mmd);
208
209 if ((*mmd_mp = desballoc((uchar_t *)mmd, mmd_mplen, BPRI_HI,
210 &(buf_info->frp))) == NULL) {
211 kmem_cache_free(mmd_cache, buf);
212 return (NULL);
213 }
214
215 DB_TYPE(*mmd_mp) = M_MULTIDATA;
216 (*mmd_mp)->b_wptr += mmd_mplen;
217 mmd->mmd_dp = (*mmd_mp)->b_datap;
218 mmd->mmd_hbuf = hdr_mp;
219
220 return (mmd);
221}
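
/*
 * Illustrative sketch (not compiled here) of how a caller might create
 * a Multidata; "hdr_mp" is a hypothetical header mblk supplied by the
 * caller and is not defined in this file:
 *
 *	mblk_t *mmd_mp;
 *	multidata_t *mmd;
 *
 *	mmd = mmd_alloc(hdr_mp, &mmd_mp, KM_NOSLEEP);
 *	if (mmd == NULL)
 *		return (NULL);	(hdr_mp is still owned by the caller)
 *
 * On success, mmd_mp is an M_MULTIDATA mblk whose data block holds the
 * metadata, and hdr_mp becomes the Multidata's header buffer.
 */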
222
223/*
224 * Associate an additional payload buffer with the Multidata.
225 */
226int
227mmd_addpldbuf(multidata_t *mmd, mblk_t *pld_mp)
228{
229 int i;
230
231 ASSERT(mmd != NULL);
232 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
233 ASSERT(pld_mp != NULL);
234
235 mutex_enter(&mmd->mmd_pd_slab_lock);
236 for (i = 0; i < MULTIDATA_MAX_PBUFS &&
237 mmd->mmd_pbuf_cnt < MULTIDATA_MAX_PBUFS; i++) {
238 if (mmd->mmd_pbuf[i] == pld_mp) {
239 /* duplicate entry */
240 MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding "
241 "pld 0x%p to mmd 0x%p since it has been "
242 "previously added into slot %d (total %d)\n",
243 (void *)pld_mp, (void *)mmd, i, mmd->mmd_pbuf_cnt));
244 mutex_exit(&mmd->mmd_pd_slab_lock);
245 return (-1);
246 } else if (mmd->mmd_pbuf[i] == NULL) {
247 mmd->mmd_pbuf[i] = pld_mp;
248 mmd->mmd_pbuf_cnt++;
249 mutex_exit(&mmd->mmd_pd_slab_lock);
250 return (i);
251 }
252 }
253
254 /* all slots are taken */
255 MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding pld 0x%p to mmd 0x%p "
256 "since no slot space is left (total %d max %d)\n", (void *)pld_mp,
257 (void *)mmd, mmd->mmd_pbuf_cnt, MULTIDATA_MAX_PBUFS));
258 mutex_exit(&mmd->mmd_pd_slab_lock);
259
260 return (-1);
261}
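
/*
 * Illustrative sketch (not compiled here); "pld_mp" is a hypothetical
 * payload mblk owned by the caller:
 *
 *	int idx;
 *
 *	if ((idx = mmd_addpldbuf(mmd, pld_mp)) < 0) {
 *		either pld_mp was already added, or all
 *		MULTIDATA_MAX_PBUFS slots are in use
 *	}
 *
 * The returned index is later used as pld_pbuf_idx when packet
 * descriptors reference this payload buffer.
 */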
262
263/*
264 * Multidata metadata kmem cache constructor routine.
265 */
266/* ARGSUSED */
267static int
268mmd_constructor(void *buf, void *cdrarg, int kmflags)
269{
270 struct mmd_buf_info *buf_info;
271 multidata_t *mmd;
272
273 bzero((void *)buf, MMD_CACHE_SIZE);
274
275 buf_info = (struct mmd_buf_info *)buf;
276 buf_info->frp.free_func = mmd_esballoc_free;
277 buf_info->buf_len = MMD_CACHE_SIZE;
278
279 mmd = (multidata_t *)(buf_info + 1);
280 mmd->mmd_magic = MULTIDATA_MAGIC;
281
282 mutex_init(&(mmd->mmd_pd_slab_lock), NULL, MUTEX_DRIVER, NULL);
283 QL_INIT(&(mmd->mmd_pd_slab_q));
284 QL_INIT(&(mmd->mmd_pd_q));
285
286 return (0);
287}
288
289/*
290 * Multidata metadata kmem cache destructor routine.
291 */
292/* ARGSUSED */
293static void
294mmd_destructor(void *buf, void *cdrarg)
295{
296 multidata_t *mmd;
297#ifdef DEBUG
298 int i;
299#endif
300
301 mmd = (multidata_t *)((uchar_t *)buf + sizeof (struct mmd_buf_info));
302
303 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
304 ASSERT(mmd->mmd_dp == NULL);
305 ASSERT(mmd->mmd_hbuf == NULL);
306 ASSERT(mmd->mmd_pbuf_cnt == 0);
307#ifdef DEBUG
308 for (i = 0; i < MULTIDATA_MAX_PBUFS; i++)
309 ASSERT(mmd->mmd_pbuf[i] == NULL);
310#endif
311 ASSERT(mmd->mmd_pattbl == NULL);
312
313 mutex_destroy(&(mmd->mmd_pd_slab_lock));
314 ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q));
315 ASSERT(mmd->mmd_slab_cnt == 0);
316 ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q));
317 ASSERT(mmd->mmd_pd_cnt == 0);
318 ASSERT(mmd->mmd_hbuf_ref == 0);
319 ASSERT(mmd->mmd_pbuf_ref == 0);
320}
321
322/*
323 * Multidata message block free callback routine.
324 */
325static void
326mmd_esballoc_free(caddr_t buf)
327{
328 multidata_t *mmd;
329 pdesc_t *pd;
330 pdesc_slab_t *slab;
331 int i;
332
333 ASSERT(buf != NULL);
334 ASSERT(((struct mmd_buf_info *)buf)->buf_len == MMD_CACHE_SIZE);
335
336 mmd = (multidata_t *)(buf + sizeof (struct mmd_buf_info));
337 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
338
339 ASSERT(mmd->mmd_dp != NULL);
340 ASSERT(mmd->mmd_dp->db_ref == 1);
341
342 /* remove all packet descriptors and private attributes */
343 pd = Q2PD(mmd->mmd_pd_q.ql_next);
344 while (pd != Q2PD(&(mmd->mmd_pd_q)))
345 pd = mmd_destroy_pdesc(mmd, pd);
346
347 ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q));
348 ASSERT(mmd->mmd_pd_cnt == 0);
349 ASSERT(mmd->mmd_hbuf_ref == 0);
350 ASSERT(mmd->mmd_pbuf_ref == 0);
351
352 /* remove all global attributes */
353 if (mmd->mmd_pattbl != NULL)
354 mmd_destroy_pattbl(&(mmd->mmd_pattbl));
355
356 /* remove all descriptor slabs */
357 slab = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_next);
358 while (slab != Q2PDSLAB(&(mmd->mmd_pd_slab_q))) {
359 pdesc_slab_t *slab_next = Q2PDSLAB(slab->pds_next);
360
361 remque(&(slab->pds_next));
362 slab->pds_next = NULL;
363 slab->pds_prev = NULL;
364 slab->pds_mmd = NULL;
365 slab->pds_used = 0;
366 kmem_cache_free(pd_slab_cache, slab);
367
368 ASSERT(mmd->mmd_slab_cnt > 0);
369 mmd->mmd_slab_cnt--;
370 slab = slab_next;
371 }
372 ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q));
373 ASSERT(mmd->mmd_slab_cnt == 0);
374
375 mmd->mmd_dp = NULL;
376
377 /* finally, free all associated message blocks */
378 if (mmd->mmd_hbuf != NULL) {
379 freeb(mmd->mmd_hbuf);
380 mmd->mmd_hbuf = NULL;
381 }
382
383 for (i = 0; i < MULTIDATA_MAX_PBUFS; i++) {
384 if (mmd->mmd_pbuf[i] != NULL) {
385 freeb(mmd->mmd_pbuf[i]);
386 mmd->mmd_pbuf[i] = NULL;
387 ASSERT(mmd->mmd_pbuf_cnt > 0);
388 mmd->mmd_pbuf_cnt--;
389 }
390 }
391
392 ASSERT(mmd->mmd_pbuf_cnt == 0);
393 ASSERT(MUTEX_NOT_HELD(&(mmd->mmd_pd_slab_lock)));
394 kmem_cache_free(mmd_cache, buf);
395}
396
397/*
398 * Multidata message block copy routine, called by copyb() when it
399 * encounters an M_MULTIDATA data block type. This routine should
400 * not be called by anyone other than copyb(), since it may go away
401 * (read: become static to this module) once some sort of copy callback
402 * routine is made available.
403 */
404mblk_t *
405mmd_copy(mblk_t *bp, int kmflags)
406{
407 multidata_t *mmd, *n_mmd;
408 mblk_t *n_hbuf = NULL, *n_pbuf[MULTIDATA_MAX_PBUFS];
409 mblk_t **pmp_last = &n_pbuf[MULTIDATA_MAX_PBUFS - 1];
410 mblk_t **pmp;
411 mblk_t *n_bp = NULL;
412 pdesc_t *pd;
413 uint_t n_pbuf_cnt = 0;
414 int idx, i;
415
416#define FREE_PBUFS() { \
417 for (pmp = &n_pbuf[0]; pmp <= pmp_last; pmp++) \
418 if (*pmp != NULL) freeb(*pmp); \
419}
420
421#define REL_OFF(p, base, n_base) \
422 ((uchar_t *)(n_base) + ((uchar_t *)(p) - (uchar_t *)base))
423
424 ASSERT(bp != NULL && DB_TYPE(bp) == M_MULTIDATA);
425 mmd = mmd_getmultidata(bp);
426
427 /* copy the header buffer */
428 if (mmd->mmd_hbuf != NULL && (n_hbuf = copyb(mmd->mmd_hbuf)) == NULL)
429 return (NULL);
430
431 /* copy the payload buffer(s) */
432 mutex_enter(&mmd->mmd_pd_slab_lock);
433 bzero((void *)&n_pbuf[0], sizeof (mblk_t *) * MULTIDATA_MAX_PBUFS);
434 n_pbuf_cnt = mmd->mmd_pbuf_cnt;
435 for (i = 0; i < n_pbuf_cnt; i++) {
436 ASSERT(mmd->mmd_pbuf[i] != NULL);
437 n_pbuf[i] = copyb(mmd->mmd_pbuf[i]);
438 if (n_pbuf[i] == NULL) {
439 FREE_PBUFS();
440 mutex_exit(&mmd->mmd_pd_slab_lock);
441 return (NULL);
442 }
443 }
444
445 /* allocate new Multidata */
446 n_mmd = mmd_alloc(n_hbuf, &n_bp, kmflags);
447 if (n_mmd == NULL) {
448 if (n_hbuf != NULL)
449 freeb(n_hbuf);
450 if (n_pbuf_cnt != 0)
451 FREE_PBUFS();
452 mutex_exit(&mmd->mmd_pd_slab_lock);
453 return (NULL);
454 }
455
456 /*
457	 * Add payload buffer(s); upon success, leave the n_pbuf array
458	 * alone, as the newly-created Multidata now contains the mblk
459	 * pointers stored in the array. These will be freed along with
460	 * the Multidata itself.
461 */
462 for (i = 0, pmp = &n_pbuf[0]; i < n_pbuf_cnt; i++, pmp++) {
463 idx = mmd_addpldbuf(n_mmd, *pmp);
464 if (idx < 0) {
465 FREE_PBUFS();
466 freeb(n_bp);
467 mutex_exit(&mmd->mmd_pd_slab_lock);
468 return (NULL);
469 }
470 }
471
472 /* copy over global attributes */
473 if (mmd->mmd_pattbl != NULL &&
474 mmd_copy_pattbl(mmd->mmd_pattbl, n_mmd, NULL, kmflags) < 0) {
475 freeb(n_bp);
476 mutex_exit(&mmd->mmd_pd_slab_lock);
477 return (NULL);
478 }
479
480	/* copy over packet descriptors and their attributes */
481 pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE); /* first pdesc */
482 while (pd != NULL) {
483 pdesc_t *n_pd;
484 pdescinfo_t *pdi, n_pdi;
485 uchar_t *n_base, *base;
486 pdesc_t *pd_next;
487
488 /* next pdesc */
489 pd_next = mmd_getpdesc(pd->pd_slab->pds_mmd, pd, NULL,
490 1, B_TRUE);
491
492 /* skip if already removed */
493 if (pd->pd_flags & PDESC_REM_DEFER) {
494 pd = pd_next;
495 continue;
496 }
497
498 pdi = &(pd->pd_pdi);
499 bzero(&n_pdi, sizeof (n_pdi));
500
501 /*
502 * Calculate new descriptor values based on the offset of
503 * each pointer relative to the associated buffer(s).
504 */
505 ASSERT(pdi->flags & PDESC_HAS_REF);
506 if (pdi->flags & PDESC_HBUF_REF) {
507 n_base = n_mmd->mmd_hbuf->b_rptr;
508 base = mmd->mmd_hbuf->b_rptr;
509
510 n_pdi.flags |= PDESC_HBUF_REF;
511 n_pdi.hdr_base = REL_OFF(pdi->hdr_base, base, n_base);
512 n_pdi.hdr_rptr = REL_OFF(pdi->hdr_rptr, base, n_base);
513 n_pdi.hdr_wptr = REL_OFF(pdi->hdr_wptr, base, n_base);
514 n_pdi.hdr_lim = REL_OFF(pdi->hdr_lim, base, n_base);
515 }
516
517 if (pdi->flags & PDESC_PBUF_REF) {
518 n_pdi.flags |= PDESC_PBUF_REF;
519 n_pdi.pld_cnt = pdi->pld_cnt;
520
521 for (i = 0; i < pdi->pld_cnt; i++) {
522 idx = pdi->pld_ary[i].pld_pbuf_idx;
523 ASSERT(idx < MULTIDATA_MAX_PBUFS);
524 ASSERT(n_mmd->mmd_pbuf[idx] != NULL);
525 ASSERT(mmd->mmd_pbuf[idx] != NULL);
526
527 n_base = n_mmd->mmd_pbuf[idx]->b_rptr;
528 base = mmd->mmd_pbuf[idx]->b_rptr;
529
530 n_pdi.pld_ary[i].pld_pbuf_idx = idx;
531
532 /*
533				 * We can't copy the pointers verbatim, since
534				 * they point into the source buffers, so
535				 * calculate the relative offsets instead.
535 */
536 n_pdi.pld_ary[i].pld_rptr =
537 REL_OFF(pdi->pld_ary[i].pld_rptr,
538 base, n_base);
539 n_pdi.pld_ary[i].pld_wptr =
540 REL_OFF(pdi->pld_ary[i].pld_wptr,
541 base, n_base);
542 }
543 }
544
545 /* add the new descriptor to the new Multidata */
546 n_pd = mmd_addpdesc_int(n_mmd, &n_pdi, NULL, kmflags);
547
548 if (n_pd == NULL || (pd->pd_pattbl != NULL &&
549 mmd_copy_pattbl(pd->pd_pattbl, n_mmd, n_pd, kmflags) < 0)) {
550 freeb(n_bp);
551 mutex_exit(&mmd->mmd_pd_slab_lock);
552 return (NULL);
553 }
554
555 pd = pd_next;
556 }
557#undef REL_OFF
558#undef FREE_PBUFS
559
560 mutex_exit(&mmd->mmd_pd_slab_lock);
561 return (n_bp);
562}
563
564/*
565 * Given a Multidata message block, return the Multidata metadata handle.
566 */
567multidata_t *
568mmd_getmultidata(mblk_t *mp)
569{
570 multidata_t *mmd;
571
572 ASSERT(mp != NULL);
573
574 if (DB_TYPE(mp) != M_MULTIDATA)
575 return (NULL);
576
577 mmd = (multidata_t *)mp->b_rptr;
578 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
579
580 return (mmd);
581}
582
583/*
584 * Return the start and end addresses of the associated buffer(s).
585 */
586void
587mmd_getregions(multidata_t *mmd, mbufinfo_t *mbi)
588{
589 int i;
590
591 ASSERT(mmd != NULL);
592 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
593 ASSERT(mbi != NULL);
594
595 bzero((void *)mbi, sizeof (mbufinfo_t));
596
597 if (mmd->mmd_hbuf != NULL) {
598 mbi->hbuf_rptr = mmd->mmd_hbuf->b_rptr;
599 mbi->hbuf_wptr = mmd->mmd_hbuf->b_wptr;
600 }
601
602 mutex_enter(&mmd->mmd_pd_slab_lock);
603 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) {
604 ASSERT(mmd->mmd_pbuf[i] != NULL);
605 mbi->pbuf_ary[i].pbuf_rptr = mmd->mmd_pbuf[i]->b_rptr;
606 mbi->pbuf_ary[i].pbuf_wptr = mmd->mmd_pbuf[i]->b_wptr;
607
608 }
609 mbi->pbuf_cnt = mmd->mmd_pbuf_cnt;
610 mutex_exit(&mmd->mmd_pd_slab_lock);
611}
612
613/*
614 * Return the Multidata statistics.
615 */
616uint_t
617mmd_getcnt(multidata_t *mmd, uint_t *hbuf_ref, uint_t *pbuf_ref)
618{
619 uint_t pd_cnt;
620
621 ASSERT(mmd != NULL);
622 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
623
624 mutex_enter(&(mmd->mmd_pd_slab_lock));
625 if (hbuf_ref != NULL)
626 *hbuf_ref = mmd->mmd_hbuf_ref;
627 if (pbuf_ref != NULL)
628 *pbuf_ref = mmd->mmd_pbuf_ref;
629 pd_cnt = mmd->mmd_pd_cnt;
630 mutex_exit(&(mmd->mmd_pd_slab_lock));
631
632 return (pd_cnt);
633}
634
635#define HBUF_REF_VALID(mmd, pdi) \
636 ((mmd)->mmd_hbuf != NULL && (pdi)->hdr_rptr != NULL && \
637 (pdi)->hdr_wptr != NULL && (pdi)->hdr_base != NULL && \
638 (pdi)->hdr_lim != NULL && (pdi)->hdr_lim >= (pdi)->hdr_base && \
639 (pdi)->hdr_wptr >= (pdi)->hdr_rptr && \
640 (pdi)->hdr_base <= (pdi)->hdr_rptr && \
641 (pdi)->hdr_lim >= (pdi)->hdr_wptr && \
642 (pdi)->hdr_base >= (mmd)->mmd_hbuf->b_rptr && \
643 MBLKIN((mmd)->mmd_hbuf, \
644 (pdi->hdr_base - (mmd)->mmd_hbuf->b_rptr), \
645 PDESC_HDRSIZE(pdi)))
646
647/*
648 * Bounds check payload area(s).
649 */
650static boolean_t
651pbuf_ref_valid(multidata_t *mmd, pdescinfo_t *pdi)
652{
653 int i = 0, idx;
654 boolean_t valid = B_TRUE;
655 struct pld_ary_s *pa;
656
657 mutex_enter(&mmd->mmd_pd_slab_lock);
658 if (pdi->pld_cnt == 0 || pdi->pld_cnt > mmd->mmd_pbuf_cnt) {
659 mutex_exit(&mmd->mmd_pd_slab_lock);
660 return (B_FALSE);
661 }
662
663 pa = &pdi->pld_ary[0];
664 while (valid && i < pdi->pld_cnt) {
665 valid = (((idx = pa->pld_pbuf_idx) < mmd->mmd_pbuf_cnt) &&
666 pa->pld_rptr != NULL && pa->pld_wptr != NULL &&
667 pa->pld_wptr >= pa->pld_rptr &&
668 pa->pld_rptr >= mmd->mmd_pbuf[idx]->b_rptr &&
669 MBLKIN(mmd->mmd_pbuf[idx], (pa->pld_rptr -
670 mmd->mmd_pbuf[idx]->b_rptr),
671 PDESC_PLD_SPAN_SIZE(pdi, i)));
672
673 if (!valid) {
674 MMD_DEBUG((CE_WARN,
675 "pbuf_ref_valid: pdi 0x%p pld out of bound; "
676 "index %d has pld_cnt %d pbuf_idx %d "
677 "(mmd_pbuf_cnt %d), "
678 "pld_rptr 0x%p pld_wptr 0x%p len %d "
679 "(valid 0x%p-0x%p len %d)\n", (void *)pdi,
680 i, pdi->pld_cnt, idx, mmd->mmd_pbuf_cnt,
681 (void *)pa->pld_rptr,
682 (void *)pa->pld_wptr,
683 (int)PDESC_PLD_SPAN_SIZE(pdi, i),
684 (void *)mmd->mmd_pbuf[idx]->b_rptr,
685 (void *)mmd->mmd_pbuf[idx]->b_wptr,
686 (int)MBLKL(mmd->mmd_pbuf[idx])));
687 }
688
689 /* advance to next entry */
690 i++;
691 pa++;
692 }
693
694 mutex_exit(&mmd->mmd_pd_slab_lock);
695 return (valid);
696}
697
698/*
699 * Add a packet descriptor to the Multidata.
700 */
701pdesc_t *
702mmd_addpdesc(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags)
703{
704 ASSERT(mmd != NULL);
705 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
706 ASSERT(pdi != NULL);
707 ASSERT(pdi->flags & PDESC_HAS_REF);
708
709 /* do the references refer to invalid memory regions? */
710 if (!mmd_speed_over_safety &&
711 (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) ||
712 ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi)))) {
713 if (err != NULL)
714 *err = EINVAL;
715 return (NULL);
716 }
717
718 return (mmd_addpdesc_int(mmd, pdi, err, kmflags));
719}
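
/*
 * Illustrative sketch (not compiled here) of adding a descriptor that
 * spans both the header buffer and one payload buffer.  The pointers
 * and lengths ("hdr_rptr", "hdr_len", "pld_start", "pld_len") and the
 * payload buffer index "idx" are hypothetical values the caller would
 * have computed; PDESC_HAS_REF is assumed to cover the HBUF/PBUF
 * reference flags as defined in <sys/multidata.h>.
 *
 *	pdescinfo_t pdi;
 *	pdesc_t *pd;
 *	int err;
 *
 *	bzero(&pdi, sizeof (pdi));
 *	pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF;
 *	pdi.hdr_base = hdr_rptr;
 *	pdi.hdr_rptr = hdr_rptr;
 *	pdi.hdr_wptr = hdr_rptr + hdr_len;
 *	pdi.hdr_lim = hdr_rptr + hdr_len;
 *	pdi.pld_cnt = 1;
 *	pdi.pld_ary[0].pld_pbuf_idx = idx;
 *	pdi.pld_ary[0].pld_rptr = pld_start;
 *	pdi.pld_ary[0].pld_wptr = pld_start + pld_len;
 *
 *	if ((pd = mmd_addpdesc(mmd, &pdi, &err, KM_NOSLEEP)) == NULL)
 *		err is EINVAL (bad bounds) or ENOMEM
 */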
720
721/*
722 * Internal routine to add a packet descriptor, called when mmd_addpdesc
723 * or mmd_copy tries to allocate and add a descriptor to a Multidata.
724 */
725static pdesc_t *
726mmd_addpdesc_int(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags)
727{
728 pdesc_slab_t *slab, *slab_last;
729 pdesc_t *pd;
730
731 ASSERT(pdi->flags & PDESC_HAS_REF);
732 ASSERT(!(pdi->flags & PDESC_HBUF_REF) || HBUF_REF_VALID(mmd, pdi));
733 ASSERT(!(pdi->flags & PDESC_PBUF_REF) || pbuf_ref_valid(mmd, pdi));
734
735 if (err != NULL)
736 *err = 0;
737
738 mutex_enter(&(mmd->mmd_pd_slab_lock));
739 /*
740	 * Is the slab list empty, or is the last-added slab full? If
741	 * so, allocate a new slab for the descriptor; otherwise, use
742	 * the last-added slab instead.
743 */
744 slab_last = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_prev);
745 if (mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q) ||
746 slab_last->pds_used == slab_last->pds_sz) {
747 slab = kmem_cache_alloc(pd_slab_cache, kmflags);
748 if (slab == NULL) {
749 if (err != NULL)
750 *err = ENOMEM;
751 mutex_exit(&(mmd->mmd_pd_slab_lock));
752 return (NULL);
753 }
754 slab->pds_mmd = mmd;
755
756 ASSERT(slab->pds_used == 0);
757 ASSERT(slab->pds_next == NULL && slab->pds_prev == NULL);
758
759 /* insert slab at end of list */
760 insque(&(slab->pds_next), mmd->mmd_pd_slab_q.ql_prev);
761 mmd->mmd_slab_cnt++;
762 } else {
763 slab = slab_last;
764 }
765 ASSERT(slab->pds_used < slab->pds_sz);
766 pd = &(slab->pds_free_desc[slab->pds_used++]);
767 ASSERT(pd->pd_magic == PDESC_MAGIC);
768 pd->pd_next = NULL;
769 pd->pd_prev = NULL;
770 pd->pd_slab = slab;
771 pd->pd_pattbl = NULL;
772
773 /* copy over the descriptor info from caller */
774 PDI_COPY(pdi, &(pd->pd_pdi));
775
776 if (pd->pd_flags & PDESC_HBUF_REF)
777 mmd->mmd_hbuf_ref++;
778 if (pd->pd_flags & PDESC_PBUF_REF)
779 mmd->mmd_pbuf_ref += pd->pd_pdi.pld_cnt;
780 mmd->mmd_pd_cnt++;
781
782 /* insert descriptor at end of list */
783 insque(&(pd->pd_next), mmd->mmd_pd_q.ql_prev);
784 mutex_exit(&(mmd->mmd_pd_slab_lock));
785
786 return (pd);
787}
788
789/*
790 * Packet descriptor slab kmem cache constructor routine.
791 */
792/* ARGSUSED */
793static int
794pdslab_constructor(void *buf, void *cdrarg, int kmflags)
795{
796 pdesc_slab_t *slab;
797 uint_t cnt = (uint_t)(uintptr_t)cdrarg;
798 int i;
799
800 ASSERT(cnt > 0); /* slab size can't be zero */
801
802 slab = (pdesc_slab_t *)buf;
803 slab->pds_next = NULL;
804 slab->pds_prev = NULL;
805 slab->pds_mmd = NULL;
806 slab->pds_used = 0;
807 slab->pds_sz = cnt;
808
809 for (i = 0; i < cnt; i++) {
810 pdesc_t *pd = &(slab->pds_free_desc[i]);
811 pd->pd_magic = PDESC_MAGIC;
812 }
813 return (0);
814}
815
816/*
817 * Packet descriptor slab kmem cache destructor routine.
818 */
819/* ARGSUSED */
820static void
821pdslab_destructor(void *buf, void *cdrarg)
822{
823 pdesc_slab_t *slab;
824
825 slab = (pdesc_slab_t *)buf;
826 ASSERT(slab->pds_next == NULL);
827 ASSERT(slab->pds_prev == NULL);
828 ASSERT(slab->pds_mmd == NULL);
829 ASSERT(slab->pds_used == 0);
830 ASSERT(slab->pds_sz > 0);
831}
832
833/*
834 * Remove a packet descriptor from the in-use descriptor list,
835 * called by mmd_rempdesc or during free.
836 */
837static pdesc_t *
838mmd_destroy_pdesc(multidata_t *mmd, pdesc_t *pd)
839{
840 pdesc_t *pd_next;
841
842 pd_next = Q2PD(pd->pd_next);
843 remque(&(pd->pd_next));
844
845 /* remove all local attributes */
846 if (pd->pd_pattbl != NULL)
847 mmd_destroy_pattbl(&(pd->pd_pattbl));
848
849 /* don't decrease counts for a removed descriptor */
850 if (!(pd->pd_flags & PDESC_REM_DEFER)) {
851 if (pd->pd_flags & PDESC_HBUF_REF) {
852 ASSERT(mmd->mmd_hbuf_ref > 0);
853 mmd->mmd_hbuf_ref--;
854 }
855 if (pd->pd_flags & PDESC_PBUF_REF) {
856 ASSERT(mmd->mmd_pbuf_ref > 0);
857 mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt;
858 }
859 ASSERT(mmd->mmd_pd_cnt > 0);
860 mmd->mmd_pd_cnt--;
861 }
862 return (pd_next);
863}
864
865/*
866 * Remove a packet descriptor from the Multidata.
867 */
868void
869mmd_rempdesc(pdesc_t *pd)
870{
871 multidata_t *mmd;
872
873 ASSERT(pd->pd_magic == PDESC_MAGIC);
874 ASSERT(pd->pd_slab != NULL);
875
876 mmd = pd->pd_slab->pds_mmd;
877 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
878
879 mutex_enter(&(mmd->mmd_pd_slab_lock));
880 /*
881 * We can't deallocate the associated resources if the Multidata
882 * is shared with other threads, because it's possible that the
883 * descriptor handle value is held by those threads. That's why
884 * we simply mark the entry as "removed" and decrement the counts.
885 * If there are no other threads, then we free the descriptor.
886 */
887 if (mmd->mmd_dp->db_ref > 1) {
888 pd->pd_flags |= PDESC_REM_DEFER;
889 if (pd->pd_flags & PDESC_HBUF_REF) {
890 ASSERT(mmd->mmd_hbuf_ref > 0);
891 mmd->mmd_hbuf_ref--;
892 }
893 if (pd->pd_flags & PDESC_PBUF_REF) {
894 ASSERT(mmd->mmd_pbuf_ref > 0);
895 mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt;
896 }
897 ASSERT(mmd->mmd_pd_cnt > 0);
898 mmd->mmd_pd_cnt--;
899 } else {
900 (void) mmd_destroy_pdesc(mmd, pd);
901 }
902 mutex_exit(&(mmd->mmd_pd_slab_lock));
903}
904
905/*
906 * A generic routine to traverse the packet descriptor in-use list.
907 */
908static pdesc_t *
909mmd_getpdesc(multidata_t *mmd, pdesc_t *pd, pdescinfo_t *pdi, uint_t forw,
910 boolean_t mutex_held)
911{
912 pdesc_t *pd_head;
913
914 ASSERT(pd == NULL || pd->pd_slab->pds_mmd == mmd);
915 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
916 ASSERT(!mutex_held || MUTEX_HELD(&(mmd->mmd_pd_slab_lock)));
917
918 if (!mutex_held)
919 mutex_enter(&(mmd->mmd_pd_slab_lock));
920 pd_head = Q2PD(&(mmd->mmd_pd_q));
921
922 if (pd == NULL) {
923 /*
924 * We're called by mmd_get{first,last}pdesc, and so
925 * return either the first or last list element.
926 */
927 pd = forw ? Q2PD(mmd->mmd_pd_q.ql_next) :
928 Q2PD(mmd->mmd_pd_q.ql_prev);
929 } else {
930 /*
931 * We're called by mmd_get{next,prev}pdesc, and so
932 * return either the next or previous list element.
933 */
934 pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev);
935 }
936
937 while (pd != pd_head) {
938 /* skip element if it has been removed */
939 if (!(pd->pd_flags & PDESC_REM_DEFER))
940 break;
941 pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev);
942 }
943 if (!mutex_held)
944 mutex_exit(&(mmd->mmd_pd_slab_lock));
945
946 /* return NULL if we're back at the beginning */
947 if (pd == pd_head)
948 pd = NULL;
949
950 /* got an entry; copy descriptor info to caller */
951 if (pd != NULL && pdi != NULL)
952 PDI_COPY(&(pd->pd_pdi), pdi);
953
954 ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC);
955 return (pd);
956
957}
958
959/*
960 * Return the first packet descriptor in the in-use list.
961 */
962pdesc_t *
963mmd_getfirstpdesc(multidata_t *mmd, pdescinfo_t *pdi)
964{
965 return (mmd_getpdesc(mmd, NULL, pdi, 1, B_FALSE));
966}
967
968/*
969 * Return the last packet descriptor in the in-use list.
970 */
971pdesc_t *
972mmd_getlastpdesc(multidata_t *mmd, pdescinfo_t *pdi)
973{
974 return (mmd_getpdesc(mmd, NULL, pdi, 0, B_FALSE));
975}
976
977/*
978 * Return the next packet descriptor in the in-use list.
979 */
980pdesc_t *
981mmd_getnextpdesc(pdesc_t *pd, pdescinfo_t *pdi)
982{
983 return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 1, B_FALSE));
984}
985
986/*
987 * Return the previous packet descriptor in the in-use list.
988 */
989pdesc_t *
990mmd_getprevpdesc(pdesc_t *pd, pdescinfo_t *pdi)
991{
992 return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 0, B_FALSE));
993}
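
/*
 * Illustrative sketch (not compiled here) of walking the in-use
 * descriptor list; removed (PDESC_REM_DEFER) entries are skipped by
 * the accessors themselves:
 *
 *	pdescinfo_t pdi;
 *	pdesc_t *pd;
 *
 *	for (pd = mmd_getfirstpdesc(mmd, &pdi); pd != NULL;
 *	    pd = mmd_getnextpdesc(pd, &pdi)) {
 *		pdi now holds a copy of the descriptor layout
 *	}
 */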
994
995/*
996 * Check whether pdi stretches beyond c_pdi; used to ensure that a packet
997 * descriptor's header and payload spans cannot be extended beyond their
998 * current boundaries.
999 */
1000static boolean_t
1001pdi_in_range(pdescinfo_t *pdi, pdescinfo_t *c_pdi)
1002{
1003 int i;
1004 struct pld_ary_s *pa = &pdi->pld_ary[0];
1005 struct pld_ary_s *c_pa = &c_pdi->pld_ary[0];
1006
1007 if (pdi->hdr_base < c_pdi->hdr_base || pdi->hdr_lim > c_pdi->hdr_lim)
1008 return (B_FALSE);
1009
1010 /*
1011	 * We don't allow the number of spans to be reduced, for the sake
1012 * of simplicity. Instead, we provide PDESC_PLD_SPAN_CLEAR() to
1013 * clear a packet descriptor. Note that we allow the span count to
1014 * be increased, and the bounds check for the new one happens
1015 * in pbuf_ref_valid.
1016 */
1017 if (pdi->pld_cnt < c_pdi->pld_cnt)
1018 return (B_FALSE);
1019
1020 /* compare only those which are currently defined */
1021 for (i = 0; i < c_pdi->pld_cnt; i++, pa++, c_pa++) {
1022 if (pa->pld_pbuf_idx != c_pa->pld_pbuf_idx ||
1023 pa->pld_rptr < c_pa->pld_rptr ||
1024 pa->pld_wptr > c_pa->pld_wptr)
1025 return (B_FALSE);
1026 }
1027 return (B_TRUE);
1028}
1029
1030/*
1031 * Modify the layout of a packet descriptor.
1032 */
1033pdesc_t *
1034mmd_adjpdesc(pdesc_t *pd, pdescinfo_t *pdi)
1035{
1036 multidata_t *mmd;
1037 pdescinfo_t *c_pdi;
1038
1039 ASSERT(pd != NULL);
1040 ASSERT(pdi != NULL);
1041 ASSERT(pd->pd_magic == PDESC_MAGIC);
1042
1043 mmd = pd->pd_slab->pds_mmd;
1044 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1045
1046 /* entry has been removed */
1047 if (pd->pd_flags & PDESC_REM_DEFER)
1048 return (NULL);
1049
1050 /* caller doesn't intend to specify any buffer reference? */
1051 if (!(pdi->flags & PDESC_HAS_REF))
1052 return (NULL);
1053
1054 /* do the references refer to invalid memory regions? */
1055 if (!mmd_speed_over_safety &&
1056 (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) ||
1057 ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi))))
1058 return (NULL);
1059
1060 /* they're not subsets of current references? */
1061 c_pdi = &(pd->pd_pdi);
1062 if (!pdi_in_range(pdi, c_pdi))
1063 return (NULL);
1064
1065 /* copy over the descriptor info from caller */
1066 PDI_COPY(pdi, c_pdi);
1067
1068 return (pd);
1069}
1070
1071/*
1072 * Copy the contents of a packet descriptor into a new buffer. If the
1073 * descriptor points to more than one buffer fragment, the contents
1074 * of the fragments are joined, with the header buffer fragment
1075 * preceding the payload buffer fragment(s).
1076 */
1077mblk_t *
1078mmd_transform(pdesc_t *pd)
1079{
1080 multidata_t *mmd;
1081 pdescinfo_t *pdi;
1082 mblk_t *mp;
1083 int h_size = 0, p_size = 0;
1084 int i, len;
1085
1086 ASSERT(pd != NULL);
1087 ASSERT(pd->pd_magic == PDESC_MAGIC);
1088
1089 mmd = pd->pd_slab->pds_mmd;
1090 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1091
1092 /* entry has been removed */
1093 if (pd->pd_flags & PDESC_REM_DEFER)
1094 return (NULL);
1095
1096 mutex_enter(&mmd->mmd_pd_slab_lock);
1097 pdi = &(pd->pd_pdi);
1098 if (pdi->flags & PDESC_HBUF_REF)
1099 h_size = PDESC_HDRL(pdi);
1100 if (pdi->flags & PDESC_PBUF_REF) {
1101 for (i = 0; i < pdi->pld_cnt; i++)
1102 p_size += PDESC_PLD_SPAN_SIZE(pdi, i);
1103 }
1104
1105 /* allocate space large enough to hold the fragment(s) */
1106 ASSERT(h_size + p_size >= 0);
1107 if ((mp = allocb(h_size + p_size, BPRI_HI)) == NULL) {
1108 mutex_exit(&mmd->mmd_pd_slab_lock);
1109 return (NULL);
1110 }
1111
1112 /* copy over the header fragment */
1113 if ((pdi->flags & PDESC_HBUF_REF) && h_size > 0) {
1114 bcopy(pdi->hdr_rptr, mp->b_wptr, h_size);
1115 mp->b_wptr += h_size;
1116 }
1117
1118 /* copy over the payload fragment */
1119 if ((pdi->flags & PDESC_PBUF_REF) && p_size > 0) {
1120 for (i = 0; i < pdi->pld_cnt; i++) {
1121 len = PDESC_PLD_SPAN_SIZE(pdi, i);
1122 if (len > 0) {
1123 bcopy(pdi->pld_ary[i].pld_rptr,
1124 mp->b_wptr, len);
1125 mp->b_wptr += len;
1126 }
1127 }
1128 }
1129
1130 mutex_exit(&mmd->mmd_pd_slab_lock);
1131 return (mp);
1132}
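
/*
 * Illustrative sketch (not compiled here): flatten one descriptor into
 * a conventional mblk, e.g. for a consumer that cannot handle Multidata.
 *
 *	mblk_t *mp;
 *
 *	if ((mp = mmd_transform(pd)) == NULL)
 *		allocation failed or the descriptor was removed
 *	the header bytes, if any, precede the payload span(s) in mp
 */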
1133
1134/*
1135 * Return a chain of mblks representing the packet described by pd.
1136 */
1137mblk_t *
1138mmd_transform_link(pdesc_t *pd)
1139{
1140 multidata_t *mmd;
1141 pdescinfo_t *pdi;
1142 mblk_t *nmp = NULL;
1143
1144 ASSERT(pd != NULL);
1145 ASSERT(pd->pd_magic == PDESC_MAGIC);
1146
1147 mmd = pd->pd_slab->pds_mmd;
1148 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1149
1150 /* entry has been removed */
1151 if (pd->pd_flags & PDESC_REM_DEFER)
1152 return (NULL);
1153
1154 pdi = &(pd->pd_pdi);
1155
1156 /* duplicate header buffer */
1157 if ((pdi->flags & PDESC_HBUF_REF)) {
1158 if ((nmp = dupb(mmd->mmd_hbuf)) == NULL)
1159 return (NULL);
1160 nmp->b_rptr = pdi->hdr_rptr;
1161 nmp->b_wptr = pdi->hdr_wptr;
1162 }
1163
1164 /* duplicate payload buffer(s) */
1165 if (pdi->flags & PDESC_PBUF_REF) {
1166 int i;
1167 mblk_t *mp;
1168 struct pld_ary_s *pa = &pdi->pld_ary[0];
1169
1170 mutex_enter(&mmd->mmd_pd_slab_lock);
1171 for (i = 0; i < pdi->pld_cnt; i++, pa++) {
1172 ASSERT(mmd->mmd_pbuf[pa->pld_pbuf_idx] != NULL);
1173
1174 /* skip empty ones */
1175 if (PDESC_PLD_SPAN_SIZE(pdi, i) == 0)
1176 continue;
1177
1178 mp = dupb(mmd->mmd_pbuf[pa->pld_pbuf_idx]);
1179 if (mp == NULL) {
1180 if (nmp != NULL)
1181 freemsg(nmp);
1182 mutex_exit(&mmd->mmd_pd_slab_lock);
1183 return (NULL);
1184 }
1185 mp->b_rptr = pa->pld_rptr;
1186 mp->b_wptr = pa->pld_wptr;
1187 if (nmp == NULL)
1188 nmp = mp;
1189 else
1190 linkb(nmp, mp);
1191 }
1192 mutex_exit(&mmd->mmd_pd_slab_lock);
1193 }
1194
1195 return (nmp);
1196}
1197
1198/*
1199 * Return duplicate message block(s) of the associated buffer(s).
1200 */
1201int
1202mmd_dupbufs(multidata_t *mmd, mblk_t **hmp, mblk_t **pmp)
1203{
1204 ASSERT(mmd != NULL);
1205 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1206
1207 if (hmp != NULL) {
1208 *hmp = NULL;
1209 if (mmd->mmd_hbuf != NULL &&
1210 (*hmp = dupb(mmd->mmd_hbuf)) == NULL)
1211 return (-1);
1212 }
1213
1214 if (pmp != NULL) {
1215 int i;
1216 mblk_t *mp;
1217
1218 mutex_enter(&mmd->mmd_pd_slab_lock);
1219 *pmp = NULL;
1220 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) {
1221 ASSERT(mmd->mmd_pbuf[i] != NULL);
1222 mp = dupb(mmd->mmd_pbuf[i]);
1223 if (mp == NULL) {
1224 if (hmp != NULL && *hmp != NULL)
1225 freeb(*hmp);
1226 if (*pmp != NULL)
1227 freemsg(*pmp);
1228 mutex_exit(&mmd->mmd_pd_slab_lock);
1229 return (-1);
1230 }
1231 if (*pmp == NULL)
1232 *pmp = mp;
1233 else
1234 linkb(*pmp, mp);
1235 }
1236 mutex_exit(&mmd->mmd_pd_slab_lock);
1237 }
1238
1239 return (0);
1240}
1241
1242/*
1243 * Return the layout of a packet descriptor.
1244 */
1245int
1246mmd_getpdescinfo(pdesc_t *pd, pdescinfo_t *pdi)
1247{
1248 ASSERT(pd != NULL);
1249 ASSERT(pd->pd_magic == PDESC_MAGIC);
1250 ASSERT(pd->pd_slab != NULL);
1251 ASSERT(pd->pd_slab->pds_mmd->mmd_magic == MULTIDATA_MAGIC);
1252 ASSERT(pdi != NULL);
1253
1254 /* entry has been removed */
1255 if (pd->pd_flags & PDESC_REM_DEFER)
1256 return (-1);
1257
1258 /* copy descriptor info to caller */
1259 PDI_COPY(&(pd->pd_pdi), pdi);
1260
1261 return (0);
1262}
1263
1264/*
1265 * Add a global or local attribute to a Multidata. Global attribute
1266 * association is specified by a NULL packet descriptor.
1267 */
1268pattr_t *
1269mmd_addpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai,
1270 boolean_t persistent, int kmflags)
1271{
1272 patbkt_t **tbl_p;
1273 patbkt_t *tbl, *o_tbl;
1274 patbkt_t *bkt;
1275 pattr_t *pa;
1276 uint_t size;
1277
1278 ASSERT(mmd != NULL);
1279 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1280 ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC);
1281 ASSERT(pai != NULL);
1282
1283 /* pointer to the attribute hash table (local or global) */
1284 tbl_p = pd != NULL ? &(pd->pd_pattbl) : &(mmd->mmd_pattbl);
1285
1286 /*
1287	 * If the hash table has not yet been created, create it and
1288	 * store its address atomically.
1289 */
1290 if ((tbl = *tbl_p) == NULL) {
1291 tbl = kmem_cache_alloc(pattbl_cache, kmflags);
1292 if (tbl == NULL)
1293 return (NULL);
1294
1295		/* if someone got there first, use their table instead */
1296		if ((o_tbl = atomic_cas_ptr(tbl_p, NULL, tbl)) != NULL) {
1297			kmem_cache_free(pattbl_cache, tbl);
1298 tbl = o_tbl;
1299 }
1300 }
1301
1302 ASSERT(tbl->pbkt_tbl_sz > 0);
1303 bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]);
1304
1305 /* attribute of the same type already exists? */
1306 if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL)
1307 return (NULL);
1308
1309 size = sizeof (*pa) + pai->len;
1310 if ((pa = kmem_zalloc(size, kmflags)) == NULL)
1311 return (NULL);
1312
1313 pa->pat_magic = PATTR_MAGIC;
1314 pa->pat_lock = &(bkt->pbkt_lock);
1315 pa->pat_mmd = mmd;
1316 pa->pat_buflen = size;
1317 pa->pat_type = pai->type;
1318 pai->buf = pai->len > 0 ? ((uchar_t *)(pa + 1)) : NULL;
1319
1320 if (persistent)
1321 pa->pat_flags = PATTR_PERSIST;
1322
1323 /* insert attribute at end of hash chain */
1324 mutex_enter(&(bkt->pbkt_lock));
1325 insque(&(pa->pat_next), bkt->pbkt_pattr_q.ql_prev);
1326 mutex_exit(&(bkt->pbkt_lock));
1327
1328 return (pa);
1329}
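
/*
 * Illustrative sketch (not compiled here) of attaching a global,
 * persistent attribute; PATTR_HCKSUM and pattr_hcksum_t are assumed to
 * be the checksum attribute type and payload defined in
 * <sys/multidata.h>, and "hck" is a hypothetical, filled-in instance:
 *
 *	pattrinfo_t pai;
 *	pattr_t *pa;
 *
 *	pai.type = PATTR_HCKSUM;
 *	pai.len = sizeof (pattr_hcksum_t);
 *	pa = mmd_addpattr(mmd, NULL, &pai, B_TRUE, KM_NOSLEEP);
 *	if (pa != NULL)
 *		bcopy(&hck, pai.buf, pai.len);
 *
 * A non-NULL pd instead associates the attribute with that descriptor
 * only.
 */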
1330
1331/*
1332 * Attribute hash table kmem cache constructor routine.
1333 */
1334/* ARGSUSED */
1335static int
1336pattbl_constructor(void *buf, void *cdrarg, int kmflags)
1337{
1338 patbkt_t *bkt;
1339 uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg;
1340 uint_t i;
1341
1342 ASSERT(tbl_sz > 0); /* table size can't be zero */
1343
1344 for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) {
1345 mutex_init(&(bkt->pbkt_lock), NULL, MUTEX_DRIVER, NULL);
1346 QL_INIT(&(bkt->pbkt_pattr_q));
1347
1348 /* first bucket contains the table size */
1349 bkt->pbkt_tbl_sz = i == 0 ? tbl_sz : 0;
1350 }
1351 return (0);
1352}
1353
1354/*
1355 * Attribute hash table kmem cache destructor routine.
1356 */
1357/* ARGSUSED */
1358static void
1359pattbl_destructor(void *buf, void *cdrarg)
1360{
1361 patbkt_t *bkt;
1362 uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg;
1363 uint_t i;
1364
1365 ASSERT(tbl_sz > 0); /* table size can't be zero */
1366
1367 for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) {
1368 mutex_destroy(&(bkt->pbkt_lock));
1369 ASSERT(bkt->pbkt_pattr_q.ql_next == &(bkt->pbkt_pattr_q));
1370 ASSERT(i > 0 || bkt->pbkt_tbl_sz == tbl_sz);
1371 }
1372}
1373
1374/*
1375 * Destroy an attribute hash table, called by mmd_rempdesc or during free.
1376 */
1377static void
1378mmd_destroy_pattbl(patbkt_t **tbl)
1379{
1380 patbkt_t *bkt;
1381 pattr_t *pa, *pa_next;
1382 uint_t i, tbl_sz;
1383
1384 ASSERT(tbl != NULL);
1385 bkt = *tbl;
1386 tbl_sz = bkt->pbkt_tbl_sz;
1387
1388 /* make sure caller passes in the first bucket */
1389 ASSERT(tbl_sz > 0);
1390
1391 /* destroy the contents of each bucket */
1392 for (i = 0; i < tbl_sz; i++, bkt++) {
1393 /* we ought to be exclusive at this point */
1394 ASSERT(MUTEX_NOT_HELD(&(bkt->pbkt_lock)));
1395
1396 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next);
1397 while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) {
1398 ASSERT(pa->pat_magic == PATTR_MAGIC);
1399 pa_next = Q2PATTR(pa->pat_next);
1400 remque(&(pa->pat_next));
1401 kmem_free(pa, pa->pat_buflen);
1402 pa = pa_next;
1403 }
1404 }
1405
1406 kmem_cache_free(pattbl_cache, *tbl);
1407 *tbl = NULL;
1408
1409 /* commit all previous stores */
1410 membar_producer();
1411}
1412
1413/*
1414 * Copy the contents of an attribute hash table, called by mmd_copy.
1415 */
1416static int
1417mmd_copy_pattbl(patbkt_t *src_tbl, multidata_t *n_mmd, pdesc_t *n_pd,
1418 int kmflags)
1419{
1420 patbkt_t *bkt;
1421 pattr_t *pa;
1422 pattrinfo_t pai;
1423 uint_t i, tbl_sz;
1424
1425 ASSERT(src_tbl != NULL);
1426 bkt = src_tbl;
1427 tbl_sz = bkt->pbkt_tbl_sz;
1428
1429 /* make sure caller passes in the first bucket */
1430 ASSERT(tbl_sz > 0);
1431
1432 for (i = 0; i < tbl_sz; i++, bkt++) {
1433 mutex_enter(&(bkt->pbkt_lock));
1434 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next);
1435 while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) {
1436 pattr_t *pa_next = Q2PATTR(pa->pat_next);
1437
1438 /* skip if it's removed */
1439 if (pa->pat_flags & PATTR_REM_DEFER) {
1440 pa = pa_next;
1441 continue;
1442 }
1443
1444 pai.type = pa->pat_type;
1445 pai.len = pa->pat_buflen - sizeof (*pa);
1446 if (mmd_addpattr(n_mmd, n_pd, &pai, (pa->pat_flags &
1447 PATTR_PERSIST) != 0, kmflags) == NULL) {
1448 mutex_exit(&(bkt->pbkt_lock));
1449 return (-1);
1450 }
1451
1452 /* copy over the contents */
1453 if (pai.buf != NULL)
1454 bcopy(pa + 1, pai.buf, pai.len);
1455
1456 pa = pa_next;
1457 }
1458 mutex_exit(&(bkt->pbkt_lock));
1459 }
1460
1461 return (0);
1462}
1463
1464/*
1465 * Search for an attribute type within an attribute hash bucket.
1466 */
1467static pattr_t *
1468mmd_find_pattr(patbkt_t *bkt, uint_t type)
1469{
1470 pattr_t *pa_head, *pa;
1471
1472 mutex_enter(&(bkt->pbkt_lock));
1473 pa_head = Q2PATTR(&(bkt->pbkt_pattr_q));
1474 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next);
1475
1476 while (pa != pa_head) {
1477 ASSERT(pa->pat_magic == PATTR_MAGIC);
1478
1479 /* return a match; we treat removed entry as non-existent */
1480 if (pa->pat_type == type && !(pa->pat_flags & PATTR_REM_DEFER))
1481 break;
1482 pa = Q2PATTR(pa->pat_next);
1483 }
1484 mutex_exit(&(bkt->pbkt_lock));
1485
1486 return (pa == pa_head ? NULL : pa);
1487}
1488
1489/*
1490 * Remove an attribute from a Multidata.
1491 */
1492void
1493mmd_rempattr(pattr_t *pa)
1494{
1495 kmutex_t *pat_lock = pa->pat_lock;
1496
1497 ASSERT(pa->pat_magic == PATTR_MAGIC);
1498
1499 /* ignore if attribute was marked as persistent */
1500 if ((pa->pat_flags & PATTR_PERSIST) != 0)
1501 return;
1502
1503 mutex_enter(pat_lock);
1504 /*
1505 * We can't deallocate the associated resources if the Multidata
1506 * is shared with other threads, because it's possible that the
1507 * attribute handle value is held by those threads. That's why
1508 * we simply mark the entry as "removed". If there are no other
1509 * threads, then we free the attribute.
1510 */
1511 if (pa->pat_mmd->mmd_dp->db_ref > 1) {
1512 pa->pat_flags |= PATTR_REM_DEFER;
1513 } else {
1514 remque(&(pa->pat_next));
1515 kmem_free(pa, pa->pat_buflen);
1516 }
1517 mutex_exit(pat_lock);
1518}
1519
1520/*
1521 * Find an attribute (according to its type) and return its handle.
1522 */
1523pattr_t *
1524mmd_getpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai)
1525{
1526 patbkt_t *tbl, *bkt;
1527 pattr_t *pa;
1528
1529 ASSERT(mmd != NULL);
1530 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1531 ASSERT(pai != NULL);
1532
1533 /* get the right attribute hash table (local or global) */
1534 tbl = pd != NULL ? pd->pd_pattbl : mmd->mmd_pattbl;
1535
1536 /* attribute hash table doesn't exist? */
1537 if (tbl == NULL)
1538 return (NULL);
1539
1540 ASSERT(tbl->pbkt_tbl_sz > 0);
1541 bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]);
1542
1543 if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL) {
1544 ASSERT(pa->pat_buflen >= sizeof (*pa));
1545 pai->len = pa->pat_buflen - sizeof (*pa);
1546 pai->buf = pai->len > 0 ?
1547 (uchar_t *)pa + sizeof (pattr_t) : NULL;
1548 }
1549 ASSERT(pa == NULL || pa->pat_magic == PATTR_MAGIC);
1550 return (pa);
1551}
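
/*
 * Illustrative sketch (not compiled here), continuing the assumed
 * PATTR_HCKSUM example above:
 *
 *	pattrinfo_t pai;
 *
 *	pai.type = PATTR_HCKSUM;
 *	if (mmd_getpattr(mmd, NULL, &pai) != NULL) {
 *		pai.buf and pai.len now describe the attribute
 *		payload stored within the pattr_t
 *	}
 */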
1552
1553/*
1554 * Return total size of buffers and total size of areas referenced
1555 * by all in-use (unremoved) packet descriptors.
1556 */
1557void
1558mmd_getsize(multidata_t *mmd, uint_t *ptotal, uint_t *pinuse)
1559{
1560 pdesc_t *pd;
1561 pdescinfo_t *pdi;
1562 int i;
1563
1564 ASSERT(mmd != NULL);
1565 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1566
1567 mutex_enter(&mmd->mmd_pd_slab_lock);
1568 if (ptotal != NULL) {
1569 *ptotal = 0;
1570
1571 if (mmd->mmd_hbuf != NULL)
1572 *ptotal += MBLKL(mmd->mmd_hbuf);
1573
1574 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) {
1575 ASSERT(mmd->mmd_pbuf[i] != NULL);
1576 *ptotal += MBLKL(mmd->mmd_pbuf[i]);
1577 }
1578 }
1579 if (pinuse != NULL) {
1580 *pinuse = 0;
1581
1582 /* first pdesc */
1583 pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE);
1584 while (pd != NULL) {
1585 pdi = &pd->pd_pdi;
1586
1587 /* next pdesc */
1588 pd = mmd_getpdesc(mmd, pd, NULL, 1, B_TRUE);
1589
1590 /* skip over removed descriptor */
1591 if (pdi->flags & PDESC_REM_DEFER)
1592 continue;
1593
1594 if (pdi->flags & PDESC_HBUF_REF)
1595 *pinuse += PDESC_HDRL(pdi);
1596
1597 if (pdi->flags & PDESC_PBUF_REF) {
1598 for (i = 0; i < pdi->pld_cnt; i++)
1599 *pinuse += PDESC_PLDL(pdi, i);
1600 }
1601 }
1602 }
1603 mutex_exit(&mmd->mmd_pd_slab_lock);
1604}