Rishi Srivatsavai | 4eaa471 | 2009-09-10 15:11:49 -0400 | [diff] [blame] | 1 | /* |
| 2 | * CDDL HEADER START |
| 3 | * |
| 4 | * The contents of this file are subject to the terms of the |
| 5 | * Common Development and Distribution License (the "License"). |
| 6 | * You may not use this file except in compliance with the License. |
| 7 | * |
| 8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| 9 | * or http://www.opensolaris.org/os/licensing. |
| 10 | * See the License for the specific language governing permissions |
| 11 | * and limitations under the License. |
| 12 | * |
| 13 | * When distributing Covered Code, include this CDDL HEADER in each |
| 14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| 15 | * If applicable, add the following below this CDDL HEADER, with the |
| 16 | * fields enclosed by brackets "[]" replaced with your own identifying |
| 17 | * information: Portions Copyright [yyyy] [name of copyright owner] |
| 18 | * |
| 19 | * CDDL HEADER END |
| 20 | */ |
| 21 | |
| 22 | /* |
Rishi Srivatsavai | 6f40bf6 | 2010-01-13 12:18:35 -0500 | [diff] [blame^] | 23 | * Copyright 2010 Sun Microsystems, Inc. All rights reserved. |
Rishi Srivatsavai | 4eaa471 | 2009-09-10 15:11:49 -0400 | [diff] [blame] | 24 | * Use is subject to license terms. |
| 25 | */ |
| 26 | |
| 27 | /* |
| 28 | * This module supports AF_TRILL sockets and TRILL layer-2 forwarding. |
| 29 | */ |
| 30 | |
| 31 | #include <sys/strsubr.h> |
| 32 | #include <sys/socket.h> |
| 33 | #include <sys/socketvar.h> |
| 34 | #include <sys/modctl.h> |
| 35 | #include <sys/cmn_err.h> |
| 36 | #include <sys/tihdr.h> |
| 37 | #include <sys/strsun.h> |
| 38 | #include <sys/policy.h> |
| 39 | #include <sys/ethernet.h> |
| 40 | #include <sys/vlan.h> |
| 41 | #include <net/trill.h> |
| 42 | #include <net/if_dl.h> |
| 43 | #include <sys/mac.h> |
| 44 | #include <sys/mac_client.h> |
| 45 | #include <sys/mac_provider.h> |
| 46 | #include <sys/mac_client_priv.h> |
| 47 | #include <sys/sdt.h> |
| 48 | #include <sys/dls.h> |
| 49 | #include <sys/sunddi.h> |
| 50 | |
| 51 | #include "trill_impl.h" |
| 52 | |
| 53 | static void trill_del_all(trill_inst_t *, boolean_t); |
| 54 | static int trill_del_nick(trill_inst_t *, uint16_t, boolean_t); |
| 55 | static void trill_stop_recv(trill_sock_t *); |
| 56 | static void trill_ctrl_input(trill_sock_t *, mblk_t *, const uint8_t *, |
| 57 | uint16_t); |
| 58 | static trill_node_t *trill_node_lookup(trill_inst_t *, uint16_t); |
| 59 | static void trill_node_unref(trill_inst_t *, trill_node_t *); |
| 60 | static void trill_sock_unref(trill_sock_t *); |
| 61 | static void trill_kstats_init(trill_sock_t *, const char *); |
| 62 | |
| 63 | static list_t trill_inst_list; |
| 64 | static krwlock_t trill_inst_rwlock; |
| 65 | |
| 66 | static sock_lower_handle_t trill_create(int, int, int, sock_downcalls_t **, |
| 67 | uint_t *, int *, int, cred_t *); |
| 68 | |
| 69 | static smod_reg_t sinfo = { |
| 70 | SOCKMOD_VERSION, |
| 71 | "trill", |
| 72 | SOCK_UC_VERSION, |
| 73 | SOCK_DC_VERSION, |
| 74 | trill_create, |
| 75 | NULL, |
| 76 | }; |
| 77 | |
| 78 | /* modldrv structure */ |
| 79 | static struct modlsockmod sockmod = { |
| 80 | &mod_sockmodops, "AF_TRILL socket module", &sinfo |
| 81 | }; |
| 82 | |
| 83 | /* modlinkage structure */ |
| 84 | static struct modlinkage ml = { |
| 85 | MODREV_1, |
| 86 | &sockmod, |
| 87 | NULL |
| 88 | }; |
| 89 | |
| 90 | #define VALID_NICK(n) ((n) != RBRIDGE_NICKNAME_NONE && \ |
| 91 | (n) != RBRIDGE_NICKNAME_UNUSED) |
| 92 | |
| 93 | static mblk_t * |
| 94 | create_trill_header(trill_sock_t *tsock, mblk_t *mp, const uint8_t *daddr, |
| 95 | boolean_t trill_hdr_ok, boolean_t multidest, uint16_t tci, |
| 96 | size_t msglen) |
| 97 | { |
| 98 | int extra_hdr_len; |
| 99 | struct ether_vlan_header *ethvlanhdr; |
| 100 | mblk_t *hdr_mp; |
| 101 | uint16_t etype; |
| 102 | |
| 103 | etype = msglen > 0 ? (uint16_t)msglen : ETHERTYPE_TRILL; |
| 104 | |
| 105 | /* When sending on the PVID, we must not give a VLAN ID */ |
| 106 | if (tci == tsock->ts_link->bl_pvid) |
| 107 | tci = TRILL_NO_TCI; |
| 108 | |
| 109 | /* |
| 110 | * Create new Ethernet header and include additional space |
| 111 | * for writing TRILL header and/or VLAN tag. |
| 112 | */ |
| 113 | extra_hdr_len = (trill_hdr_ok ? 0 : sizeof (trill_header_t)) + |
| 114 | (tci != TRILL_NO_TCI ? sizeof (struct ether_vlan_extinfo) : 0); |
| 115 | hdr_mp = mac_header(tsock->ts_link->bl_mh, daddr, |
| 116 | tci != TRILL_NO_TCI ? ETHERTYPE_VLAN : etype, mp, extra_hdr_len); |
| 117 | if (hdr_mp == NULL) { |
| 118 | freemsg(mp); |
| 119 | return (NULL); |
| 120 | } |
| 121 | |
| 122 | if (tci != TRILL_NO_TCI) { |
| 123 | /* LINTED: alignment */ |
| 124 | ethvlanhdr = (struct ether_vlan_header *)hdr_mp->b_rptr; |
| 125 | ethvlanhdr->ether_tci = htons(tci); |
| 126 | ethvlanhdr->ether_type = htons(etype); |
| 127 | hdr_mp->b_wptr += sizeof (struct ether_vlan_extinfo); |
| 128 | } |
| 129 | |
| 130 | if (!trill_hdr_ok) { |
| 131 | trill_header_t *thp; |
| 132 | /* LINTED: alignment */ |
| 133 | thp = (trill_header_t *)hdr_mp->b_wptr; |
| 134 | (void) memset(thp, 0, sizeof (trill_header_t)); |
| 135 | thp->th_hopcount = TRILL_DEFAULT_HOPS; |
| 136 | thp->th_multidest = (multidest ? 1:0); |
| 137 | hdr_mp->b_wptr += sizeof (trill_header_t); |
| 138 | } |
| 139 | |
| 140 | hdr_mp->b_cont = mp; |
| 141 | return (hdr_mp); |
| 142 | } |
| 143 | |
| 144 | /* |
| 145 | * TRILL local recv function. TRILL data frames that should be received |
| 146 | * by the local system are decapsulated here and passed to bridging for |
| 147 | * learning and local system receive. Only called when we are the forwarder |
| 148 | * on the link (multi-dest frames) or the frame was destined for us. |
| 149 | */ |
| 150 | static void |
| 151 | trill_recv_local(trill_sock_t *tsock, mblk_t *mp, uint16_t ingressnick) |
| 152 | { |
| 153 | struct ether_header *inner_ethhdr; |
| 154 | |
| 155 | /* LINTED: alignment */ |
| 156 | inner_ethhdr = (struct ether_header *)mp->b_rptr; |
| 157 | DTRACE_PROBE1(trill__recv__local, struct ether_header *, inner_ethhdr); |
| 158 | |
| 159 | DB_CKSUMFLAGS(mp) = 0; |
| 160 | /* |
| 161 | * Transmit the decapsulated frame on the link via Bridging. |
| 162 | * Bridging does source address learning and appropriate forwarding. |
| 163 | */ |
| 164 | bridge_trill_decaps(tsock->ts_link, mp, ingressnick); |
| 165 | KSPINCR(tks_decap); |
| 166 | } |
| 167 | |
| 168 | /* |
| 169 | * Determines the outgoing link to reach a RBridge having the given nick |
| 170 | * Assumes caller has acquired the trill instance rwlock. |
| 171 | */ |
| 172 | static trill_sock_t * |
| 173 | find_trill_link(trill_inst_t *tip, datalink_id_t linkid) |
| 174 | { |
| 175 | trill_sock_t *tsp = NULL; |
| 176 | |
| 177 | ASSERT(RW_LOCK_HELD(&tip->ti_rwlock)); |
| 178 | for (tsp = list_head(&tip->ti_socklist); tsp != NULL; |
| 179 | tsp = list_next(&tip->ti_socklist, tsp)) { |
| 180 | if (tsp->ts_link != NULL && tsp->ts_link->bl_linkid == linkid) { |
| 181 | ASSERT(tsp->ts_link->bl_mh != NULL); |
| 182 | ASSERT(!(tsp->ts_flags & TSF_SHUTDOWN)); |
| 183 | atomic_inc_uint(&tsp->ts_refs); |
| 184 | break; |
| 185 | } |
| 186 | } |
| 187 | return (tsp); |
| 188 | } |
| 189 | |
| 190 | /* |
| 191 | * TRILL destination forwarding function. Transmits the TRILL data packet |
| 192 | * to the next-hop, adjacent RBridge. Consumes passed mblk_t. |
| 193 | */ |
| 194 | static void |
| 195 | trill_dest_fwd(trill_inst_t *tip, mblk_t *fwd_mp, uint16_t adj_nick, |
| 196 | boolean_t has_trill_hdr, boolean_t multidest, uint16_t dtnick) |
| 197 | { |
| 198 | trill_node_t *adj; |
| 199 | trill_sock_t *tsock = NULL; |
| 200 | trill_header_t *trillhdr; |
| 201 | struct ether_header *ethhdr; |
| 202 | int ethtype; |
| 203 | int ethhdrlen; |
| 204 | |
| 205 | adj = trill_node_lookup(tip, adj_nick); |
| 206 | if (adj == NULL || ((tsock = adj->tn_tsp) == NULL)) |
| 207 | goto dest_fwd_fail; |
| 208 | |
| 209 | ASSERT(tsock->ts_link != NULL); |
| 210 | ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); |
| 211 | ASSERT(adj->tn_ni != NULL); |
| 212 | |
| 213 | DTRACE_PROBE3(trill__dest__fwd, uint16_t, adj_nick, trill_node_t, |
| 214 | adj, trill_sock_t, tsock); |
| 215 | |
| 216 | /* |
| 217 | * For broadcast links by using the dest address of |
| 218 | * the RBridge to forward the frame should result in |
| 219 | * savings. When the link is a bridged LAN or there are |
| 220 | * many end stations the frame will not always be flooded. |
| 221 | */ |
| 222 | fwd_mp = create_trill_header(tsock, fwd_mp, adj->tn_ni->tni_adjsnpa, |
| 223 | has_trill_hdr, multidest, tsock->ts_desigvlan, 0); |
| 224 | if (fwd_mp == NULL) |
| 225 | goto dest_fwd_fail; |
| 226 | |
| 227 | /* LINTED: alignment */ |
| 228 | ethhdr = (struct ether_header *)fwd_mp->b_rptr; |
| 229 | ethtype = ntohs(ethhdr->ether_type); |
| 230 | ASSERT(ethtype == ETHERTYPE_VLAN || ethtype == ETHERTYPE_TRILL); |
| 231 | |
| 232 | /* Pullup Ethernet and TRILL header (w/o TRILL options) */ |
| 233 | ethhdrlen = sizeof (struct ether_header) + |
| 234 | (ethtype == ETHERTYPE_VLAN ? sizeof (struct ether_vlan_extinfo):0); |
| 235 | if (!pullupmsg(fwd_mp, ethhdrlen + sizeof (trill_header_t))) |
| 236 | goto dest_fwd_fail; |
| 237 | /* LINTED: alignment */ |
| 238 | trillhdr = (struct trill_header *)(fwd_mp->b_rptr + ethhdrlen); |
| 239 | |
| 240 | /* Update TRILL header with ingress and egress nicks for new frames */ |
| 241 | if (!has_trill_hdr) { |
| 242 | /* We are creating a new TRILL frame */ |
| 243 | trillhdr->th_egressnick = (multidest ? dtnick:adj_nick); |
| 244 | rw_enter(&tip->ti_rwlock, RW_READER); |
| 245 | trillhdr->th_ingressnick = tip->ti_nick; |
| 246 | rw_exit(&tip->ti_rwlock); |
| 247 | if (!VALID_NICK(trillhdr->th_ingressnick)) |
| 248 | goto dest_fwd_fail; |
| 249 | } |
| 250 | |
| 251 | /* Set hop count and update header in packet */ |
| 252 | ASSERT(trillhdr->th_hopcount != 0); |
| 253 | trillhdr->th_hopcount--; |
| 254 | |
| 255 | /* Clear checksum flag and transmit frame on the link */ |
| 256 | DB_CKSUMFLAGS(fwd_mp) = 0; |
| 257 | DTRACE_PROBE1(trill__dest__fwd__tx, trill_header_t *, &trillhdr); |
| 258 | fwd_mp = bridge_trill_output(tsock->ts_link, fwd_mp); |
| 259 | if (fwd_mp == NULL) { |
| 260 | KSPINCR(tks_sent); |
| 261 | KSPINCR(tks_forward); |
| 262 | } else { |
| 263 | freemsg(fwd_mp); |
| 264 | KSPINCR(tks_drops); |
| 265 | } |
| 266 | trill_node_unref(tip, adj); |
| 267 | return; |
| 268 | |
| 269 | dest_fwd_fail: |
| 270 | if (adj != NULL) |
| 271 | trill_node_unref(tip, adj); |
| 272 | if (tsock != NULL) |
| 273 | KSPINCR(tks_drops); |
| 274 | freemsg(fwd_mp); |
| 275 | } |
| 276 | |
| 277 | /* |
| 278 | * TRILL multi-destination forwarding. Transmits the packet to the adjacencies |
| 279 | * on the distribution tree determined by the egress nick. Source addr (saddr) |
| 280 | * is NULL for new TRILL packets originating from us. |
| 281 | */ |
| 282 | static void |
| 283 | trill_multidest_fwd(trill_inst_t *tip, mblk_t *mp, uint16_t egressnick, |
| 284 | uint16_t ingressnick, boolean_t is_trill_pkt, const uint8_t *saddr, |
| 285 | int inner_vlan, boolean_t free_mblk) |
| 286 | { |
| 287 | int idx; |
| 288 | uint16_t adjnick; |
| 289 | trill_node_t *dest; |
| 290 | trill_node_t *adj; |
| 291 | mblk_t *fwd_mp; |
| 292 | boolean_t nicksaved = B_FALSE; |
| 293 | uint16_t adjnicksaved; |
| 294 | |
| 295 | /* Lookup the egress nick info, this is the DT root */ |
| 296 | if ((dest = trill_node_lookup(tip, egressnick)) == NULL) |
| 297 | goto fail_multidest_fwd; |
| 298 | |
| 299 | /* Send a copy to all our adjacencies on the DT root */ |
| 300 | ASSERT(dest->tn_ni); |
| 301 | for (idx = 0; idx < dest->tn_ni->tni_adjcount; idx++) { |
| 302 | |
| 303 | /* Check for a valid adjacency node */ |
| 304 | adjnick = TNI_ADJNICK(dest->tn_ni, idx); |
| 305 | if (!VALID_NICK(adjnick) || ingressnick == adjnick || |
| 306 | ((adj = trill_node_lookup(tip, adjnick)) == NULL)) |
| 307 | continue; |
| 308 | |
| 309 | /* Do not forward back to adjacency that sent the pkt to us */ |
| 310 | ASSERT(adj->tn_ni != NULL); |
| 311 | if ((saddr != NULL) && |
| 312 | (memcmp(adj->tn_ni->tni_adjsnpa, saddr, |
| 313 | ETHERADDRL) == 0)) { |
| 314 | trill_node_unref(tip, adj); |
| 315 | continue; |
| 316 | } |
| 317 | |
| 318 | /* Check if adj is marked as reaching inner VLAN downstream */ |
| 319 | if ((inner_vlan != VLAN_ID_NONE) && |
| 320 | !TRILL_VLANISSET(TNI_VLANFILTERMAP(dest->tn_ni, idx), |
| 321 | inner_vlan)) { |
| 322 | trill_node_unref(tip, adj); |
| 323 | DTRACE_PROBE4(trill__multi__dest__fwd__vlanfiltered, |
| 324 | uint16_t, adjnick, uint16_t, ingressnick, |
| 325 | uint16_t, egressnick, int, inner_vlan); |
| 326 | continue; |
| 327 | } |
| 328 | |
| 329 | trill_node_unref(tip, adj); |
| 330 | |
| 331 | /* |
| 332 | * Save the nick and look ahead to see if we should forward the |
| 333 | * frame to more adjacencies. We avoid doing a copy for this |
| 334 | * nick and use the passed mblk when we can consume the passed |
| 335 | * mblk. |
| 336 | */ |
| 337 | if (free_mblk && !nicksaved) { |
| 338 | adjnicksaved = adjnick; |
| 339 | nicksaved = B_TRUE; |
| 340 | continue; |
| 341 | } |
| 342 | |
| 343 | fwd_mp = copymsg(mp); |
| 344 | if (fwd_mp == NULL) |
| 345 | break; |
| 346 | DTRACE_PROBE2(trill__multi__dest__fwd, uint16_t, |
| 347 | adjnick, uint16_t, ingressnick); |
| 348 | trill_dest_fwd(tip, fwd_mp, adjnick, is_trill_pkt, |
| 349 | B_TRUE, egressnick); |
| 350 | } |
| 351 | trill_node_unref(tip, dest); |
| 352 | |
| 353 | if (nicksaved) { |
| 354 | ASSERT(free_mblk); |
| 355 | DTRACE_PROBE2(trill__multi__dest__fwd, uint16_t, |
| 356 | adjnicksaved, uint16_t, ingressnick); |
| 357 | trill_dest_fwd(tip, mp, adjnicksaved, is_trill_pkt, |
| 358 | B_TRUE, egressnick); |
| 359 | return; |
| 360 | } |
| 361 | |
| 362 | fail_multidest_fwd: |
| 363 | DTRACE_PROBE2(trill__multi__dest__fwd__fail, uint16_t, |
| 364 | egressnick, uint16_t, ingressnick); |
| 365 | if (free_mblk) { |
| 366 | freemsg(mp); |
| 367 | } |
| 368 | } |
| 369 | |
| 370 | /* |
| 371 | * TRILL data receive function. Forwards the received frame if necessary |
| 372 | * and also determines if the received frame should be consumed locally. |
| 373 | * Consumes passed mblk. |
| 374 | */ |
| 375 | static void |
| 376 | trill_recv(trill_sock_t *tsock, mblk_t *mp, const uint8_t *mpsaddr) |
| 377 | { |
| 378 | trill_header_t *trillhdr; |
| 379 | trill_node_t *dest = NULL; |
| 380 | trill_node_t *source = NULL; |
| 381 | trill_node_t *adj; |
| 382 | uint16_t ournick, adjnick, treeroot; |
| 383 | struct ether_header *ethhdr; |
| 384 | trill_inst_t *tip = tsock->ts_tip; |
| 385 | uint8_t srcaddr[ETHERADDRL]; |
| 386 | size_t trillhdrlen; |
| 387 | int inner_vlan = VLAN_ID_NONE; |
| 388 | int tci; |
| 389 | int idx; |
| 390 | size_t min_size; |
| 391 | |
| 392 | /* Copy Ethernet source address before modifying packet */ |
| 393 | (void) memcpy(srcaddr, mpsaddr, ETHERADDRL); |
| 394 | |
| 395 | /* Pull up TRILL header if necessary. */ |
| 396 | min_size = sizeof (trill_header_t); |
| 397 | if ((MBLKL(mp) < min_size || |
| 398 | !IS_P2ALIGNED(mp->b_rptr, TRILL_HDR_ALIGN)) && |
| 399 | !pullupmsg(mp, min_size)) |
| 400 | goto fail; |
| 401 | |
| 402 | /* LINTED: alignment */ |
| 403 | trillhdr = (trill_header_t *)mp->b_rptr; |
| 404 | if (trillhdr->th_version != TRILL_PROTOCOL_VERS) { |
| 405 | DTRACE_PROBE1(trill__recv__wrongversion, |
| 406 | trill_header_t *, trillhdr); |
| 407 | goto fail; |
| 408 | } |
| 409 | |
| 410 | /* Drop if unknown or invalid nickname */ |
| 411 | if (!VALID_NICK(trillhdr->th_egressnick) || |
| 412 | !VALID_NICK(trillhdr->th_ingressnick)) { |
| 413 | DTRACE_PROBE1(trill__recv__invalidnick, |
| 414 | trill_header_t *, trillhdr); |
| 415 | goto fail; |
| 416 | } |
| 417 | |
| 418 | rw_enter(&tip->ti_rwlock, RW_READER); |
| 419 | ournick = tip->ti_nick; |
| 420 | treeroot = tip->ti_treeroot; |
| 421 | rw_exit(&tip->ti_rwlock); |
| 422 | /* Drop if we received a packet with our nick as ingress */ |
| 423 | if (trillhdr->th_ingressnick == ournick) |
| 424 | goto fail; |
| 425 | |
| 426 | /* Re-pull any TRILL options and inner Ethernet header */ |
| 427 | min_size += GET_TRILL_OPTS_LEN(trillhdr) * sizeof (uint32_t) + |
| 428 | sizeof (struct ether_header); |
| 429 | if (MBLKL(mp) < min_size) { |
| 430 | if (!pullupmsg(mp, min_size)) |
| 431 | goto fail; |
| 432 | /* LINTED: alignment */ |
| 433 | trillhdr = (trill_header_t *)mp->b_rptr; |
| 434 | } |
| 435 | trillhdrlen = sizeof (trill_header_t) + |
| 436 | (GET_TRILL_OPTS_LEN(trillhdr) * sizeof (uint32_t)); |
| 437 | |
| 438 | /* |
| 439 | * Get the inner Ethernet header, plus the inner VLAN header if there |
| 440 | * is one. |
| 441 | */ |
| 442 | /* LINTED: alignment */ |
| 443 | ethhdr = (struct ether_header *)(mp->b_rptr + trillhdrlen); |
| 444 | if (ethhdr->ether_type == htons(ETHERTYPE_VLAN)) { |
| 445 | min_size += sizeof (struct ether_vlan_extinfo); |
| 446 | if (MBLKL(mp) < min_size) { |
| 447 | if (!pullupmsg(mp, min_size)) |
| 448 | goto fail; |
| 449 | /* LINTED: alignment */ |
| 450 | trillhdr = (trill_header_t *)mp->b_rptr; |
| 451 | /* LINTED: alignment */ |
| 452 | ethhdr = (struct ether_header *)(mp->b_rptr + |
| 453 | trillhdrlen); |
| 454 | } |
| 455 | |
| 456 | tci = ntohs(((struct ether_vlan_header *)ethhdr)->ether_tci); |
| 457 | inner_vlan = VLAN_ID(tci); |
| 458 | } |
| 459 | |
| 460 | /* Known/single destination forwarding. */ |
| 461 | if (!trillhdr->th_multidest) { |
| 462 | |
| 463 | /* Inner MacDA must be unicast */ |
| 464 | if (ethhdr->ether_dhost.ether_addr_octet[0] & 1) |
| 465 | goto fail; |
| 466 | |
| 467 | /* Ingress and Egress nicks must be different */ |
| 468 | if (trillhdr->th_egressnick == trillhdr->th_ingressnick) |
| 469 | goto fail; |
| 470 | |
| 471 | DTRACE_PROBE1(trill__recv__singledest, |
| 472 | trill_header_t *, trillhdr); |
| 473 | if (trillhdr->th_egressnick == ournick) { |
| 474 | mp->b_rptr += trillhdrlen; |
| 475 | trill_recv_local(tsock, mp, trillhdr->th_ingressnick); |
| 476 | } else if (trillhdr->th_hopcount > 0) { |
| 477 | trill_dest_fwd(tip, mp, trillhdr->th_egressnick, |
| 478 | B_TRUE, B_FALSE, RBRIDGE_NICKNAME_NONE); |
| 479 | } else { |
| 480 | goto fail; |
| 481 | } |
| 482 | return; |
| 483 | } |
| 484 | |
| 485 | /* |
| 486 | * Multi-destination frame: perform checks verifying we have |
| 487 | * received a valid multi-destination frame before receiving the |
| 488 | * frame locally and forwarding the frame to other RBridges. |
| 489 | * |
| 490 | * Check if we received this multi-destination frame on a |
| 491 | * adjacency in the distribution tree indicated by the frame's |
| 492 | * egress nickname. |
| 493 | */ |
| 494 | if ((dest = trill_node_lookup(tip, trillhdr->th_egressnick)) == NULL) |
| 495 | goto fail; |
| 496 | for (idx = 0; idx < dest->tn_ni->tni_adjcount; idx++) { |
| 497 | adjnick = TNI_ADJNICK(dest->tn_ni, idx); |
| 498 | if ((adj = trill_node_lookup(tip, adjnick)) == NULL) |
| 499 | continue; |
| 500 | if (memcmp(adj->tn_ni->tni_adjsnpa, srcaddr, ETHERADDRL) == 0) { |
| 501 | trill_node_unref(tip, adj); |
| 502 | break; |
| 503 | } |
| 504 | trill_node_unref(tip, adj); |
| 505 | } |
| 506 | |
| 507 | if (idx >= dest->tn_ni->tni_adjcount) { |
| 508 | DTRACE_PROBE2(trill__recv__multidest__adjcheckfail, |
| 509 | trill_header_t *, trillhdr, trill_node_t *, dest); |
| 510 | goto fail; |
| 511 | } |
| 512 | |
| 513 | /* |
| 514 | * Reverse path forwarding check. Check if the ingress RBridge |
| 515 | * that has forwarded the frame advertised the use of the |
| 516 | * distribution tree specified in the egress nick. |
| 517 | */ |
| 518 | if ((source = trill_node_lookup(tip, trillhdr->th_ingressnick)) == NULL) |
| 519 | goto fail; |
| 520 | for (idx = 0; idx < source->tn_ni->tni_dtrootcount; idx++) { |
| 521 | if (TNI_DTROOTNICK(source->tn_ni, idx) == |
| 522 | trillhdr->th_egressnick) |
| 523 | break; |
| 524 | } |
| 525 | |
| 526 | if (idx >= source->tn_ni->tni_dtrootcount) { |
| 527 | /* |
| 528 | * Allow receipt of forwarded frame with the highest |
| 529 | * tree root RBridge as the egress RBridge when the |
| 530 | * ingress RBridge has not advertised the use of any |
| 531 | * distribution trees. |
| 532 | */ |
| 533 | if (source->tn_ni->tni_dtrootcount != 0 || |
| 534 | trillhdr->th_egressnick != treeroot) { |
| 535 | DTRACE_PROBE3( |
| 536 | trill__recv__multidest__rpfcheckfail, |
| 537 | trill_header_t *, trillhdr, trill_node_t *, |
| 538 | source, trill_inst_t *, tip); |
| 539 | goto fail; |
| 540 | } |
| 541 | } |
| 542 | |
| 543 | /* Check hop count before doing any forwarding */ |
| 544 | if (trillhdr->th_hopcount == 0) |
| 545 | goto fail; |
| 546 | |
| 547 | /* Forward frame using the distribution tree specified by egress nick */ |
| 548 | DTRACE_PROBE2(trill__recv__multidest, trill_header_t *, |
| 549 | trillhdr, trill_node_t *, source); |
| 550 | trill_node_unref(tip, source); |
| 551 | trill_node_unref(tip, dest); |
| 552 | |
| 553 | /* Tell forwarding not to free if we're the link forwarder. */ |
| 554 | trill_multidest_fwd(tip, mp, trillhdr->th_egressnick, |
| 555 | trillhdr->th_ingressnick, B_TRUE, srcaddr, inner_vlan, |
| 556 | B_FALSE); |
| 557 | |
| 558 | /* |
| 559 | * Send de-capsulated frame locally if we are the link forwarder (also |
| 560 | * does bridge learning). |
| 561 | */ |
| 562 | mp->b_rptr += trillhdrlen; |
| 563 | trill_recv_local(tsock, mp, trillhdr->th_ingressnick); |
| 564 | KSPINCR(tks_recv); |
| 565 | return; |
| 566 | |
| 567 | fail: |
| 568 | DTRACE_PROBE2(trill__recv__multidest__fail, mblk_t *, mp, |
| 569 | trill_sock_t *, tsock); |
| 570 | if (dest != NULL) |
| 571 | trill_node_unref(tip, dest); |
| 572 | if (source != NULL) |
| 573 | trill_node_unref(tip, source); |
| 574 | freemsg(mp); |
| 575 | KSPINCR(tks_drops); |
| 576 | } |
| 577 | |
| 578 | static void |
| 579 | trill_stop_recv(trill_sock_t *tsock) |
| 580 | { |
| 581 | mutex_enter(&tsock->ts_socklock); |
| 582 | stop_retry: |
| 583 | if (tsock->ts_state == TS_UNBND || tsock->ts_link == NULL) { |
| 584 | mutex_exit(&tsock->ts_socklock); |
| 585 | return; |
| 586 | } |
| 587 | |
| 588 | /* |
| 589 | * If another thread is closing the socket then wait. Our callers |
| 590 | * expect us to return only after the socket is closed. |
| 591 | */ |
| 592 | if (tsock->ts_flags & TSF_CLOSEWAIT) { |
| 593 | cv_wait(&tsock->ts_sockclosewait, &tsock->ts_socklock); |
| 594 | goto stop_retry; |
| 595 | } |
| 596 | |
| 597 | /* |
| 598 | * Set state and flags to block new bind or close calls |
| 599 | * while we close the socket. |
| 600 | */ |
| 601 | tsock->ts_flags |= TSF_CLOSEWAIT; |
| 602 | |
| 603 | /* Wait until all AF_TRILL socket transmit operations are done */ |
| 604 | while (tsock->ts_sockthreadcount > 0) |
| 605 | cv_wait(&tsock->ts_sockthreadwait, &tsock->ts_socklock); |
| 606 | |
| 607 | /* |
| 608 | * We are guaranteed to be the only thread closing on the |
| 609 | * socket while the TSF_CLOSEWAIT flag is set, all others cv_wait |
| 610 | * for us to finish. |
| 611 | */ |
| 612 | ASSERT(tsock->ts_link != NULL); |
| 613 | if (tsock->ts_ksp != NULL) |
| 614 | kstat_delete(tsock->ts_ksp); |
| 615 | |
| 616 | /* |
| 617 | * Release lock before bridge_trill_lnunref to prevent deadlock |
| 618 | * between trill_ctrl_input thread waiting to acquire ts_socklock |
| 619 | * and bridge_trill_lnunref waiting for the trill thread to finish. |
| 620 | */ |
| 621 | mutex_exit(&tsock->ts_socklock); |
| 622 | |
| 623 | /* |
| 624 | * Release TRILL link reference from Bridging. On return from |
| 625 | * bridge_trill_lnunref we can be sure there are no active TRILL data |
| 626 | * threads for this link. |
| 627 | */ |
| 628 | bridge_trill_lnunref(tsock->ts_link); |
| 629 | |
| 630 | /* Set socket as unbound & wakeup threads waiting for socket to close */ |
| 631 | mutex_enter(&tsock->ts_socklock); |
| 632 | ASSERT(tsock->ts_link != NULL); |
| 633 | tsock->ts_link = NULL; |
| 634 | tsock->ts_state = TS_UNBND; |
| 635 | tsock->ts_flags &= ~TSF_CLOSEWAIT; |
| 636 | cv_broadcast(&tsock->ts_sockclosewait); |
| 637 | mutex_exit(&tsock->ts_socklock); |
| 638 | } |
| 639 | |
| 640 | static int |
| 641 | trill_start_recv(trill_sock_t *tsock, const struct sockaddr *sa, socklen_t len) |
| 642 | { |
| 643 | struct sockaddr_dl *lladdr = (struct sockaddr_dl *)sa; |
| 644 | datalink_id_t linkid; |
| 645 | int err = 0; |
| 646 | |
| 647 | if (len != sizeof (*lladdr)) |
| 648 | return (EINVAL); |
| 649 | |
| 650 | mutex_enter(&tsock->ts_socklock); |
| 651 | if (tsock->ts_tip == NULL || tsock->ts_state != TS_UNBND) { |
| 652 | err = EINVAL; |
| 653 | goto bind_error; |
| 654 | } |
| 655 | |
| 656 | if (tsock->ts_flags & TSF_CLOSEWAIT || tsock->ts_link != NULL) { |
| 657 | err = EBUSY; |
| 658 | goto bind_error; |
| 659 | } |
| 660 | |
| 661 | (void) memcpy(&(tsock->ts_lladdr), lladdr, |
| 662 | sizeof (struct sockaddr_dl)); |
| 663 | (void) memcpy(&linkid, tsock->ts_lladdr.sdl_data, |
| 664 | sizeof (datalink_id_t)); |
| 665 | |
| 666 | tsock->ts_link = bridge_trill_lnref(tsock->ts_tip->ti_binst, |
| 667 | linkid, tsock); |
| 668 | if (tsock->ts_link == NULL) { |
| 669 | err = EINVAL; |
| 670 | goto bind_error; |
| 671 | } |
| 672 | |
| 673 | trill_kstats_init(tsock, tsock->ts_tip->ti_bridgename); |
| 674 | tsock->ts_state = TS_IDLE; |
| 675 | |
| 676 | bind_error: |
| 677 | mutex_exit(&tsock->ts_socklock); |
| 678 | return (err); |
| 679 | } |
| 680 | |
| 681 | static int |
| 682 | trill_do_unbind(trill_sock_t *tsock) |
| 683 | { |
| 684 | /* If a bind has not been done, we can't unbind. */ |
| 685 | if (tsock->ts_state != TS_IDLE) |
| 686 | return (EINVAL); |
| 687 | |
| 688 | trill_stop_recv(tsock); |
| 689 | return (0); |
| 690 | } |
| 691 | |
| 692 | static void |
| 693 | trill_instance_unref(trill_inst_t *tip) |
| 694 | { |
| 695 | rw_enter(&trill_inst_rwlock, RW_WRITER); |
| 696 | rw_enter(&tip->ti_rwlock, RW_WRITER); |
| 697 | if (atomic_dec_uint_nv(&tip->ti_refs) == 0) { |
| 698 | list_remove(&trill_inst_list, tip); |
| 699 | rw_exit(&tip->ti_rwlock); |
| 700 | rw_exit(&trill_inst_rwlock); |
| 701 | if (tip->ti_binst != NULL) |
| 702 | bridge_trill_brunref(tip->ti_binst); |
| 703 | list_destroy(&tip->ti_socklist); |
| 704 | rw_destroy(&tip->ti_rwlock); |
| 705 | kmem_free(tip, sizeof (*tip)); |
| 706 | } else { |
| 707 | rw_exit(&tip->ti_rwlock); |
| 708 | rw_exit(&trill_inst_rwlock); |
| 709 | } |
| 710 | } |
| 711 | |
| 712 | /* |
| 713 | * This is called when the bridge module receives a TRILL-encapsulated packet |
| 714 | * on a given link or a packet identified as "TRILL control." We must verify |
| 715 | * that it's for us (it almost certainly will be), and then either decapsulate |
| 716 | * (if it's to our nickname), forward (if it's to someone else), or send up one |
| 717 | * of the sockets (if it's control traffic). |
| 718 | * |
| 719 | * Sadly, on Ethernet, the control traffic is identified by Outer.MacDA, and |
| 720 | * not by TRILL header information. |
| 721 | */ |
| 722 | static void |
| 723 | trill_recv_pkt_cb(void *lptr, bridge_link_t *blp, mac_resource_handle_t rsrc, |
| 724 | mblk_t *mp, mac_header_info_t *hdr_info) |
| 725 | { |
| 726 | trill_sock_t *tsock = lptr; |
| 727 | |
| 728 | _NOTE(ARGUNUSED(rsrc)); |
| 729 | |
| 730 | ASSERT(tsock->ts_tip != NULL); |
| 731 | ASSERT(tsock->ts_link != NULL); |
| 732 | ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); |
| 733 | |
| 734 | /* |
| 735 | * Only receive packet if the source address is not multicast (which is |
| 736 | * bogus). |
| 737 | */ |
| 738 | if (hdr_info->mhi_saddr[0] & 1) |
| 739 | goto discard; |
| 740 | |
| 741 | /* |
| 742 | * Check if this is our own packet reflected back. It should not be. |
| 743 | */ |
| 744 | if (bcmp(hdr_info->mhi_saddr, blp->bl_local_mac, ETHERADDRL) == 0) |
| 745 | goto discard; |
| 746 | |
| 747 | /* Only receive unicast packet if addressed to us */ |
| 748 | if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST && |
| 749 | bcmp(hdr_info->mhi_daddr, blp->bl_local_mac, ETHERADDRL) != 0) |
| 750 | goto discard; |
| 751 | |
| 752 | if (hdr_info->mhi_bindsap == ETHERTYPE_TRILL) { |
| 753 | /* TRILL data packets */ |
| 754 | trill_recv(tsock, mp, hdr_info->mhi_saddr); |
| 755 | } else { |
| 756 | /* Design constraint for cheap IS-IS/BPDU comparison */ |
| 757 | ASSERT(all_isis_rbridges[4] != bridge_group_address[4]); |
| 758 | /* Send received control packet upstream */ |
| 759 | trill_ctrl_input(tsock, mp, hdr_info->mhi_saddr, |
| 760 | hdr_info->mhi_daddr[4] == all_isis_rbridges[4] ? |
| 761 | hdr_info->mhi_tci : TRILL_TCI_BPDU); |
| 762 | } |
| 763 | |
| 764 | return; |
| 765 | |
| 766 | discard: |
| 767 | freemsg(mp); |
| 768 | KSPINCR(tks_drops); |
| 769 | } |
| 770 | |
| 771 | /* |
| 772 | * This is called when the bridge module discovers that the destination address |
| 773 | * for a packet is not local -- it's through some remote node. We must verify |
| 774 | * that the remote node isn't our nickname (it shouldn't be), add a TRILL |
| 775 | * header, and then use the IS-IS data to determine which link and which |
| 776 | * next-hop RBridge should be used for output. We then transmit on that link. |
| 777 | * |
| 778 | * The egress_nick is RBRIDGE_NICKNAME_NONE for the "unknown destination" case. |
| 779 | */ |
| 780 | static void |
| 781 | trill_encap_pkt_cb(void *lptr, bridge_link_t *blp, mac_header_info_t *hdr_info, |
| 782 | mblk_t *mp, uint16_t egress_nick) |
| 783 | { |
| 784 | uint16_t ournick; |
| 785 | uint16_t dtnick; |
| 786 | trill_node_t *self = NULL; |
| 787 | trill_sock_t *tsock = lptr; |
| 788 | trill_inst_t *tip = tsock->ts_tip; |
| 789 | int vlan = VLAN_ID_NONE; |
| 790 | |
| 791 | _NOTE(ARGUNUSED(blp)); |
| 792 | ASSERT(hdr_info->mhi_bindsap != ETHERTYPE_TRILL); |
| 793 | |
| 794 | /* egress_nick = RBRIDGE_NICKNAME_NONE is valid */ |
| 795 | if (egress_nick != RBRIDGE_NICKNAME_NONE && !VALID_NICK(egress_nick)) |
| 796 | goto discard; |
| 797 | |
| 798 | /* Check if our own nick is valid before we do any forwarding */ |
| 799 | rw_enter(&tip->ti_rwlock, RW_READER); |
| 800 | ournick = tip->ti_nick; |
| 801 | dtnick = tip->ti_treeroot; |
| 802 | rw_exit(&tip->ti_rwlock); |
| 803 | if (!VALID_NICK(ournick)) |
| 804 | goto discard; |
| 805 | |
| 806 | /* |
| 807 | * For Multi-Destination forwarding determine our choice of |
| 808 | * root distribution tree. If we didn't choose a distribution |
| 809 | * tree (dtroots_count=0) then we use the highest priority tree |
| 810 | * root (t_treeroot) else we drop the packet without forwarding. |
| 811 | */ |
| 812 | if (egress_nick == RBRIDGE_NICKNAME_NONE) { |
| 813 | if ((self = trill_node_lookup(tip, ournick)) == NULL) |
| 814 | goto discard; |
| 815 | |
| 816 | /* |
| 817 | * Use the first DT configured for now. In future we |
| 818 | * should have DT selection code here. |
| 819 | */ |
| 820 | if (self->tn_ni->tni_dtrootcount > 0) { |
| 821 | dtnick = TNI_DTROOTNICK(self->tn_ni, 0); |
| 822 | } |
| 823 | |
| 824 | trill_node_unref(tip, self); |
| 825 | if (!VALID_NICK(dtnick)) { |
| 826 | DTRACE_PROBE(trill__fwd__packet__nodtroot); |
| 827 | goto discard; |
| 828 | } |
| 829 | } |
| 830 | |
| 831 | /* |
| 832 | * Retrieve VLAN ID of the native frame used for VLAN |
| 833 | * pruning of multi-destination frames. |
| 834 | */ |
| 835 | if (hdr_info->mhi_istagged) { |
| 836 | vlan = VLAN_ID(hdr_info->mhi_tci); |
| 837 | } |
| 838 | |
| 839 | DTRACE_PROBE2(trill__fwd__packet, mac_header_info_t *, hdr_info, |
| 840 | uint16_t, egress_nick); |
| 841 | if (egress_nick == RBRIDGE_NICKNAME_NONE) { |
| 842 | trill_multidest_fwd(tip, mp, dtnick, |
| 843 | ournick, B_FALSE, NULL, vlan, B_TRUE); |
| 844 | } else { |
| 845 | trill_dest_fwd(tip, mp, egress_nick, B_FALSE, B_FALSE, |
| 846 | RBRIDGE_NICKNAME_NONE); |
| 847 | } |
| 848 | KSPINCR(tks_encap); |
| 849 | return; |
| 850 | |
| 851 | discard: |
| 852 | freemsg(mp); |
| 853 | } |
| 854 | |
| 855 | /* |
| 856 | * This is called when the bridge module has completely torn down a bridge |
| 857 | * instance and all of the attached links. We need to make the TRILL instance |
| 858 | * go away at this point. |
| 859 | */ |
| 860 | static void |
| 861 | trill_br_dstr_cb(void *bptr, bridge_inst_t *bip) |
| 862 | { |
| 863 | trill_inst_t *tip = bptr; |
| 864 | |
| 865 | _NOTE(ARGUNUSED(bip)); |
| 866 | rw_enter(&tip->ti_rwlock, RW_WRITER); |
| 867 | if (tip->ti_binst != NULL) |
| 868 | bridge_trill_brunref(tip->ti_binst); |
| 869 | tip->ti_binst = NULL; |
| 870 | rw_exit(&tip->ti_rwlock); |
| 871 | } |
| 872 | |
| 873 | /* |
| 874 | * This is called when the bridge module is tearing down a link, but before the |
| 875 | * actual tear-down starts. When this function returns, we must make sure that |
| 876 | * we will not initiate any new transmits on this link. |
| 877 | */ |
| 878 | static void |
| 879 | trill_ln_dstr_cb(void *lptr, bridge_link_t *blp) |
| 880 | { |
| 881 | trill_sock_t *tsock = lptr; |
| 882 | |
| 883 | _NOTE(ARGUNUSED(blp)); |
| 884 | trill_stop_recv(tsock); |
| 885 | } |
| 886 | |
| 887 | static void |
| 888 | trill_init(void) |
| 889 | { |
| 890 | list_create(&trill_inst_list, sizeof (trill_inst_t), |
| 891 | offsetof(trill_inst_t, ti_instnode)); |
| 892 | rw_init(&trill_inst_rwlock, NULL, RW_DRIVER, NULL); |
| 893 | bridge_trill_register_cb(trill_recv_pkt_cb, trill_encap_pkt_cb, |
| 894 | trill_br_dstr_cb, trill_ln_dstr_cb); |
| 895 | } |
| 896 | |
| 897 | static void |
| 898 | trill_fini(void) |
| 899 | { |
| 900 | bridge_trill_register_cb(NULL, NULL, NULL, NULL); |
| 901 | rw_destroy(&trill_inst_rwlock); |
| 902 | list_destroy(&trill_inst_list); |
| 903 | } |
| 904 | |
| 905 | /* Loadable module configuration entry points */ |
| 906 | int |
| 907 | _init(void) |
| 908 | { |
| 909 | int rc; |
| 910 | |
| 911 | trill_init(); |
| 912 | if ((rc = mod_install(&ml)) != 0) |
| 913 | trill_fini(); |
| 914 | return (rc); |
| 915 | } |
| 916 | |
| 917 | int |
| 918 | _info(struct modinfo *modinfop) |
| 919 | { |
| 920 | return (mod_info(&ml, modinfop)); |
| 921 | } |
| 922 | |
| 923 | int |
| 924 | _fini(void) |
| 925 | { |
| 926 | int rc; |
| 927 | |
| 928 | rw_enter(&trill_inst_rwlock, RW_READER); |
| 929 | rc = list_is_empty(&trill_inst_list) ? 0 : EBUSY; |
| 930 | rw_exit(&trill_inst_rwlock); |
| 931 | if (rc == 0 && ((rc = mod_remove(&ml)) == 0)) |
| 932 | trill_fini(); |
| 933 | return (rc); |
| 934 | } |
| 935 | |
| 936 | static void |
| 937 | trill_kstats_init(trill_sock_t *tsock, const char *bname) |
| 938 | { |
| 939 | int i; |
| 940 | char kstatname[KSTAT_STRLEN]; |
| 941 | kstat_named_t *knt; |
| 942 | static const char *sock_kstats_list[] = { TRILL_KSSOCK_NAMES }; |
| 943 | char link_name[MAXNAMELEN]; |
| 944 | int num; |
| 945 | int err; |
| 946 | |
| 947 | bzero(link_name, sizeof (link_name)); |
| 948 | if ((err = dls_mgmt_get_linkinfo(tsock->ts_link->bl_linkid, link_name, |
| 949 | NULL, NULL, NULL)) != 0) { |
| 950 | cmn_err(CE_WARN, "%s: trill_kstats_init: error %d retrieving" |
| 951 | " linkinfo for linkid:%d", "trill", err, |
| 952 | tsock->ts_link->bl_linkid); |
| 953 | return; |
| 954 | } |
| 955 | |
| 956 | bzero(kstatname, sizeof (kstatname)); |
| 957 | (void) snprintf(kstatname, sizeof (kstatname), "%s-%s", |
| 958 | bname, link_name); |
| 959 | |
| 960 | num = sizeof (sock_kstats_list) / sizeof (*sock_kstats_list); |
| 961 | for (i = 0; i < num; i++) { |
| 962 | knt = (kstat_named_t *)&(tsock->ts_kstats); |
| 963 | kstat_named_init(&knt[i], sock_kstats_list[i], |
| 964 | KSTAT_DATA_UINT64); |
| 965 | } |
| 966 | |
| 967 | tsock->ts_ksp = kstat_create_zone("trill", 0, kstatname, "sock", |
| 968 | KSTAT_TYPE_NAMED, num, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID); |
| 969 | if (tsock->ts_ksp != NULL) { |
| 970 | tsock->ts_ksp->ks_data = &tsock->ts_kstats; |
| 971 | kstat_install(tsock->ts_ksp); |
| 972 | } |
| 973 | } |
| 974 | |
| 975 | static trill_sock_t * |
| 976 | trill_do_open(int flags) |
| 977 | { |
| 978 | trill_sock_t *tsock; |
| 979 | int kmflag = ((flags & SOCKET_NOSLEEP)) ? KM_NOSLEEP:KM_SLEEP; |
| 980 | |
| 981 | tsock = kmem_zalloc(sizeof (trill_sock_t), kmflag); |
| 982 | if (tsock != NULL) { |
| 983 | tsock->ts_state = TS_UNBND; |
| 984 | tsock->ts_refs++; |
| 985 | mutex_init(&tsock->ts_socklock, NULL, MUTEX_DRIVER, NULL); |
| 986 | cv_init(&tsock->ts_sockthreadwait, NULL, CV_DRIVER, NULL); |
| 987 | cv_init(&tsock->ts_sockclosewait, NULL, CV_DRIVER, NULL); |
| 988 | } |
| 989 | return (tsock); |
| 990 | } |
| 991 | |
| 992 | static int |
| 993 | trill_find_bridge(trill_sock_t *tsock, const char *bname, boolean_t can_create) |
| 994 | { |
| 995 | trill_inst_t *tip, *newtip = NULL; |
| 996 | |
| 997 | /* Allocate some memory (speculatively) before taking locks */ |
| 998 | if (can_create) |
| 999 | newtip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); |
| 1000 | |
| 1001 | rw_enter(&trill_inst_rwlock, RW_WRITER); |
| 1002 | for (tip = list_head(&trill_inst_list); tip != NULL; |
| 1003 | tip = list_next(&trill_inst_list, tip)) { |
| 1004 | if (strcmp(tip->ti_bridgename, bname) == 0) |
| 1005 | break; |
| 1006 | } |
| 1007 | if (tip == NULL) { |
| 1008 | if (!can_create || newtip == NULL) { |
| 1009 | rw_exit(&trill_inst_rwlock); |
| 1010 | return (can_create ? ENOMEM : ENOENT); |
| 1011 | } |
| 1012 | |
| 1013 | tip = newtip; |
| 1014 | newtip = NULL; |
| 1015 | (void) strcpy(tip->ti_bridgename, bname); |
| 1016 | |
| 1017 | /* Register TRILL instance with bridging */ |
| 1018 | tip->ti_binst = bridge_trill_brref(bname, tip); |
| 1019 | if (tip->ti_binst == NULL) { |
| 1020 | rw_exit(&trill_inst_rwlock); |
| 1021 | kmem_free(tip, sizeof (*tip)); |
| 1022 | return (ENOENT); |
| 1023 | } |
| 1024 | |
| 1025 | rw_init(&tip->ti_rwlock, NULL, RW_DRIVER, NULL); |
| 1026 | list_create(&tip->ti_socklist, sizeof (trill_sock_t), |
| 1027 | offsetof(trill_sock_t, ts_socklistnode)); |
| 1028 | list_insert_tail(&trill_inst_list, tip); |
| 1029 | } |
| 1030 | atomic_inc_uint(&tip->ti_refs); |
| 1031 | rw_exit(&trill_inst_rwlock); |
| 1032 | |
| 1033 | /* If we didn't need the preallocated memory, then discard now. */ |
| 1034 | if (newtip != NULL) |
| 1035 | kmem_free(newtip, sizeof (*newtip)); |
| 1036 | |
| 1037 | rw_enter(&tip->ti_rwlock, RW_WRITER); |
| 1038 | list_insert_tail(&(tip->ti_socklist), tsock); |
| 1039 | tsock->ts_tip = tip; |
| 1040 | rw_exit(&tip->ti_rwlock); |
| 1041 | return (0); |
| 1042 | } |
| 1043 | |
| 1044 | static void |
| 1045 | trill_clear_bridge(trill_sock_t *tsock) |
| 1046 | { |
| 1047 | trill_inst_t *tip; |
| 1048 | |
| 1049 | if ((tip = tsock->ts_tip) == NULL) |
| 1050 | return; |
| 1051 | rw_enter(&tip->ti_rwlock, RW_WRITER); |
| 1052 | list_remove(&tip->ti_socklist, tsock); |
| 1053 | if (list_is_empty(&tip->ti_socklist)) |
| 1054 | trill_del_all(tip, B_TRUE); |
| 1055 | rw_exit(&tip->ti_rwlock); |
| 1056 | } |
| 1057 | |
| 1058 | static void |
| 1059 | trill_sock_unref(trill_sock_t *tsock) |
| 1060 | { |
| 1061 | if (atomic_dec_uint_nv(&tsock->ts_refs) == 0) { |
| 1062 | mutex_destroy(&tsock->ts_socklock); |
| 1063 | cv_destroy(&tsock->ts_sockthreadwait); |
| 1064 | cv_destroy(&tsock->ts_sockclosewait); |
| 1065 | kmem_free(tsock, sizeof (trill_sock_t)); |
| 1066 | } |
| 1067 | } |
| 1068 | |
| 1069 | static void |
| 1070 | trill_do_close(trill_sock_t *tsock) |
| 1071 | { |
| 1072 | trill_inst_t *tip; |
| 1073 | |
| 1074 | tip = tsock->ts_tip; |
| 1075 | trill_stop_recv(tsock); |
| 1076 | /* Remove socket from TRILL instance socket list */ |
| 1077 | trill_clear_bridge(tsock); |
| 1078 | tsock->ts_flags |= TSF_SHUTDOWN; |
| 1079 | trill_sock_unref(tsock); |
| 1080 | if (tip != NULL) |
| 1081 | trill_instance_unref(tip); |
| 1082 | } |
| 1083 | |
| 1084 | static void |
| 1085 | trill_del_all(trill_inst_t *tip, boolean_t lockheld) |
| 1086 | { |
| 1087 | int i; |
| 1088 | |
| 1089 | if (!lockheld) |
| 1090 | rw_enter(&tip->ti_rwlock, RW_WRITER); |
| 1091 | for (i = RBRIDGE_NICKNAME_MIN; i < RBRIDGE_NICKNAME_MAX; i++) { |
| 1092 | if (tip->ti_nodes[i] != NULL) |
| 1093 | (void) trill_del_nick(tip, i, B_TRUE); |
| 1094 | } |
| 1095 | if (!lockheld) |
| 1096 | rw_exit(&tip->ti_rwlock); |
| 1097 | } |
| 1098 | |
| 1099 | static void |
| 1100 | trill_node_free(trill_node_t *nick_entry) |
| 1101 | { |
| 1102 | trill_nickinfo_t *tni; |
| 1103 | |
| 1104 | tni = nick_entry->tn_ni; |
| 1105 | kmem_free(tni, TNI_TOTALSIZE(tni)); |
| 1106 | kmem_free(nick_entry, sizeof (trill_node_t)); |
| 1107 | } |
| 1108 | |
| 1109 | static void |
| 1110 | trill_node_unref(trill_inst_t *tip, trill_node_t *tnp) |
| 1111 | { |
| 1112 | if (atomic_dec_uint_nv(&tnp->tn_refs) == 0) { |
| 1113 | if (tnp->tn_tsp != NULL) |
| 1114 | trill_sock_unref(tnp->tn_tsp); |
| 1115 | trill_node_free(tnp); |
| 1116 | (void) atomic_dec_uint_nv(&tip->ti_nodecount); |
| 1117 | } |
| 1118 | } |
| 1119 | |
| 1120 | static trill_node_t * |
| 1121 | trill_node_lookup(trill_inst_t *tip, uint16_t nick) |
| 1122 | { |
| 1123 | trill_node_t *nick_entry; |
| 1124 | |
| 1125 | if (!VALID_NICK(nick)) |
| 1126 | return (NULL); |
| 1127 | rw_enter(&tip->ti_rwlock, RW_READER); |
| 1128 | nick_entry = tip->ti_nodes[nick]; |
| 1129 | if (nick_entry != NULL) { |
| 1130 | atomic_inc_uint(&nick_entry->tn_refs); |
| 1131 | } |
| 1132 | rw_exit(&tip->ti_rwlock); |
| 1133 | return (nick_entry); |
| 1134 | } |
| 1135 | |
| 1136 | static int |
| 1137 | trill_del_nick(trill_inst_t *tip, uint16_t nick, boolean_t lockheld) |
| 1138 | { |
| 1139 | trill_node_t *nick_entry; |
| 1140 | int rc = ENOENT; |
| 1141 | |
| 1142 | if (!lockheld) |
| 1143 | rw_enter(&tip->ti_rwlock, RW_WRITER); |
| 1144 | if (VALID_NICK(nick)) { |
| 1145 | nick_entry = tip->ti_nodes[nick]; |
| 1146 | if (nick_entry != NULL) { |
| 1147 | trill_node_unref(tip, nick_entry); |
| 1148 | tip->ti_nodes[nick] = NULL; |
| 1149 | rc = 0; |
| 1150 | } |
| 1151 | } |
| 1152 | if (!lockheld) |
| 1153 | rw_exit(&tip->ti_rwlock); |
| 1154 | return (rc); |
| 1155 | } |
| 1156 | |
| 1157 | static int |
| 1158 | trill_add_nick(trill_inst_t *tip, void *arg, boolean_t self, int mode) |
| 1159 | { |
| 1160 | uint16_t nick; |
| 1161 | int size; |
| 1162 | trill_node_t *tnode; |
| 1163 | trill_nickinfo_t tnihdr; |
| 1164 | |
| 1165 | /* First make sure we have at least the header available */ |
| 1166 | if (ddi_copyin(arg, &tnihdr, sizeof (trill_nickinfo_t), mode) != 0) |
| 1167 | return (EFAULT); |
| 1168 | |
| 1169 | nick = tnihdr.tni_nick; |
| 1170 | if (!VALID_NICK(nick)) { |
| 1171 | DTRACE_PROBE1(trill__add__nick__bad, trill_nickinfo_t *, |
| 1172 | &tnihdr); |
| 1173 | return (EINVAL); |
| 1174 | } |
| 1175 | |
| 1176 | size = TNI_TOTALSIZE(&tnihdr); |
| 1177 | if (size > TNI_MAXSIZE) |
| 1178 | return (EINVAL); |
| 1179 | tnode = kmem_zalloc(sizeof (trill_node_t), KM_SLEEP); |
| 1180 | tnode->tn_ni = kmem_zalloc(size, KM_SLEEP); |
| 1181 | if (ddi_copyin(arg, tnode->tn_ni, size, mode) != 0) { |
| 1182 | kmem_free(tnode->tn_ni, size); |
| 1183 | kmem_free(tnode, sizeof (trill_node_t)); |
| 1184 | return (EFAULT); |
| 1185 | } |
| 1186 | |
| 1187 | tnode->tn_refs++; |
| 1188 | rw_enter(&tip->ti_rwlock, RW_WRITER); |
| 1189 | if (tip->ti_nodes[nick] != NULL) |
| 1190 | (void) trill_del_nick(tip, nick, B_TRUE); |
| 1191 | |
| 1192 | if (self) { |
| 1193 | tip->ti_nick = nick; |
| 1194 | } else { |
| 1195 | tnode->tn_tsp = find_trill_link(tip, |
| 1196 | tnode->tn_ni->tni_linkid); |
| 1197 | } |
| 1198 | DTRACE_PROBE2(trill__add__nick, trill_node_t *, tnode, |
| 1199 | uint16_t, nick); |
| 1200 | tip->ti_nodes[nick] = tnode; |
| 1201 | tip->ti_nodecount++; |
| 1202 | rw_exit(&tip->ti_rwlock); |
| 1203 | return (0); |
| 1204 | } |
| 1205 | |
| 1206 | static int |
| 1207 | trill_do_ioctl(trill_sock_t *tsock, int cmd, void *arg, int mode) |
| 1208 | { |
| 1209 | int error = 0; |
| 1210 | trill_inst_t *tip = tsock->ts_tip; |
| 1211 | |
| 1212 | switch (cmd) { |
| 1213 | case TRILL_DESIGVLAN: { |
| 1214 | uint16_t desigvlan; |
| 1215 | |
| 1216 | if (ddi_copyin(arg, &desigvlan, sizeof (desigvlan), mode) != 0) |
| 1217 | return (EFAULT); |
| 1218 | tsock->ts_desigvlan = desigvlan; |
| 1219 | break; |
| 1220 | } |
| 1221 | case TRILL_VLANFWDER: { |
| 1222 | uint8_t vlans[TRILL_VLANS_ARRSIZE]; |
| 1223 | |
| 1224 | if (tsock->ts_link == NULL) |
| 1225 | return (EINVAL); |
| 1226 | if ((ddi_copyin(arg, vlans, sizeof (vlans), mode)) != 0) |
| 1227 | return (EFAULT); |
| 1228 | bridge_trill_setvlans(tsock->ts_link, vlans); |
| 1229 | break; |
| 1230 | } |
| 1231 | case TRILL_SETNICK: |
| 1232 | if (tip == NULL) |
| 1233 | return (EINVAL); |
| 1234 | error = trill_add_nick(tip, arg, B_TRUE, mode); |
| 1235 | break; |
| 1236 | |
| 1237 | case TRILL_GETNICK: |
| 1238 | if (tip == NULL) |
| 1239 | return (EINVAL); |
| 1240 | rw_enter(&tip->ti_rwlock, RW_READER); |
| 1241 | if (ddi_copyout(&tip->ti_nick, arg, sizeof (tip->ti_nick), |
| 1242 | mode) != 0) |
| 1243 | error = EFAULT; |
| 1244 | rw_exit(&tip->ti_rwlock); |
| 1245 | break; |
| 1246 | |
| 1247 | case TRILL_ADDNICK: |
| 1248 | if (tip == NULL) |
| 1249 | break; |
| 1250 | error = trill_add_nick(tip, arg, B_FALSE, mode); |
| 1251 | break; |
| 1252 | |
| 1253 | case TRILL_DELNICK: { |
| 1254 | uint16_t delnick; |
| 1255 | |
| 1256 | if (tip == NULL) |
| 1257 | break; |
| 1258 | if (ddi_copyin(arg, &delnick, sizeof (delnick), mode) != 0) |
| 1259 | return (EFAULT); |
| 1260 | error = trill_del_nick(tip, delnick, B_FALSE); |
| 1261 | break; |
| 1262 | } |
| 1263 | case TRILL_DELALL: |
| 1264 | if (tip == NULL) |
| 1265 | break; |
| 1266 | trill_del_all(tip, B_FALSE); |
| 1267 | break; |
| 1268 | |
| 1269 | case TRILL_TREEROOT: { |
| 1270 | uint16_t treeroot; |
| 1271 | |
| 1272 | if (tip == NULL) |
| 1273 | break; |
| 1274 | if (ddi_copyin(arg, &treeroot, sizeof (treeroot), mode) != 0) |
| 1275 | return (EFAULT); |
| 1276 | if (!VALID_NICK(treeroot)) |
| 1277 | return (EINVAL); |
| 1278 | rw_enter(&tip->ti_rwlock, RW_WRITER); |
| 1279 | tip->ti_treeroot = treeroot; |
| 1280 | rw_exit(&tip->ti_rwlock); |
| 1281 | break; |
| 1282 | } |
| 1283 | case TRILL_HWADDR: |
| 1284 | if (tsock->ts_link == NULL) |
| 1285 | break; |
| 1286 | if (ddi_copyout(tsock->ts_link->bl_local_mac, arg, ETHERADDRL, |
| 1287 | mode) != 0) |
| 1288 | return (EFAULT); |
| 1289 | break; |
| 1290 | |
| 1291 | case TRILL_NEWBRIDGE: { |
| 1292 | char bname[MAXLINKNAMELEN]; |
| 1293 | |
| 1294 | if (tsock->ts_state != TS_UNBND) |
| 1295 | return (ENOTSUP); |
| 1296 | /* ts_tip can only be set once */ |
| 1297 | if (tip != NULL) |
| 1298 | return (EEXIST); |
| 1299 | if (ddi_copyin(arg, bname, sizeof (bname), mode) != 0) |
| 1300 | return (EFAULT); |
| 1301 | bname[MAXLINKNAMELEN-1] = '\0'; |
| 1302 | error = trill_find_bridge(tsock, bname, B_TRUE); |
| 1303 | break; |
| 1304 | } |
| 1305 | |
| 1306 | case TRILL_GETBRIDGE: { |
| 1307 | char bname[MAXLINKNAMELEN]; |
| 1308 | |
| 1309 | /* ts_tip can only be set once */ |
| 1310 | if (tip != NULL) |
| 1311 | return (EEXIST); |
| 1312 | if (ddi_copyin(arg, bname, sizeof (bname), mode) != 0) |
| 1313 | return (EFAULT); |
| 1314 | bname[MAXLINKNAMELEN - 1] = '\0'; |
| 1315 | error = trill_find_bridge(tsock, bname, B_FALSE); |
| 1316 | break; |
| 1317 | } |
| 1318 | |
| 1319 | case TRILL_LISTNICK: { |
| 1320 | trill_listnick_t tln; |
| 1321 | trill_node_t *tnp; |
| 1322 | trill_nickinfo_t *tnip; |
| 1323 | uint16_t nick; |
| 1324 | |
| 1325 | if (tip == NULL) |
| 1326 | return (EINVAL); |
| 1327 | if (ddi_copyin(arg, &tln, sizeof (tln), mode) != 0) |
| 1328 | return (EFAULT); |
| 1329 | nick = tln.tln_nick; |
| 1330 | if (nick >= RBRIDGE_NICKNAME_MAX) { |
| 1331 | error = EINVAL; |
| 1332 | break; |
| 1333 | } |
| 1334 | rw_enter(&tip->ti_rwlock, RW_READER); |
| 1335 | while (++nick < RBRIDGE_NICKNAME_MAX) { |
| 1336 | if ((tnp = tip->ti_nodes[nick]) != NULL) { |
| 1337 | tnip = tnp->tn_ni; |
| 1338 | ASSERT(nick == tnip->tni_nick); |
| 1339 | tln.tln_nick = nick; |
| 1340 | bcopy(tnip->tni_adjsnpa, tln.tln_nexthop, |
| 1341 | ETHERADDRL); |
| 1342 | tln.tln_ours = nick == tip->ti_nick; |
| 1343 | if (tln.tln_ours || tnp->tn_tsp == NULL) { |
| 1344 | tln.tln_linkid = |
| 1345 | DATALINK_INVALID_LINKID; |
| 1346 | } else { |
| 1347 | tln.tln_linkid = |
| 1348 | tnp->tn_tsp->ts_link->bl_linkid; |
| 1349 | } |
| 1350 | break; |
| 1351 | } |
| 1352 | } |
| 1353 | rw_exit(&tip->ti_rwlock); |
| 1354 | if (nick >= RBRIDGE_NICKNAME_MAX) |
| 1355 | bzero(&tln, sizeof (tln)); |
| 1356 | if (ddi_copyout(&tln, arg, sizeof (tln), mode) != 0) |
| 1357 | return (EFAULT); |
| 1358 | break; |
| 1359 | } |
| 1360 | |
| 1361 | /* |
| 1362 | * Port flush: this is used when we lose AF on a port. We must discard |
| 1363 | * all regular bridge forwarding entries on this port with the |
| 1364 | * indicated VLAN. |
| 1365 | */ |
| 1366 | case TRILL_PORTFLUSH: { |
| 1367 | uint16_t vlan = (uint16_t)(uintptr_t)arg; |
| 1368 | |
| 1369 | if (tsock->ts_link == NULL) |
| 1370 | return (EINVAL); |
| 1371 | bridge_trill_flush(tsock->ts_link, vlan, B_FALSE); |
| 1372 | break; |
| 1373 | } |
| 1374 | |
| 1375 | /* |
| 1376 | * Nick flush: this is used when we lose AF on a port. We must discard |
| 1377 | * all bridge TRILL forwarding entries on this port with the indicated |
| 1378 | * VLAN. |
| 1379 | */ |
| 1380 | case TRILL_NICKFLUSH: { |
| 1381 | uint16_t vlan = (uint16_t)(uintptr_t)arg; |
| 1382 | |
| 1383 | if (tsock->ts_link == NULL) |
| 1384 | return (EINVAL); |
| 1385 | bridge_trill_flush(tsock->ts_link, vlan, B_TRUE); |
| 1386 | break; |
| 1387 | } |
| 1388 | |
| 1389 | case TRILL_GETMTU: |
| 1390 | if (tsock->ts_link == NULL) |
| 1391 | break; |
| 1392 | if (ddi_copyout(&tsock->ts_link->bl_maxsdu, arg, |
| 1393 | sizeof (uint_t), mode) != 0) |
| 1394 | return (EFAULT); |
| 1395 | break; |
| 1396 | |
| 1397 | default: |
| 1398 | error = ENOTSUP; |
| 1399 | break; |
| 1400 | } |
| 1401 | |
| 1402 | return (error); |
| 1403 | } |
| 1404 | |
| 1405 | /* |
| 1406 | * Sends received packet back upstream on the TRILL socket. |
| 1407 | * Consumes passed mblk_t. |
| 1408 | */ |
| 1409 | static void |
| 1410 | trill_ctrl_input(trill_sock_t *tsock, mblk_t *mp, const uint8_t *saddr, |
| 1411 | uint16_t tci) |
| 1412 | { |
| 1413 | int udi_size; |
| 1414 | mblk_t *mp1; |
| 1415 | struct T_unitdata_ind *tudi; |
| 1416 | struct sockaddr_dl *sdl; |
| 1417 | char *lladdr; |
| 1418 | int error; |
| 1419 | |
| 1420 | ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); |
| 1421 | if (tsock->ts_flow_ctrld) { |
| 1422 | freemsg(mp); |
| 1423 | KSPINCR(tks_drops); |
| 1424 | return; |
| 1425 | } |
| 1426 | |
| 1427 | udi_size = sizeof (struct T_unitdata_ind) + |
| 1428 | sizeof (struct sockaddr_dl); |
| 1429 | mp1 = allocb(udi_size, BPRI_MED); |
| 1430 | if (mp1 == NULL) { |
| 1431 | freemsg(mp); |
| 1432 | KSPINCR(tks_drops); |
| 1433 | return; |
| 1434 | } |
| 1435 | |
| 1436 | mp1->b_cont = mp; |
| 1437 | mp = mp1; |
| 1438 | mp->b_datap->db_type = M_PROTO; |
| 1439 | /* LINTED: alignment */ |
| 1440 | tudi = (struct T_unitdata_ind *)mp->b_rptr; |
| 1441 | mp->b_wptr = (uchar_t *)tudi + udi_size; |
| 1442 | |
| 1443 | tudi->PRIM_type = T_UNITDATA_IND; |
| 1444 | tudi->SRC_length = sizeof (struct sockaddr_dl); |
| 1445 | tudi->SRC_offset = sizeof (struct T_unitdata_ind); |
| 1446 | tudi->OPT_length = 0; |
| 1447 | tudi->OPT_offset = sizeof (struct T_unitdata_ind) + |
| 1448 | sizeof (struct sockaddr_dl); |
| 1449 | |
| 1450 | /* Information of the link on which packet was received. */ |
| 1451 | sdl = (struct sockaddr_dl *)&tudi[1]; |
| 1452 | (void) memset(sdl, 0, sizeof (struct sockaddr_dl)); |
| 1453 | sdl->sdl_family = AF_TRILL; |
| 1454 | |
| 1455 | /* LINTED: alignment */ |
| 1456 | *(datalink_id_t *)sdl->sdl_data = tsock->ts_link->bl_linkid; |
| 1457 | sdl->sdl_nlen = sizeof (tsock->ts_link->bl_linkid); |
| 1458 | |
| 1459 | lladdr = LLADDR(sdl); |
| 1460 | (void) memcpy(lladdr, saddr, ETHERADDRL); |
| 1461 | lladdr += ETHERADDRL; |
| 1462 | sdl->sdl_alen = ETHERADDRL; |
| 1463 | |
| 1464 | /* LINTED: alignment */ |
| 1465 | *(uint16_t *)lladdr = tci; |
| 1466 | sdl->sdl_slen = sizeof (uint16_t); |
| 1467 | |
| 1468 | DTRACE_PROBE2(trill__ctrl__input, trill_sock_t *, tsock, mblk_t *, mp); |
| 1469 | (*tsock->ts_conn_upcalls->su_recv)(tsock->ts_conn_upper_handle, |
| 1470 | mp, msgdsize(mp), 0, &error, NULL); |
| 1471 | |
| 1472 | if (error == ENOSPC) { |
| 1473 | mutex_enter(&tsock->ts_socklock); |
| 1474 | (*tsock->ts_conn_upcalls->su_recv)(tsock->ts_conn_upper_handle, |
| 1475 | NULL, 0, 0, &error, NULL); |
| 1476 | if (error == ENOSPC) |
| 1477 | tsock->ts_flow_ctrld = B_TRUE; |
| 1478 | mutex_exit(&tsock->ts_socklock); |
| 1479 | KSPINCR(tks_drops); |
| 1480 | } else if (error != 0) { |
| 1481 | KSPINCR(tks_drops); |
| 1482 | } else { |
| 1483 | KSPINCR(tks_recv); |
| 1484 | } |
| 1485 | |
| 1486 | DTRACE_PROBE2(trill__ctrl__input__done, trill_sock_t *, |
| 1487 | tsock, int, error); |
| 1488 | } |
| 1489 | |
| 1490 | /* ARGSUSED */ |
| 1491 | static void |
| 1492 | trill_activate(sock_lower_handle_t proto_handle, |
| 1493 | sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, |
| 1494 | int flags, cred_t *cr) |
| 1495 | { |
| 1496 | trill_sock_t *tsock = (trill_sock_t *)proto_handle; |
| 1497 | struct sock_proto_props sopp; |
| 1498 | |
| 1499 | tsock->ts_conn_upcalls = sock_upcalls; |
| 1500 | tsock->ts_conn_upper_handle = sock_handle; |
| 1501 | |
| 1502 | sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | |
| 1503 | SOCKOPT_RCVLOWAT | SOCKOPT_MAXADDRLEN | SOCKOPT_MAXPSZ | |
| 1504 | SOCKOPT_MAXBLK | SOCKOPT_MINPSZ; |
| 1505 | sopp.sopp_wroff = 0; |
| 1506 | sopp.sopp_rxhiwat = SOCKET_RECVHIWATER; |
| 1507 | sopp.sopp_rxlowat = SOCKET_RECVLOWATER; |
| 1508 | sopp.sopp_maxaddrlen = sizeof (struct sockaddr_dl); |
| 1509 | sopp.sopp_maxpsz = INFPSZ; |
| 1510 | sopp.sopp_maxblk = INFPSZ; |
| 1511 | sopp.sopp_minpsz = 0; |
| 1512 | (*tsock->ts_conn_upcalls->su_set_proto_props)( |
| 1513 | tsock->ts_conn_upper_handle, &sopp); |
| 1514 | } |
| 1515 | |
| 1516 | /* ARGSUSED */ |
| 1517 | static int |
| 1518 | trill_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) |
| 1519 | { |
| 1520 | trill_sock_t *tsock = (trill_sock_t *)proto_handle; |
| 1521 | |
| 1522 | trill_do_close(tsock); |
| 1523 | return (0); |
| 1524 | } |
| 1525 | |
| 1526 | /* ARGSUSED */ |
| 1527 | static int |
| 1528 | trill_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, |
| 1529 | socklen_t len, cred_t *cr) |
| 1530 | { |
| 1531 | int error; |
| 1532 | trill_sock_t *tsock = (trill_sock_t *)proto_handle; |
| 1533 | |
| 1534 | if (sa == NULL) |
| 1535 | error = trill_do_unbind(tsock); |
| 1536 | else |
| 1537 | error = trill_start_recv(tsock, sa, len); |
| 1538 | |
| 1539 | return (error); |
| 1540 | } |
| 1541 | |
| 1542 | /* ARGSUSED */ |
| 1543 | static int |
Rishi Srivatsavai | 4eaa471 | 2009-09-10 15:11:49 -0400 | [diff] [blame] | 1544 | trill_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, |
| 1545 | cred_t *cr) |
| 1546 | { |
| 1547 | trill_sock_t *tsock = (trill_sock_t *)proto_handle; |
| 1548 | struct sockaddr_dl *laddr; |
| 1549 | uint16_t tci; |
| 1550 | |
| 1551 | ASSERT(DB_TYPE(mp) == M_DATA); |
| 1552 | ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); |
| 1553 | |
| 1554 | if (msg->msg_name == NULL || msg->msg_namelen != sizeof (*laddr)) |
| 1555 | goto eproto; |
| 1556 | |
| 1557 | /* |
| 1558 | * The name is a datalink_id_t, the address is an Ethernet address, and |
| 1559 | * the selector value is the VLAN ID. |
| 1560 | */ |
| 1561 | laddr = (struct sockaddr_dl *)msg->msg_name; |
| 1562 | if (laddr->sdl_nlen != sizeof (datalink_id_t) || |
| 1563 | laddr->sdl_alen != ETHERADDRL || |
| 1564 | (laddr->sdl_slen != sizeof (tci) && laddr->sdl_slen != 0)) |
| 1565 | goto eproto; |
| 1566 | |
| 1567 | mutex_enter(&tsock->ts_socklock); |
| 1568 | if (tsock->ts_state != TS_IDLE || tsock->ts_link == NULL) { |
| 1569 | mutex_exit(&tsock->ts_socklock); |
| 1570 | goto eproto; |
| 1571 | } |
| 1572 | atomic_inc_uint(&tsock->ts_sockthreadcount); |
| 1573 | mutex_exit(&tsock->ts_socklock); |
| 1574 | |
| 1575 | /* |
| 1576 | * Safe to dereference VLAN now, as we've checked the user's specified |
| 1577 | * values, and alignment is now guaranteed. |
| 1578 | */ |
| 1579 | if (laddr->sdl_slen == 0) { |
| 1580 | tci = TRILL_NO_TCI; |
| 1581 | } else { |
| 1582 | /* LINTED: alignment */ |
| 1583 | tci = *(uint16_t *)(LLADDR(laddr) + ETHERADDRL); |
| 1584 | } |
| 1585 | |
| 1586 | mp = create_trill_header(tsock, mp, (const uchar_t *)LLADDR(laddr), |
| 1587 | B_TRUE, B_FALSE, tci, msgdsize(mp)); |
| 1588 | if (mp != NULL) { |
| 1589 | mp = bridge_trill_output(tsock->ts_link, mp); |
| 1590 | if (mp == NULL) { |
| 1591 | KSPINCR(tks_sent); |
| 1592 | } else { |
| 1593 | freemsg(mp); |
| 1594 | KSPINCR(tks_drops); |
| 1595 | } |
| 1596 | } |
| 1597 | |
| 1598 | /* Wake up any threads blocking on us */ |
| 1599 | if (atomic_dec_uint_nv(&tsock->ts_sockthreadcount) == 0) |
| 1600 | cv_broadcast(&tsock->ts_sockthreadwait); |
| 1601 | return (0); |
| 1602 | |
| 1603 | eproto: |
| 1604 | freemsg(mp); |
| 1605 | KSPINCR(tks_drops); |
| 1606 | return (EPROTO); |
| 1607 | } |
| 1608 | |
| 1609 | /* ARGSUSED */ |
| 1610 | static int |
| 1611 | trill_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, |
| 1612 | int mode, int32_t *rvalp, cred_t *cr) |
| 1613 | { |
| 1614 | trill_sock_t *tsock = (trill_sock_t *)proto_handle; |
| 1615 | int rc; |
| 1616 | |
| 1617 | switch (cmd) { |
| 1618 | /* List of unprivileged TRILL ioctls */ |
| 1619 | case TRILL_GETNICK: |
| 1620 | case TRILL_GETBRIDGE: |
| 1621 | case TRILL_LISTNICK: |
| 1622 | break; |
| 1623 | default: |
| 1624 | if (secpolicy_dl_config(cr) != 0) |
| 1625 | return (EPERM); |
| 1626 | break; |
| 1627 | } |
| 1628 | |
| 1629 | /* Lock ensures socket state is unchanged during ioctl handling */ |
| 1630 | mutex_enter(&tsock->ts_socklock); |
| 1631 | rc = trill_do_ioctl(tsock, cmd, (void *)arg, mode); |
| 1632 | mutex_exit(&tsock->ts_socklock); |
| 1633 | return (rc); |
| 1634 | } |
| 1635 | |
Rishi Srivatsavai | 4eaa471 | 2009-09-10 15:11:49 -0400 | [diff] [blame] | 1636 | static void |
| 1637 | trill_clr_flowctrl(sock_lower_handle_t proto_handle) |
| 1638 | { |
| 1639 | trill_sock_t *tsock = (trill_sock_t *)proto_handle; |
| 1640 | |
| 1641 | mutex_enter(&tsock->ts_socklock); |
| 1642 | tsock->ts_flow_ctrld = B_FALSE; |
| 1643 | mutex_exit(&tsock->ts_socklock); |
| 1644 | } |
| 1645 | |
/*
 * Downcall vector handed to the socket framework by trill_create().  Only
 * activate, bind, send, flow-control clearing, ioctl and close are
 * implemented here; the remaining operations use the generic
 * sock_*_notsupp stubs or are left NULL.
 */
static sock_downcalls_t sock_trill_downcalls = {
	trill_activate,			/* sd_activate */
	sock_accept_notsupp,		/* sd_accept */
	trill_bind,			/* sd_bind */
	sock_listen_notsupp,		/* sd_listen */
	sock_connect_notsupp,		/* sd_connect */
	sock_getpeername_notsupp,	/* sd_getpeername */
	sock_getsockname_notsupp,	/* sd_getsockname */
	sock_getsockopt_notsupp,	/* sd_getsockopt */
	sock_setsockopt_notsupp,	/* sd_setsockopt */
	trill_send,			/* sd_send */
	NULL,				/* sd_send_uio */
	NULL,				/* sd_recv_uio */
	NULL,				/* sd_poll */
	sock_shutdown_notsupp,		/* sd_shutdown */
	trill_clr_flowctrl,		/* sd_clr_flowctrl */
	trill_ioctl,			/* sd_ioctl */
	trill_close			/* sd_close */
};
| 1665 | |
| 1666 | /* ARGSUSED */ |
| 1667 | static sock_lower_handle_t |
| 1668 | trill_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, |
| 1669 | uint_t *smodep, int *errorp, int flags, cred_t *credp) |
| 1670 | { |
| 1671 | trill_sock_t *tsock; |
| 1672 | |
| 1673 | if (family != AF_TRILL || type != SOCK_DGRAM || proto != 0) { |
| 1674 | *errorp = EPROTONOSUPPORT; |
| 1675 | return (NULL); |
| 1676 | } |
| 1677 | |
| 1678 | *sock_downcalls = &sock_trill_downcalls; |
| 1679 | *smodep = SM_ATOMIC; |
| 1680 | tsock = trill_do_open(flags); |
| 1681 | *errorp = (tsock != NULL) ? 0:ENOMEM; |
| 1682 | return ((sock_lower_handle_t)tsock); |
| 1683 | } |