| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| */ |
| |
| #include <sys/types.h> |
| #include <sys/conf.h> |
| #include <sys/modctl.h> |
| #include <sys/stat.h> |
| #include <sys/stream.h> |
| #include <sys/strsun.h> |
| #include <sys/ddi.h> |
| #include <sys/sunddi.h> |
| #include <sys/priv_names.h> |
| #include <inet/common.h> |
| |
| #define _SUN_TPI_VERSION 2 |
| #include <sys/tihdr.h> |
| #include <sys/timod.h> |
| #include <sys/tiuser.h> |
| #include <sys/suntpi.h> |
| #include <inet/common.h> |
| #include <inet/ip.h> |
| #include <inet/mi.h> |
| #include <inet/proto_set.h> |
| #include <sys/ib/clients/rds/rds.h> |
| #include <sys/policy.h> |
| #include <inet/ipclassifier.h> |
| #include <sys/ib/clients/rds/rds_kstat.h> |
| #include "sys/random.h" |
| #include <sys/ib/clients/rds/rds_transport.h> |
| #include <sys/ib/ibtl/ibti.h> |
| |
| |
/* Driver identity and STREAMS registration parameters. */
#define	RDS_NAME		"rds"
#define	RDS_STRTAB		rdsinfo
#define	RDS_DEVDESC		"RDS STREAMS driver"
#define	RDS_DEVMINOR		0
/*
 * Parenthesized so the OR keeps its meaning when the macro is expanded
 * next to an operator that binds tighter than '|'.
 */
#define	RDS_DEVMTFLAGS		(D_MP | D_SYNCSTR)
#define	RDS_DEFAULT_PRIV_MODE	0666

/* Inclusive range of port numbers that may be bound. */
#define	rds_smallest_port	1
#define	rds_largest_port	65535

/* Default values for the receive and transmit queue watermarks. */
#define	RDS_RECV_HIWATER	(56 * 1024)
#define	RDS_RECV_LOWATER	128
#define	RDS_XMIT_HIWATER	(56 * 1024)
#define	RDS_XMIT_LOWATER	1024

/*
 * Expands to a constant-false "0 &&" prefix, so an expression written
 * directly after it is never evaluated.
 */
#define	RDS_DPRINTF2		0 &&
/* Prefix used in the debug messages of this file. */
#define	LABEL			"RDS"
| |
/*
 * Pair of port numbers: the source port first, then the destination port.
 */
typedef struct rdsahdr_s {
	in_port_t	uha_src_port;	/* Source port */
	in_port_t	uha_dst_port;	/* Destination port */
} rdsha_t;

/* Presumably the size in bytes of the header above -- TODO confirm. */
#define	RDSH_SIZE	4
| |
| int rds_recv_hiwat = RDS_RECV_HIWATER; |
| int rds_recv_lowat = RDS_RECV_LOWATER; |
| int rds_xmit_hiwat = RDS_XMIT_HIWATER; |
| int rds_xmit_lowat = RDS_XMIT_LOWATER; |
| |
| int rdsdebug; |
| |
| static dev_info_t *rds_dev_info; |
| |
| /* Hint not protected by any lock */ |
| static in_port_t rds_next_port_to_try; |
| |
| ldi_ident_t rds_li; |
| static int loopmax = rds_largest_port - rds_smallest_port + 1; |
| |
| /* global configuration variables */ |
| uint_t UserBufferSize; |
| uint_t rds_rx_pkts_pending_hwm; |
| |
| extern void rds_ioctl(queue_t *, mblk_t *); |
| extern void rds_ioctl_copyin_done(queue_t *q, mblk_t *mp); |
| |
| int rds_open_transport_driver(); |
| int rds_close_transport_driver(); |
| |
| #define RDS_CURRENT_PORT_QUOTA() \ |
| (rds_rx_pkts_pending_hwm/RDS_GET_NPORT()) |
| |
| krwlock_t rds_transport_lock; |
| ldi_handle_t rds_transport_handle = NULL; |
| rds_transport_ops_t *rds_transport_ops = NULL; |
| |
| static int |
| rds_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) |
| { |
| int ret; |
| |
| if (cmd != DDI_ATTACH) |
| return (DDI_FAILURE); |
| |
| rds_dev_info = devi; |
| |
| ret = ddi_create_minor_node(devi, RDS_NAME, S_IFCHR, |
| RDS_DEVMINOR, DDI_PSEUDO, 0); |
| if (ret != DDI_SUCCESS) { |
| return (ret); |
| } |
| |
| return (DDI_SUCCESS); |
| } |
| |
| static int |
| rds_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) |
| { |
| if (cmd != DDI_DETACH) |
| return (DDI_FAILURE); |
| |
| ASSERT(devi == rds_dev_info); |
| |
| ddi_remove_minor_node(devi, NULL); |
| |
| return (DDI_SUCCESS); |
| } |
| |
| /* ARGSUSED */ |
| static int |
| rds_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) |
| { |
| int error = DDI_FAILURE; |
| |
| switch (cmd) { |
| case DDI_INFO_DEVT2DEVINFO: |
| if (rds_dev_info != NULL) { |
| *result = (void *)rds_dev_info; |
| error = DDI_SUCCESS; |
| } |
| break; |
| |
| case DDI_INFO_DEVT2INSTANCE: |
| *result = NULL; |
| error = DDI_SUCCESS; |
| break; |
| |
| default: |
| break; |
| } |
| |
| return (error); |
| } |
| |
| |
| /*ARGSUSED*/ |
| static int |
| rds_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) |
| { |
| rds_t *rds; |
| int ret; |
| |
| if (is_system_labeled()) { |
| /* |
| * RDS socket is not supported on labeled systems |
| */ |
| return (ESOCKTNOSUPPORT); |
| } |
| |
| /* Open the transport driver if IB HW is present */ |
| rw_enter(&rds_transport_lock, RW_READER); |
| if (rds_transport_handle == NULL) { |
| rw_exit(&rds_transport_lock); |
| ret = rds_open_transport_driver(); |
| rw_enter(&rds_transport_lock, RW_READER); |
| |
| if (ret != 0) { |
| /* Transport driver failed to load */ |
| rw_exit(&rds_transport_lock); |
| return (ret); |
| } |
| } |
| rw_exit(&rds_transport_lock); |
| |
| if (sflag == MODOPEN) { |
| return (EINVAL); |
| } |
| |
| /* Reopen not supported */ |
| if (q->q_ptr != NULL) { |
| dprint(2, ("%s: Reopen is not supported: %p", LABEL, q->q_ptr)); |
| return (0); |
| } |
| |
| rds = rds_create(q, credp); |
| if (rds == NULL) { |
| dprint(2, ("%s: rds_create failed", LABEL)); |
| return (0); |
| } |
| |
| q->q_ptr = WR(q)->q_ptr = rds; |
| rds->rds_state = TS_UNBND; |
| rds->rds_family = AF_INET_OFFLOAD; |
| |
| q->q_hiwat = rds_recv_hiwat; |
| q->q_lowat = rds_recv_lowat; |
| |
| qprocson(q); |
| |
| WR(q)->q_hiwat = rds_xmit_hiwat; |
| WR(q)->q_lowat = rds_xmit_lowat; |
| |
| /* Set the Stream head watermarks */ |
| (void) proto_set_rx_hiwat(q, NULL, rds_recv_hiwat); |
| (void) proto_set_rx_lowat(q, NULL, rds_recv_lowat); |
| |
| return (0); |
| } |
| |
| static int |
| rds_close(queue_t *q) |
| { |
| rds_t *rdsp = (rds_t *)q->q_ptr; |
| |
| qprocsoff(q); |
| |
| /* |
| * NPORT should be decremented only if this socket was previously |
| * bound to an RDS port. |
| */ |
| if (rdsp->rds_state >= TS_IDLE) { |
| RDS_DECR_NPORT(); |
| RDS_SET_PORT_QUOTA(RDS_CURRENT_PORT_QUOTA()); |
| rds_transport_ops-> |
| rds_transport_resume_port(ntohs(rdsp->rds_port)); |
| } |
| |
| /* close the transport driver if this is the last socket */ |
| if (RDS_GET_NPORT() == 1) { |
| (void) rds_close_transport_driver(); |
| } |
| |
| /* |
| * We set the flags without holding a lock as this is |
| * just a hint for the fanout lookup to skip this rds. |
| * We dont free the struct until it's out of the hash and |
| * the ref count goes down. |
| */ |
| rdsp->rds_flags |= RDS_CLOSING; |
| rds_bind_hash_remove(rdsp, B_FALSE); |
| mutex_enter(&rdsp->rds_lock); |
| ASSERT(rdsp->rds_refcnt > 0); |
| if (rdsp->rds_refcnt != 1) { |
| cv_wait(&rdsp->rds_refcv, &rdsp->rds_lock); |
| } |
| mutex_exit(&rdsp->rds_lock); |
| RDS_DEC_REF_CNT(rdsp); |
| RD(q)->q_ptr = NULL; |
| WR(q)->q_ptr = NULL; |
| return (0); |
| } |
| |
| /* |
| * Add a new message to the socket |
| */ |
| int |
| rds_deliver_new_msg(mblk_t *mp, ipaddr_t local_addr, ipaddr_t rem_addr, |
| in_port_t local_port, in_port_t rem_port, zoneid_t zoneid) |
| { |
| rds_t *rds; |
| struct T_unitdata_ind *tudi; |
| int udi_size; /* Size of T_unitdata_ind */ |
| mblk_t *mp1; |
| sin_t *sin; |
| int error = 0; |
| |
| local_port = htons(local_port); |
| rem_port = htons(rem_port); |
| |
| ASSERT(mp->b_datap->db_type == M_DATA); |
| rds = rds_fanout(local_addr, rem_addr, local_port, rem_port, zoneid); |
| if (rds == NULL) { |
| dprint(2, ("%s: rds_fanout failed: (0x%x 0x%x %d %d)", LABEL, |
| local_addr, rem_addr, ntohs(local_port), ntohs(rem_port))); |
| freemsg(mp); |
| return (error); |
| } |
| |
| udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); |
| |
| /* Allocate a message block for the T_UNITDATA_IND structure. */ |
| mp1 = allocb(udi_size, BPRI_MED); |
| if (mp1 == NULL) { |
| dprint(2, ("%s: allocb failed", LABEL)); |
| freemsg(mp); |
| return (ENOMEM); |
| } |
| |
| mp1->b_cont = mp; |
| mp = mp1; |
| mp->b_datap->db_type = M_PROTO; |
| tudi = (struct T_unitdata_ind *)(uintptr_t)mp->b_rptr; |
| mp->b_wptr = (uchar_t *)tudi + udi_size; |
| tudi->PRIM_type = T_UNITDATA_IND; |
| tudi->SRC_length = sizeof (sin_t); |
| tudi->SRC_offset = sizeof (struct T_unitdata_ind); |
| tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); |
| udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); |
| tudi->OPT_length = udi_size; |
| sin = (sin_t *)&tudi[1]; |
| sin->sin_addr.s_addr = rem_addr; |
| sin->sin_port = ntohs(rem_port); |
| sin->sin_family = rds->rds_family; |
| *(uint32_t *)(uintptr_t)&sin->sin_zero[0] = 0; |
| *(uint32_t *)(uintptr_t)&sin->sin_zero[4] = 0; |
| |
| putnext(rds->rds_ulpd, mp); |
| |
| /* check port quota */ |
| if (RDS_GET_RXPKTS_PEND() > rds_rx_pkts_pending_hwm) { |
| ulong_t current_port_quota = RDS_GET_PORT_QUOTA(); |
| if (rds->rds_port_quota > current_port_quota) { |
| /* this may result in stalling the port */ |
| rds->rds_port_quota = current_port_quota; |
| (void) proto_set_rx_hiwat(rds->rds_ulpd, NULL, |
| rds->rds_port_quota * UserBufferSize); |
| RDS_INCR_PORT_QUOTA_ADJUSTED(); |
| } |
| } |
| |
| /* |
| * canputnext() check is done after putnext as the protocol does |
| * not allow dropping any received packet. |
| */ |
| if (!canputnext(rds->rds_ulpd)) { |
| error = ENOSPC; |
| } |
| |
| RDS_DEC_REF_CNT(rds); |
| return (error); |
| } |
| |
| |
| /* Default structure copied into T_INFO_ACK messages */ |
| static struct T_info_ack rds_g_t_info_ack_ipv4 = { |
| T_INFO_ACK, |
| 65535, /* TSDU_size. Excl. headers */ |
| T_INVALID, /* ETSU_size. rds does not support expedited data. */ |
| T_INVALID, /* CDATA_size. rds does not support connect data. */ |
| T_INVALID, /* DDATA_size. rds does not support disconnect data. */ |
| sizeof (sin_t), /* ADDR_size. */ |
| 0, /* OPT_size - not initialized here */ |
| 65535, /* TIDU_size. Excl. headers */ |
| T_CLTS, /* SERV_type. rds supports connection-less. */ |
| TS_UNBND, /* CURRENT_state. This is set from rds_state. */ |
| (XPG4_1|SENDZERO) /* PROVIDER_flag */ |
| }; |
| |
| static in_port_t |
| rds_update_next_port(in_port_t port) |
| { |
| (void) random_get_pseudo_bytes((uint8_t *)&port, sizeof (in_port_t)); |
| if (port < rds_smallest_port) |
| port = rds_smallest_port; |
| return (port); |
| } |
| |
| /* This routine creates a T_ERROR_ACK message and passes it upstream. */ |
| static void |
| rds_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) |
| { |
| if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) |
| qreply(q, mp); |
| } |
| |
| static void |
| rds_capability_req(queue_t *q, mblk_t *mp) |
| { |
| t_uscalar_t cap_bits1; |
| struct T_capability_ack *tcap; |
| |
| cap_bits1 = |
| ((struct T_capability_req *)(uintptr_t)mp->b_rptr)->CAP_bits1; |
| |
| mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), |
| mp->b_datap->db_type, T_CAPABILITY_ACK); |
| if (mp == NULL) |
| return; |
| tcap = (struct T_capability_ack *)(uintptr_t)mp->b_rptr; |
| tcap->CAP_bits1 = 0; |
| |
| if (cap_bits1 & TC1_INFO) { |
| tcap->CAP_bits1 |= TC1_INFO; |
| *(&tcap->INFO_ack) = rds_g_t_info_ack_ipv4; |
| } |
| |
| qreply(q, mp); |
| } |
| |
| static void |
| rds_info_req(queue_t *q, mblk_t *omp) |
| { |
| rds_t *rds = (rds_t *)q->q_ptr; |
| struct T_info_ack *tap; |
| mblk_t *mp; |
| |
| /* Create a T_INFO_ACK message. */ |
| mp = tpi_ack_alloc(omp, sizeof (struct T_info_ack), M_PCPROTO, |
| T_INFO_ACK); |
| if (mp == NULL) |
| return; |
| tap = (struct T_info_ack *)(uintptr_t)mp->b_rptr; |
| *tap = rds_g_t_info_ack_ipv4; |
| tap->CURRENT_state = rds->rds_state; |
| tap->OPT_size = 128; |
| qreply(q, mp); |
| } |
| |
| /* |
| * NO locking protection here as sockfs will only send down |
| * one bind operation at a time. |
| */ |
| static void |
| rds_bind(queue_t *q, mblk_t *mp) |
| { |
| sin_t *sin; |
| rds_t *rds; |
| struct T_bind_req *tbr; |
| in_port_t port; /* Host byte order */ |
| in_port_t requested_port; /* Host byte order */ |
| struct T_bind_ack *tba; |
| int count; |
| rds_bf_t *rdsbf; |
| in_port_t lport; /* Network byte order */ |
| |
| rds = (rds_t *)q->q_ptr; |
| if (((uintptr_t)mp->b_wptr - (uintptr_t)mp->b_rptr) < sizeof (*tbr)) { |
| rds_err_ack(q, mp, TPROTO, 0); |
| return; |
| } |
| |
| /* |
| * We don't allow multiple binds |
| */ |
| if (rds->rds_state != TS_UNBND) { |
| rds_err_ack(q, mp, TOUTSTATE, 0); |
| return; |
| } |
| |
| tbr = (struct T_bind_req *)(uintptr_t)mp->b_rptr; |
| switch (tbr->ADDR_length) { |
| case sizeof (sin_t): /* Complete IPv4 address */ |
| sin = (sin_t *)(uintptr_t)mi_offset_param(mp, tbr->ADDR_offset, |
| sizeof (sin_t)); |
| if (sin == NULL || !OK_32PTR((char *)sin)) { |
| rds_err_ack(q, mp, TSYSERR, EINVAL); |
| return; |
| } |
| if (rds->rds_family != AF_INET_OFFLOAD || |
| sin->sin_family != AF_INET_OFFLOAD) { |
| rds_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); |
| return; |
| } |
| if (sin->sin_addr.s_addr == INADDR_ANY) { |
| rds_err_ack(q, mp, TBADADDR, 0); |
| return; |
| } |
| |
| /* |
| * verify that the address is hosted on IB |
| * only exception is the loopback address. |
| */ |
| if ((sin->sin_addr.s_addr != INADDR_LOOPBACK) && |
| !rds_verify_bind_address(sin->sin_addr.s_addr)) { |
| rds_err_ack(q, mp, TBADADDR, 0); |
| return; |
| } |
| |
| port = ntohs(sin->sin_port); |
| break; |
| default: /* Invalid request */ |
| rds_err_ack(q, mp, TBADADDR, 0); |
| return; |
| } |
| |
| requested_port = port; |
| |
| /* |
| * TPI only sends down T_BIND_REQ for AF_INET and AF_INET6 |
| * since RDS socket is of type AF_INET_OFFLOAD a O_T_BIND_REQ |
| * will be sent down. Treat O_T_BIND_REQ as T_BIND_REQ |
| */ |
| |
| if (requested_port == 0) { |
| /* |
| * If the application passed in zero for the port number, it |
| * doesn't care which port number we bind to. Get one in the |
| * valid range. |
| */ |
| port = rds_update_next_port(rds_next_port_to_try); |
| } |
| |
| ASSERT(port != 0); |
| count = 0; |
| for (;;) { |
| rds_t *rds1; |
| ASSERT(sin->sin_addr.s_addr != INADDR_ANY); |
| /* |
| * Walk through the list of rds streams bound to |
| * requested port with the same IP address. |
| */ |
| lport = htons(port); |
| rdsbf = &rds_bind_fanout[RDS_BIND_HASH(lport)]; |
| mutex_enter(&rdsbf->rds_bf_lock); |
| for (rds1 = rdsbf->rds_bf_rds; rds1 != NULL; |
| rds1 = rds1->rds_bind_hash) { |
| if (lport != rds1->rds_port || |
| rds1->rds_src != sin->sin_addr.s_addr || |
| rds1->rds_zoneid != rds->rds_zoneid) |
| |
| continue; |
| break; |
| } |
| |
| if (rds1 == NULL) { |
| /* |
| * No other stream has this IP address |
| * and port number. We can use it. |
| */ |
| break; |
| } |
| mutex_exit(&rdsbf->rds_bf_lock); |
| if (requested_port != 0) { |
| /* |
| * We get here only when requested port |
| * is bound (and only first of the for() |
| * loop iteration). |
| * |
| * The semantics of this bind request |
| * require it to fail so we return from |
| * the routine (and exit the loop). |
| * |
| */ |
| rds_err_ack(q, mp, TADDRBUSY, 0); |
| return; |
| } |
| |
| port = rds_update_next_port(port + 1); |
| |
| if (++count >= loopmax) { |
| /* |
| * We've tried every possible port number and |
| * there are none available, so send an error |
| * to the user. |
| */ |
| rds_err_ack(q, mp, TNOADDR, 0); |
| return; |
| } |
| } |
| |
| /* |
| * Copy the source address into our rds structure. |
| */ |
| rds->rds_src = sin->sin_addr.s_addr; |
| rds->rds_port = lport; |
| |
| /* |
| * reset the next port if we choose the port |
| */ |
| if (requested_port == 0) { |
| rds_next_port_to_try = port + 1; |
| } |
| |
| rds->rds_state = TS_IDLE; |
| rds_bind_hash_insert(rdsbf, rds); |
| mutex_exit(&rdsbf->rds_bf_lock); |
| |
| /* Reset the message type in preparation for shipping it back. */ |
| mp->b_datap->db_type = M_PCPROTO; |
| tba = (struct T_bind_ack *)(uintptr_t)mp->b_rptr; |
| tba->PRIM_type = T_BIND_ACK; |
| |
| /* Increment the number of ports and set the port quota */ |
| RDS_INCR_NPORT(); |
| rds->rds_port_quota = RDS_CURRENT_PORT_QUOTA(); |
| RDS_SET_PORT_QUOTA(rds->rds_port_quota); |
| (void) proto_set_rx_hiwat(RD(q), NULL, |
| rds->rds_port_quota * UserBufferSize); |
| |
| qreply(q, mp); |
| } |
| |
| static void |
| rds_wput_other(queue_t *q, mblk_t *mp) |
| { |
| uchar_t *rptr = mp->b_rptr; |
| struct datab *db; |
| cred_t *cr; |
| |
| db = mp->b_datap; |
| switch (db->db_type) { |
| case M_DATA: |
| /* Not connected */ |
| freemsg(mp); |
| return; |
| case M_PROTO: |
| case M_PCPROTO: |
| if ((uintptr_t)mp->b_wptr - (uintptr_t)rptr < |
| sizeof (t_scalar_t)) { |
| freemsg(mp); |
| return; |
| } |
| switch (((union T_primitives *)(uintptr_t)rptr)->type) { |
| case T_CAPABILITY_REQ: |
| rds_capability_req(q, mp); |
| return; |
| |
| case T_INFO_REQ: |
| rds_info_req(q, mp); |
| return; |
| case O_T_BIND_REQ: |
| case T_BIND_REQ: |
| rds_bind(q, mp); |
| return; |
| case T_SVR4_OPTMGMT_REQ: |
| case T_OPTMGMT_REQ: |
| /* |
| * All Solaris components should pass a db_credp |
| * for this TPI message, hence we ASSERT. |
| * But in case there is some other M_PROTO that looks |
| * like a TPI message sent by some other kernel |
| * component, we check and return an error. |
| */ |
| cr = msg_getcred(mp, NULL); |
| ASSERT(cr != NULL); |
| if (cr == NULL) { |
| rds_err_ack(q, mp, TSYSERR, EINVAL); |
| return; |
| } |
| if (((union T_primitives *)(uintptr_t)rptr)->type == |
| T_SVR4_OPTMGMT_REQ) { |
| (void) svr4_optcom_req(q, mp, cr, &rds_opt_obj, |
| B_FALSE); |
| } else { |
| (void) tpi_optcom_req(q, mp, cr, &rds_opt_obj, |
| B_FALSE); |
| } |
| return; |
| case T_CONN_REQ: |
| /* |
| * We should not receive T_CONN_REQ as sockfs only |
| * sends down T_CONN_REQ if family == AF_INET/AF_INET6 |
| * and type == SOCK_DGRAM/SOCK_RAW. For all others |
| * it simply calls soisconnected. see sotpi_connect() |
| * for details. |
| */ |
| /* FALLTHRU */ |
| default: |
| cmn_err(CE_PANIC, "type %d \n", |
| ((union T_primitives *)(uintptr_t)rptr)->type); |
| } |
| break; |
| case M_FLUSH: |
| if (*rptr & FLUSHW) |
| flushq(q, FLUSHDATA); |
| break; |
| case M_IOCTL: |
| rds_ioctl(q, mp); |
| break; |
| case M_IOCDATA: |
| /* IOCTL continuation following copyin or copyout. */ |
| if (mi_copy_state(q, mp, NULL) == -1) { |
| /* |
| * The copy operation failed. mi_copy_state already |
| * cleaned up, so we're out of here. |
| */ |
| return; |
| } |
| /* |
| * If we just completed a copy in, continue processing |
| * in rds_ioctl_copyin_done. If it was a copy out, we call |
| * mi_copyout again. If there is nothing more to copy out, |
| * it will complete the IOCTL. |
| */ |
| |
| if (MI_COPY_DIRECTION(mp) == MI_COPY_IN) |
| rds_ioctl_copyin_done(q, mp); |
| else |
| mi_copyout(q, mp); |
| return; |
| |
| default: |
| cmn_err(CE_PANIC, "types %d \n", db->db_type); |
| } |
| } |
| |
| static int |
| rds_wput(queue_t *q, mblk_t *mp) |
| { |
| struct datab *db; |
| uchar_t *rptr = mp->b_rptr; |
| |
| db = mp->b_datap; |
| switch (db->db_type) { |
| case M_PROTO: |
| case M_PCPROTO: |
| ASSERT(((uintptr_t)mp->b_wptr - (uintptr_t)rptr) <= |
| (uintptr_t)INT_MAX); |
| if ((uintptr_t)mp->b_wptr - (uintptr_t)rptr >= |
| sizeof (struct T_unitdata_req)) { |
| if (((union T_primitives *)(uintptr_t)rptr)->type |
| == T_UNITDATA_REQ) { |
| /* |
| * We should never come here for T_UNITDATA_REQ |
| */ |
| cmn_err(CE_PANIC, "rds_wput T_UNITDATA_REQ \n"); |
| } |
| } |
| /* FALLTHRU */ |
| default: |
| rds_wput_other(q, mp); |
| return (0); |
| } |
| } |
| |
| static int |
| rds_wput_data(queue_t *q, mblk_t *mp, uio_t *uiop) |
| { |
| uchar_t *rptr = mp->b_rptr; |
| rds_t *rds; |
| mblk_t *mp1; |
| sin_t *sin; |
| ipaddr_t dst; |
| uint16_t port; |
| int ret = 0; |
| |
| #define tudr ((struct T_unitdata_req *)(uintptr_t)rptr) |
| |
| rds = (rds_t *)q->q_ptr; |
| /* Handle UNITDATA_REQ messages here */ |
| if (rds->rds_state == TS_UNBND) { |
| /* If a port has not been bound to the stream, fail. */ |
| dprint(2, ("%s: socket is not bound to a port", LABEL)); |
| freemsg(mp); |
| return (EPROTO); |
| } |
| |
| mp1 = mp->b_cont; |
| mp->b_cont = NULL; |
| if (mp1 == NULL) { |
| dprint(2, ("%s: No message to send", LABEL)); |
| freemsg(mp); |
| return (EPROTO); |
| } |
| |
| /* |
| * No options allowed |
| */ |
| if (tudr->OPT_length != 0) { |
| ret = EINVAL; |
| goto done; |
| } |
| |
| ASSERT(mp1->b_datap->db_ref == 1); |
| |
| if ((rptr + tudr->DEST_offset + tudr->DEST_length) > |
| mp->b_wptr) { |
| ret = EDESTADDRREQ; |
| goto done; |
| } |
| |
| sin = (sin_t *)(uintptr_t)&rptr[tudr->DEST_offset]; |
| if (!OK_32PTR((char *)sin) || tudr->DEST_length != |
| sizeof (sin_t) || sin->sin_family != AF_INET_OFFLOAD) { |
| ret = EDESTADDRREQ; |
| goto done; |
| } |
| /* Extract port and ipaddr */ |
| port = sin->sin_port; |
| dst = sin->sin_addr.s_addr; |
| |
| if (port == 0 || dst == INADDR_ANY) { |
| ret = EDESTADDRREQ; |
| goto done; |
| } |
| |
| ASSERT(rds_transport_ops != NULL); |
| ret = rds_transport_ops->rds_transport_sendmsg(uiop, rds->rds_src, dst, |
| ntohs(rds->rds_port), ntohs(port), rds->rds_zoneid); |
| if (ret != 0) { |
| if ((ret != ENOBUFS) && (ret != ENOMEM)) { |
| /* ENOMEM is actually EWOULDBLOCK */ |
| dprint(2, ("%s: rds_sendmsg returned %d", LABEL, ret)); |
| goto done; |
| } |
| } |
| done: |
| freemsg(mp1); |
| freemsg(mp); |
| return (ret); |
| } |
| |
| /* |
| * Make sure we dont return EINVAL and EWOULDBLOCK as it has |
| * special meanings for the synchronous streams (rwnext()). |
| * We should return ENOMEM which is changed to EWOULDBLOCK by kstrputmsg() |
| */ |
| static int |
| rds_wrw(queue_t *q, struiod_t *dp) |
| { |
| mblk_t *mp = dp->d_mp; |
| int error = 0; |
| struct datab *db; |
| uchar_t *rptr; |
| |
| db = mp->b_datap; |
| rptr = mp->b_rptr; |
| switch (db->db_type) { |
| case M_PROTO: |
| case M_PCPROTO: |
| ASSERT(((uintptr_t)mp->b_wptr - (uintptr_t)rptr) <= |
| (uintptr_t)INT_MAX); |
| if ((uintptr_t)mp->b_wptr - (uintptr_t)rptr >= |
| sizeof (struct T_unitdata_req)) { |
| /* Detect valid T_UNITDATA_REQ here */ |
| if (((union T_primitives *)(uintptr_t)rptr)->type |
| == T_UNITDATA_REQ) |
| break; |
| } |
| /* FALLTHRU */ |
| default: |
| |
| if (isuioq(q) && (error = struioget(q, mp, dp, 0))) { |
| /* |
| * Uio error of some sort, so just return the error. |
| */ |
| goto done; |
| } |
| dp->d_mp = 0; |
| rds_wput_other(q, mp); |
| return (0); |
| } |
| |
| dp->d_mp = 0; |
| error = rds_wput_data(q, mp, &dp->d_uio); |
| done: |
| if (error == EWOULDBLOCK || error == EINVAL) |
| error = EIO; |
| |
| return (error); |
| } |
| |
| static void |
| rds_rsrv(queue_t *q) |
| { |
| rds_t *rds = (rds_t *)q->q_ptr; |
| ulong_t current_port_quota; |
| |
| /* update the port quota to the current level */ |
| current_port_quota = RDS_GET_PORT_QUOTA(); |
| if (rds->rds_port_quota != current_port_quota) { |
| rds->rds_port_quota = current_port_quota; |
| (void) proto_set_rx_hiwat(q, NULL, |
| rds->rds_port_quota * UserBufferSize); |
| } |
| |
| /* No more messages in the q, unstall the socket */ |
| rds_transport_ops->rds_transport_resume_port(ntohs(rds->rds_port)); |
| } |
| |
| int |
| rds_close_transport_driver() |
| { |
| ASSERT(rds_transport_ops != NULL); |
| |
| rw_enter(&rds_transport_lock, RW_WRITER); |
| if (rds_transport_handle != NULL) { |
| rds_transport_ops->rds_transport_close_ib(); |
| (void) ldi_close(rds_transport_handle, FNDELAY, kcred); |
| rds_transport_handle = NULL; |
| } |
| rw_exit(&rds_transport_lock); |
| |
| return (0); |
| } |
| |
| |
| int |
| rds_open_transport_driver() |
| { |
| int ret = 0; |
| |
| rw_enter(&rds_transport_lock, RW_WRITER); |
| if (rds_transport_handle != NULL) { |
| /* |
| * Someone beat us to it. |
| */ |
| goto done; |
| } |
| |
| if (ibt_hw_is_present() == 0) { |
| ret = ENODEV; |
| goto done; |
| } |
| |
| if (rds_li == NULL) { |
| ret = EPROTONOSUPPORT; |
| goto done; |
| } |
| |
| ret = ldi_open_by_name("/devices/ib/rdsib@0:rdsib", |
| FREAD | FWRITE, kcred, &rds_transport_handle, rds_li); |
| if (ret != 0) { |
| ret = EPROTONOSUPPORT; |
| rds_transport_handle = NULL; |
| goto done; |
| } |
| |
| ret = rds_transport_ops->rds_transport_open_ib(); |
| if (ret != 0) { |
| (void) ldi_close(rds_transport_handle, FNDELAY, kcred); |
| rds_transport_handle = NULL; |
| } |
| done: |
| rw_exit(&rds_transport_lock); |
| return (ret); |
| } |
| |
| static struct module_info info = { |
| 0, "rds", 1, INFPSZ, 65536, 1024 |
| }; |
| |
| static struct qinit rinit = { |
| NULL, (pfi_t)rds_rsrv, rds_open, rds_close, NULL, &info |
| }; |
| |
| static struct qinit winit = { |
| (pfi_t)rds_wput, NULL, rds_open, rds_close, NULL, &info, |
| NULL, rds_wrw, NULL, STRUIOT_STANDARD |
| }; |
| |
| struct streamtab rdsinfo = { |
| &rinit, &winit, NULL, NULL |
| }; |
| |
| DDI_DEFINE_STREAM_OPS(rds_devops, nulldev, nulldev, rds_attach, rds_detach, |
| nulldev, rds_info, RDS_DEVMTFLAGS, &RDS_STRTAB, ddi_quiesce_not_supported); |
| |
| /* |
| * Module linkage information for the kernel. |
| */ |
| static struct modldrv modldrv = { |
| &mod_driverops, |
| RDS_DEVDESC, |
| &rds_devops |
| }; |
| |
| static struct modlinkage modlinkage = { |
| MODREV_1, |
| &modldrv, |
| NULL |
| }; |
| |
| int |
| _init(void) |
| { |
| int ret; |
| |
| rds_init(); |
| |
| ret = mod_install(&modlinkage); |
| if (ret != 0) |
| goto done; |
| ret = ldi_ident_from_mod(&modlinkage, &rds_li); |
| if (ret != 0) |
| rds_li = NULL; |
| done: |
| return (ret); |
| } |
| |
| int |
| _fini(void) |
| { |
| int ret; |
| |
| ret = mod_remove(&modlinkage); |
| if (ret != 0) { |
| return (ret); |
| } |
| |
| rds_fini(); |
| |
| ldi_ident_release(rds_li); |
| return (0); |
| } |
| |
| int |
| _info(struct modinfo *modinfop) |
| { |
| return (mod_info(&modlinkage, modinfop)); |
| } |