blob: c0222cc6e208457b292b4c62f49da0b53290e08c [file] [log] [blame]
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
* All Rights Reserved
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/statvfs.h>
#include <sys/kmem.h>
#include <sys/dirent.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/systeminfo.h>
#include <sys/flock.h>
#include <sys/pathname.h>
#include <sys/nbmlock.h>
#include <sys/share.h>
#include <sys/atomic.h>
#include <sys/policy.h>
#include <sys/fem.h>
#include <sys/sdt.h>
#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/svc.h>
#include <nfs/nfs.h>
#include <nfs/export.h>
#include <nfs/lm.h>
#include <nfs/nfs4.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tndb.h>
/*
 * Lock-retry tunables.  Each compile-time default is copied into a
 * file-scope variable; presumably the variables (not the macros) are what
 * the locking code consults so they can be adjusted -- TODO confirm at the
 * use sites.
 */
#define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define RFS4_LOCK_DELAY 10 /* Milliseconds */
static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
/* End of Tunables */
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 * Pre-increments sp->bits.chgseq in place and evaluates to the new value;
 * sp is evaluated exactly once.
 */
#define next_stateid(sp) (++(sp)->bits.chgseq)
/*
* RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
* This is used to return NFS4ERR_TOOSMALL when clients specify
* maxcount that isn't large enough to hold the smallest possible
* XDR encoded dirent.
*
* sizeof cookie (8 bytes) +
* sizeof name_len (4 bytes) +
* sizeof smallest (padded) name (4 bytes) +
* sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
* sizeof attrlist4_len (4 bytes) +
* sizeof next boolean (4 bytes)
*
* RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
* the smallest possible entry4 (assumes no attrs requested).
* sizeof nfsstat4 (4 bytes) +
* sizeof verifier4 (8 bytes) +
* sizeof entry4list bool (4 bytes) +
* sizeof entry4 (36 bytes) +
* sizeof eof bool (4 bytes)
*
* RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
* VOP_READDIR. Its value is the size of the maximum possible dirent
* for solaris. The DIRENT64_RECLEN macro returns the size of dirent
* required for a given name length. MAXNAMELEN is the maximum
* filename length allowed in Solaris. The first two DIRENT64_RECLEN()
* macros are to allow for . and .. entries -- just a minor tweak to try
* and guarantee that buffer we give to VOP_READDIR will be large enough
* to hold ., .., and the largest possible solaris dirent64.
*/
#define RFS4_MINLEN_ENTRY4 36
#define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
#define RFS4_MINLEN_RDDIR_BUF \
(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
/*
 * It would be better to pad to 4 bytes since that's what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we're given.  Dircount is only a hint anyway.  Currently the
 * solaris kernel is ASCII only, so there's no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
 *
 * The macro therefore yields three XDR units (cookie plus name length)
 * plus the name length that DIRENT64_NAMELEN derives from d_reclen.
 */
#define DIRENT64_TO_DIRCOUNT(dp) \
(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
/*
 * types of label comparison
 *
 * Passed as the "flag" argument of do_rfs4_label_check():
 * EQUALITY_CHECK selects blequal(), anything else selects bldominates().
 */
#define EQUALITY_CHECK 0
#define DOMINANCE_CHECK 1
time_t rfs4_start_time; /* Initialized in rfs4_srvrinit */
static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
/* set from fs_new_caller_id() in rfs4_srvrinit() */
u_longlong_t nfs4_srv_caller_id;
/* write verifier; computed once in rfs4_srvrinit() */
verifier4 Write4verf;
/* NOTE(review): presumably the READDIR verifier; not set in this part of the file */
verifier4 Readdir4verf;
/*
 * Forward declarations.
 *
 * Every rfs4_op_*() handler shares the signature
 * (argop, resop, svc_req, compound_state) so that it can be placed in the
 * rfsv4disptab dispatch table; the *_free() routines take only the result
 * op and are used as the table's dis_resfree entries.
 */
void rfs4_init_compound_state(struct compound_state *);
static void nullfree(caddr_t);
static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_create_free(nfs_resop4 *resop);
static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
struct svc_req *, struct compound_state *);
static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_getattr_free(nfs_resop4 *);
static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_getfh_free(nfs_resop4 *);
static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void lock_denied_free(nfs_resop4 *);
static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
struct svc_req *req, struct compound_state *cs);
static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
struct svc_req *, struct compound_state *);
static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
struct svc_req *, struct compound_state *);
static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_read_free(nfs_resop4 *);
static void rfs4_op_readdir_free(nfs_resop4 *resop);
static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_readlink_free(nfs_resop4 *);
static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
struct svc_req *, struct compound_state *);
static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
struct svc_req *, struct compound_state *);
static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
struct svc_req *req, struct compound_state *);
static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
struct compound_state *);
static void rfs4_op_secinfo_free(nfs_resop4 *);
/* helpers that are not dispatch-table entries */
static nfsstat4 check_open_access(uint32_t,
struct compound_state *, struct svc_req *);
nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
static int vop_shrlock(vnode_t *, int, struct shrlock *, int);
static int rfs4_shrlock(rfs4_state_t *, int);
static int rfs4_share(rfs4_state_t *);
void rfs4_ss_clid(rfs4_client_t *, struct svc_req *);
/*
 * translation table for attrs
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names only; confirm against nfs4_ntov_table_init() and
 * do_rfs4_set_attrs(), which are outside this part of the file.
 */
struct nfs4_ntov_table {
union nfs4_attr_u *na; /* presumably per-attribute value storage */
uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably index map into na[] */
int attrcnt; /* presumably number of attrs recorded */
bool_t vfsstat; /* presumably set when vfs stats are needed */
};
/* nfs4_ntov_table helpers and the attribute setter that uses the table */
static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
struct nfs4_svgetit_arg *sargp);
static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
/* FEM monitors created by fem_create() in rfs4_srvrinit() */
fem_t *deleg_rdops;
fem_t *deleg_wrops;
/*
 * Server instances form a list linked through ->prev/->next;
 * rfs4_cur_servinst is the most recently created one.
 */
rfs4_servinst_t *rfs4_cur_servinst = NULL; /* current server instance */
kmutex_t rfs4_servinst_lock; /* protects linked list */
int rfs4_seen_first_compound; /* set first time we see one */
#ifdef DEBUG
/* gates the NFS4_DEBUG messages in the rfs4_servinst_ and rfs4_grace_ routines */
int rfs4_servinst_debug = 0;
#endif
/*
 * NFS4 op dispatch table
 *
 * One entry per operation number: the handler, the routine that releases
 * whatever the handler allocated in the result, and a set of RPC_* flags.
 * The function pointers are declared without prototypes, so the compiler
 * does not check the handler signatures against the table.
 */
struct rfsv4disp {
void (*dis_proc)(); /* proc to call */
void (*dis_resfree)(); /* frees space allocated by proc */
int dis_flags; /* RPC_IDEMPOTENT, etc... */
};
/*
 * Dispatch table; the array position of each entry is the operation
 * number noted in the comment above it.  Slots 0-2 are not valid
 * operations and map to rfs4_op_illegal; OP_DELEGPURGE maps to
 * rfs4_op_inval.
 */
static struct rfsv4disp rfsv4disptab[] = {
/*
* NFS VERSION 4
*/
/* RFS_NULL = 0 */
{rfs4_op_illegal, nullfree, 0},
/* UNUSED = 1 */
{rfs4_op_illegal, nullfree, 0},
/* UNUSED = 2 */
{rfs4_op_illegal, nullfree, 0},
/* OP_ACCESS = 3 */
{rfs4_op_access, nullfree, RPC_IDEMPOTENT},
/* OP_CLOSE = 4 */
{rfs4_op_close, nullfree, 0},
/* OP_COMMIT = 5 */
{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
/* OP_CREATE = 6 */
{rfs4_op_create, nullfree, 0},
/* OP_DELEGPURGE = 7 */
{rfs4_op_inval, nullfree, 0},
/* OP_DELEGRETURN = 8 */
{rfs4_op_delegreturn, nullfree, 0},
/* OP_GETATTR = 9 */
{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
/* OP_GETFH = 10 */
{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
/* OP_LINK = 11 */
{rfs4_op_link, nullfree, 0},
/* OP_LOCK = 12 */
{rfs4_op_lock, lock_denied_free, 0},
/* OP_LOCKT = 13 */
{rfs4_op_lockt, lock_denied_free, 0},
/* OP_LOCKU = 14 */
{rfs4_op_locku, nullfree, 0},
/* OP_LOOKUP = 15 */
{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},
/* OP_LOOKUPP = 16 */
{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},
/* OP_NVERIFY = 17 */
{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
/* OP_OPEN = 18 */
{rfs4_op_open, rfs4_free_reply, 0},
/* OP_OPENATTR = 19 */
{rfs4_op_openattr, nullfree, 0},
/* OP_OPEN_CONFIRM = 20 */
{rfs4_op_open_confirm, nullfree, 0},
/* OP_OPEN_DOWNGRADE = 21 */
{rfs4_op_open_downgrade, nullfree, 0},
/* OP_PUTFH = 22 */
{rfs4_op_putfh, nullfree, RPC_ALL},
/* OP_PUTPUBFH = 23 */
{rfs4_op_putpubfh, nullfree, RPC_ALL},
/* OP_PUTROOTFH = 24 */
{rfs4_op_putrootfh, nullfree, RPC_ALL},
/* OP_READ = 25 */
{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
/* OP_READDIR = 26 */
{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
/* OP_READLINK = 27 */
{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
/* OP_REMOVE = 28 */
{rfs4_op_remove, nullfree, 0},
/* OP_RENAME = 29 */
{rfs4_op_rename, nullfree, 0},
/* OP_RENEW = 30 */
{rfs4_op_renew, nullfree, 0},
/* OP_RESTOREFH = 31 */
{rfs4_op_restorefh, nullfree, RPC_ALL},
/* OP_SAVEFH = 32 */
{rfs4_op_savefh, nullfree, RPC_ALL},
/* OP_SECINFO = 33 */
{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
/* OP_SETATTR = 34 */
{rfs4_op_setattr, nullfree, 0},
/* OP_SETCLIENTID = 35 */
{rfs4_op_setclientid, nullfree, 0},
/* OP_SETCLIENTID_CONFIRM = 36 */
{rfs4_op_setclientid_confirm, nullfree, 0},
/* OP_VERIFY = 37 */
{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
/* OP_WRITE = 38 */
{rfs4_op_write, nullfree, 0},
/* OP_RELEASE_LOCKOWNER = 39 */
{rfs4_op_release_lockowner, nullfree, 0},
};
/* number of entries in rfsv4disptab */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
/*
 * One past the last valid rfsv4disptab index.  It is NOT a valid index
 * into rfsv4disptab itself; it matches the final "rfs4_op_illegal" entry
 * of the DEBUG-only rfs4_op_string[] array.
 */
#define OP_ILLEGAL_IDX (rfsv4disp_cnt)
#ifdef DEBUG
/* DEBUG-only switches */
int rfs4_fillone_debug = 0;
int rfs4_shrlock_debug = 0;
int rfs4_no_stub_access = 1;
int rfs4_rddir_debug = 0;

/*
 * Printable operation names, indexed by operation number in the same
 * order as rfsv4disptab.  The extra final entry corresponds to
 * OP_ILLEGAL_IDX.
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/* was "rfs4_op_setclient_confirm"; now matches the handler's name */
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
void rfs4_ss_chkclid(rfs4_client_t *);
/*
 * nextdp(dp): address of the directory entry that follows dp in a packed
 * buffer, found by advancing d_reclen bytes from dp.  Any earlier
 * definition of the macro is discarded first.  dp is evaluated twice.
 */
#ifdef nextdp
#undef nextdp
#endif
#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
/*
 * Operation template handed to fem_create() for "deleg_rdops" in
 * rfs4_srvrinit(): pairs of (vnode operation name, monitor routine),
 * terminated by a NULL pair.  Judging by the name this is the set used
 * for read delegations -- the deleg_*() routines are defined elsewhere.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
VOPNAME_OPEN, deleg_rdopen,
VOPNAME_WRITE, deleg_write,
VOPNAME_SETATTR, deleg_setattr,
VOPNAME_RWLOCK, deleg_rd_rwlock,
VOPNAME_SPACE, deleg_space,
VOPNAME_SETSECATTR, deleg_setsecattr,
VOPNAME_VNEVENT, deleg_vnevent,
NULL, NULL
};
/*
 * Operation template handed to fem_create() for "deleg_wrops" in
 * rfs4_srvrinit().  Same layout as nfs4_rd_deleg_tmpl, but it additionally
 * monitors VOPNAME_READ and uses the write-side open/rwlock routines.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
VOPNAME_OPEN, deleg_wropen,
VOPNAME_READ, deleg_read,
VOPNAME_WRITE, deleg_write,
VOPNAME_SETATTR, deleg_setattr,
VOPNAME_RWLOCK, deleg_wr_rwlock,
VOPNAME_SPACE, deleg_space,
VOPNAME_SETSECATTR, deleg_setsecattr,
VOPNAME_VNEVENT, deleg_vnevent,
NULL, NULL
};
int
rfs4_srvrinit(void)
{
timespec32_t verf;
int error;
extern void rfs4_attr_init();
extern krwlock_t rfs4_deleg_policy_lock;
/*
* The following algorithm attempts to find a unique verifier
* to be used as the write verifier returned from the server
* to the client. It is important that this verifier change
* whenever the server reboots. Of secondary importance, it
* is important for the verifier to be unique between two
* different servers.
*
* Thus, an attempt is made to use the system hostid and the
* current time in seconds when the nfssrv kernel module is
* loaded. It is assumed that an NFS server will not be able
* to boot and then to reboot in less than a second. If the
* hostid has not been set, then the current high resolution
* time is used. This will ensure different verifiers each
* time the server reboots and minimize the chances that two
* different servers will have the same verifier.
* XXX - this is broken on LP64 kernels.
*/
verf.tv_sec = (time_t)nfs_atoi(hw_serial);
if (verf.tv_sec != 0) {
verf.tv_nsec = gethrestime_sec();
} else {
timespec_t tverf;
gethrestime(&tverf);
verf.tv_sec = (time_t)tverf.tv_sec;
verf.tv_nsec = tverf.tv_nsec;
}
Write4verf = *(uint64_t *)&verf;
rfs4_attr_init();
mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
/* Used to manage create/destroy of server state */
mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
/* Used to manage access to server instance linked list */
mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
/* Used to manage access to rfs4_deleg_policy */
rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
if (error != 0) {
rfs4_disable_delegation();
} else {
error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
&deleg_wrops);
if (error != 0) {
rfs4_disable_delegation();
fem_free(deleg_rdops);
}
}
nfs4_srv_caller_id = fs_new_caller_id();
lockt_sysid = lm_alloc_sysidt();
return (0);
}
void
rfs4_srvrfini(void)
{
extern krwlock_t rfs4_deleg_policy_lock;
if (lockt_sysid != LM_NOSYSID) {
lm_free_sysidt(lockt_sysid);
lockt_sysid = LM_NOSYSID;
}
mutex_destroy(&rfs4_deleg_lock);
mutex_destroy(&rfs4_state_lock);
rw_destroy(&rfs4_deleg_policy_lock);
fem_free(deleg_rdops);
fem_free(deleg_wrops);
}
/*
 * Reset a compound_state to its starting values: everything zeroed,
 * processing enabled (cont), access denied until proven otherwise, and
 * the file handle pointing at the structure's own fhbuf storage.
 */
void
rfs4_init_compound_state(struct compound_state *cs)
{
	bzero(cs, sizeof (*cs));

	/* the current filehandle lives in the embedded buffer */
	cs->fh.nfs_fh4_val = cs->fhbuf;

	cs->access = CS_ACCESS_DENIED;
	cs->cont = TRUE;

	/* explicit even though the structure was just zeroed */
	cs->mandlock = FALSE;
	cs->deleg = FALSE;
}
/*
 * Start (or restart) the grace period of a server instance: record the
 * current time as its start time and load the configured grace period
 * length, both under the instance's writer lock.
 */
void
rfs4_grace_start(rfs4_servinst_t *sip)
{
	time_t started = gethrestime_sec();

	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
	    "rfs4_grace_start: inst %p: 0x%lx", (void *)sip, started));

	rw_enter(&sip->rwlock, RW_WRITER);
	sip->grace_period = rfs4_grace_period;
	sip->start_time = started;
	rw_exit(&sip->rwlock);
}
/*
 * Returns non-zero when the instance's grace period has never been
 * started, i.e. its start_time is still 0.
 */
int
rfs4_servinst_grace_new(rfs4_servinst_t *sip)
{
	int never_started;

	rw_enter(&sip->rwlock, RW_READER);
	never_started = (sip->start_time == 0);
	rw_exit(&sip->rwlock);

	return (never_started);
}
/*
 * Returns non-zero while the server instance is still inside its grace
 * period, i.e. the current time is earlier than start_time + grace_period.
 * The two fields are sampled under the reader lock; the clock is read
 * after the lock has been dropped.
 */
int
rfs4_servinst_in_grace(rfs4_servinst_t *sip)
{
	time_t expires;

	rw_enter(&sip->rwlock, RW_READER);
	expires = sip->start_time + sip->grace_period;
	rw_exit(&sip->rwlock);

	if (gethrestime_sec() < expires)
		return (1);
	return (0);
}
/*
 * Returns non-zero when the server instance assigned to the client is
 * within its grace period.  The caller is expected to hold a reference
 * on cp->dbe (asserted).
 */
int
rfs4_clnt_in_grace(rfs4_client_t *cp)
{
	rfs4_servinst_t *sip;

	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);

	sip = cp->server_instance;
	return (rfs4_servinst_in_grace(sip));
}
/*
 * Restart the grace period of every server instance that is currently
 * within one.  The instance list is walked from the current instance
 * back through ->prev with rfs4_servinst_lock held.
 */
void
rfs4_grace_reset_all(void)
{
	rfs4_servinst_t *sip;
#ifdef DEBUG
	int n = 0;
#endif

	mutex_enter(&rfs4_servinst_lock);
	sip = rfs4_cur_servinst;
	while (sip != NULL) {
		if (rfs4_servinst_in_grace(sip)) {
			rfs4_grace_start(sip);
#ifdef DEBUG
			n++;
#endif
		}
		sip = sip->prev;
	}
	mutex_exit(&rfs4_servinst_lock);

	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
	    "rfs4_grace_reset_all: reset %d instances", n));
}
/*
 * Start the grace period of every server instance whose grace period has
 * never been started.  The instance list is walked from the current
 * instance back through ->prev with rfs4_servinst_lock held.
 */
void
rfs4_grace_start_new(void)
{
#ifdef DEBUG
	int n = 0;
#endif
	rfs4_servinst_t *sip;

	mutex_enter(&rfs4_servinst_lock);
	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
		if (rfs4_servinst_grace_new(sip)) {
			rfs4_grace_start(sip);
#ifdef DEBUG
			/*
			 * Count only the instances actually started; the
			 * counter used to sit outside the "if" and counted
			 * every instance on the list.
			 */
			n++;
#endif
		}
	}
	mutex_exit(&rfs4_servinst_lock);

	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
	    "rfs4_grace_start_new: started %d new instances", n));
}
/*
 * Allocate a new server instance, link it in front of the existing
 * instance (if any) and make it the current one.  When start_grace is
 * non-zero its grace period is started right away; note that starting
 * the grace period too early shortens the clients' recovery window.
 */
void
rfs4_servinst_create(int start_grace)
{
	rfs4_servinst_t *newsip;

	newsip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
	rw_init(&newsip->rwlock, NULL, RW_DEFAULT, NULL);

	newsip->prev = NULL;
	newsip->next = NULL;
	newsip->grace_period = (time_t)0;
	newsip->start_time = (time_t)0;

	mutex_enter(&rfs4_servinst_lock);
	if (rfs4_cur_servinst != NULL) {
		/* add to linked list */
		rfs4_cur_servinst->next = newsip;
		newsip->prev = rfs4_cur_servinst;
	} else {
		NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
		    "rfs4_servinst_create: creating first instance"));
	}

	if (start_grace)
		rfs4_grace_start(newsip);

	/* make the new instance "current" */
	rfs4_cur_servinst = newsip;
	mutex_exit(&rfs4_servinst_lock);

	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
	    "rfs4_servinst_create: new current instance: %p; start_grace: %d",
	    (void *)newsip, start_grace));
}
/*
 * Tear down every server instance.  The list is detached from
 * rfs4_cur_servinst and then walked through ->prev, destroying each
 * instance's rwlock and freeing it.  At least one instance must exist
 * (asserted).  A per-instance rfs4_servinst_destroy(sip) may be added in
 * future.
 */
void
rfs4_servinst_destroy_all(void)
{
	rfs4_servinst_t *sip, *older;
#ifdef DEBUG
	int n = 0;
#endif

	mutex_enter(&rfs4_servinst_lock);
	ASSERT(rfs4_cur_servinst != NULL);

	sip = rfs4_cur_servinst;
	rfs4_cur_servinst = NULL;

	while (sip != NULL) {
		/* fetch the link before the instance is freed */
		older = sip->prev;
		rw_destroy(&sip->rwlock);
		kmem_free(sip, sizeof (rfs4_servinst_t));
#ifdef DEBUG
		n++;
#endif
		sip = older;
	}
	mutex_exit(&rfs4_servinst_lock);

	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
	    "rfs4_servinst_destroy_all: destroyed %d instances", n));
}
/*
 * Record sip as the server instance of client cp.
 * The caller must hold a reference on cp->dbe (asserted).
 */
void
rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
{
	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);

	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
	    "rfs4_servinst_assign: client: %p, old: %p, new: %p", (void *)cp,
	    (void *)cp->server_instance, (void *)sip));

	/*
	 * Taking the instance-list lock guarantees that, should the
	 * current instance be changing at this moment, the new one is
	 * what gets observed.
	 */
	mutex_enter(&rfs4_servinst_lock);
	cp->server_instance = sip;
	mutex_exit(&rfs4_servinst_lock);
}
/*
 * Return the server instance assigned to client cp.
 * The caller must hold a reference on cp->dbe (asserted).
 */
rfs4_servinst_t *
rfs4_servinst(rfs4_client_t *cp)
{
	rfs4_servinst_t *sip;

	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);

	sip = cp->server_instance;
	return (sip);
}
/*
 * Result-free routine for operations whose results own no allocated
 * memory; intentionally does nothing.  Used as the dis_resfree entry of
 * most rfsv4disptab slots.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
/*
 * Catch-all handler for operations that are invalid or not (yet)
 * implemented: stores NFS4ERR_INVAL both at the start of the result
 * union (where each op result keeps its status) and in the compound
 * status.
 */
/* ARGSUSED */
static void
rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	nfsstat4 *res_status = (nfsstat4 *)&(resop)->nfs_resop4_u;

	*res_status = NFS4ERR_INVAL;
	*cs->statusp = *res_status;
}
/*
 * Report whether the security flavor nfsnum occurs among the first
 * "count" entries of flavor_list.  Returns TRUE on a match, FALSE
 * otherwise (including when count is zero or negative).
 */
bool_t
in_flavor_list(int nfsnum, int *flavor_list, int count)
{
	int remaining;

	for (remaining = count; remaining > 0; remaining--, flavor_list++) {
		if (*flavor_list == nfsnum)
			return (TRUE);
	}

	return (FALSE);
}
/*
* Used by rfs4_op_secinfo to get the security information from the
* export structure associated with the component.
*/
/* ARGSUSED */
static nfsstat4
do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
{
int error, different_export = 0;
vnode_t *dvp, *vp, *tvp;
struct exportinfo *exi = NULL;
fid_t fid;
uint_t count, i;
secinfo4 *resok_val;
struct secinfo *secp;
bool_t did_traverse;
int dotdot, walk;
dvp = cs->vp;
dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
/*
* If dotdotting, then need to check whether it's above the
* root of a filesystem, or above an export point.
*/
if (dotdot) {
/*
* If dotdotting at the root of a filesystem, then
* need to traverse back to the mounted-on filesystem
* and do the dotdot lookup there.
*/
if (cs->vp->v_flag & VROOT) {
/*
* If at the system root, then can
* go up no further.
*/
if (VN_CMP(dvp, rootdir))
return (puterrno4(ENOENT));
/*
* Traverse back to the mounted-on filesystem
*/
dvp = untraverse(cs->vp);
/*
* Set the different_export flag so we remember
* to pick up a new exportinfo entry for
* this new filesystem.
*/
different_export = 1;
} else {
/*
* If dotdotting above an export point then set
* the different_export to get new export info.
*/
different_export = nfs_exported(cs->exi, cs->vp);
}
}
/*
* Get the vnode for the component "nm".
*/
error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr);
if (error)
return (puterrno4(error));
/*
* If the vnode is in a pseudo filesystem, or if the security flavor
* used in the request is valid but not an explicitly shared flavor,
* or the access bit indicates that this is a limited access,
* check whether this vnode is visible.
*/
if (!different_export &&
(PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
cs->access & CS_ACCESS_LIMITED)) {
if (! nfs_visible(cs->exi, vp, &different_export)) {
VN_RELE(vp);
return (puterrno4(ENOENT));
}
}
/*
* If it's a mountpoint, then traverse it.
*/
if (vn_ismntpt(vp)) {
tvp = vp;
if ((error = traverse(&tvp)) != 0) {
VN_RELE(vp);
return (puterrno4(error));
}
/* remember that we had to traverse mountpoint */
did_traverse = TRUE;
vp = tvp;
different_export = 1;
} else if (vp->v_vfsp != dvp->v_vfsp) {
/*
* If vp isn't a mountpoint and the vfs ptrs aren't the same,
* then vp is probably an LOFS object. We don't need the
* realvp, we just need to know that we might have crossed
* a server fs boundary and need to call checkexport4.
* (LOFS lookup hides server fs mountpoints, and actually calls
* traverse)
*/
different_export = 1;
did_traverse = FALSE;
}
/*
* Get the export information for it.
*/
if (different_export) {
bzero(&fid, sizeof (fid));
fid.fid_len = MAXFIDSZ;
error = vop_fid_pseudo(vp, &fid);
if (error) {
VN_RELE(vp);
return (puterrno4(error));
}
if (dotdot)
exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
else
exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
if (exi == NULL) {
if (did_traverse == TRUE) {
/*
* If this vnode is a mounted-on vnode,
* but the mounted-on file system is not
* exported, send back the secinfo for
* the exported node that the mounted-on
* vnode lives in.
*/
exi = cs->exi;
} else {
VN_RELE(vp);
return (puterrno4(EACCES));
}
}
} else {
exi = cs->exi;
}
ASSERT(exi != NULL);
/*
* Create the secinfo result based on the security information
* from the exportinfo structure (exi).
*
* Return all flavors for a pseudo node.
* For a real export node, return the flavor that the client
* has access with.
*/
ASSERT(RW_LOCK_HELD(&exported_lock));
if (PSEUDO(exi)) {
count = exi->exi_export.ex_seccnt; /* total sec count */
resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
secp = exi->exi_export.ex_secinfo;
for (i = 0; i < count; i++) {
resok_val[i].flavor = secp[i].s_secinfo.sc_rpcnum;
if (resok_val[i].flavor == RPCSEC_GSS) {
rpcsec_gss_info *info;
info = &resok_val[i].flavor_info;
info->qop = secp[i].s_secinfo.sc_qop;
info->service =
(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
/* get oid opaque data */
info->oid.sec_oid4_len =
secp[i].s_secinfo.sc_gss_mech_type->length;
info->oid.sec_oid4_val =
kmem_alloc(
secp[i].s_secinfo.sc_gss_mech_type->length,
KM_SLEEP);
bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
info->oid.sec_oid4_val, info->oid.sec_oid4_len);
}
}
resp->SECINFO4resok_len = count;
resp->SECINFO4resok_val = resok_val;
} else {
int ret_cnt = 0, k = 0;
int *flavor_list;
count = exi->exi_export.ex_seccnt; /* total sec count */
secp = exi->exi_export.ex_secinfo;
flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
/* find out which flavors to return */
for (i = 0; i < count; i ++) {
int access, flavor, perm;
flavor = secp[i].s_secinfo.sc_nfsnum;
perm = secp[i].s_flags;
access = nfsauth4_secinfo_access(exi, cs->req,
flavor, perm);
if (! (access & NFSAUTH_DENIED) &&
! (access & NFSAUTH_WRONGSEC)) {
flavor_list[ret_cnt] = flavor;
ret_cnt++;
}
}
/* Create the returning SECINFO value */
resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
for (i = 0; i < count; i++) {
/* If the flavor is in the flavor list, fill in resok_val. */
if (in_flavor_list(secp[i].s_secinfo.sc_nfsnum,
flavor_list, ret_cnt)) {
resok_val[k].flavor = secp[i].s_secinfo.sc_rpcnum;
if (resok_val[k].flavor == RPCSEC_GSS) {
rpcsec_gss_info *info;
info = &resok_val[k].flavor_info;
info->qop = secp[i].s_secinfo.sc_qop;
info->service =
(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
/* get oid opaque data */
info->oid.sec_oid4_len =
secp[i].s_secinfo.sc_gss_mech_type->length;
info->oid.sec_oid4_val =
kmem_alloc(
secp[i].s_secinfo.sc_gss_mech_type->length,
KM_SLEEP);
bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
info->oid.sec_oid4_val, info->oid.sec_oid4_len);
}
k++;
}
if (k >= ret_cnt)
break;
}
resp->SECINFO4resok_len = ret_cnt;
resp->SECINFO4resok_val = resok_val;
kmem_free(flavor_list, count * sizeof (int));
}
VN_RELE(vp);
return (NFS4_OK);
}
/*
 * SECINFO (Operation 33): Obtain required security information on
 * the component name in the format of (security-mechanism-oid, qop, service)
 * triplets.
 *
 * Requires a current filehandle that refers to a directory.  The
 * component name is validated and converted to a C string, and the real
 * work is done by do_rfs4_op_secinfo().
 */
/* ARGSUSED */
static void
rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
	utf8string *utfnm = &argop->nfs_argop4_u.opsecinfo.name;
	nfsstat4 status;
	uint_t len;
	char *nm;

	/* The current file handle (cfh) must already have been set. */
	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		return;
	}

	if (cs->vp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		return;
	}

	/*
	 * A name that fails verification is rejected unless it is exactly
	 * "..": SECINFO ".." returns the parent's secinfo data.
	 */
	if (!utf8_dir_verify(utfnm) &&
	    (utfnm->utf8string_len != 2 ||
	    utfnm->utf8string_val[0] != '.' ||
	    utfnm->utf8string_val[1] != '.')) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return;
	}

	nm = utf8_to_str(utfnm, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return;
	}

	if (len > MAXNAMELEN)
		status = NFS4ERR_NAMETOOLONG;
	else
		status = do_rfs4_op_secinfo(cs, nm, resp);

	*cs->statusp = resp->status = status;
	kmem_free(nm, len);
}
/*
 * Release a SECINFO result: the OID copy held by every RPCSEC_GSS entry,
 * then the entry array itself.  Results whose status is not NFS4_OK own
 * no memory and are left untouched.
 */
/* ARGSUSED */
static void
rfs4_op_secinfo_free(nfs_resop4 *resop)
{
	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
	secinfo4 *entries, *ent;
	int nentries, idx;

	if (resp->status != NFS4_OK)
		return;

	nentries = resp->SECINFO4resok_len;
	entries = resp->SECINFO4resok_val;

	for (idx = 0; idx < nentries; idx++) {
		ent = &entries[idx];
		if (ent->flavor != RPCSEC_GSS)
			continue;
		kmem_free(ent->flavor_info.oid.sec_oid4_val,
		    ent->flavor_info.oid.sec_oid4_len);
	}

	kmem_free(entries, nentries * sizeof (secinfo4));
	resp->SECINFO4resok_val = NULL;
	resp->SECINFO4resok_len = 0;
}
/*
 * Compare the client's label with the label of the server's file.
 *
 * flag selects the comparison: EQUALITY_CHECK requires the two labels to
 * be equal, any other value requires the client label to dominate the
 * file label.  Returns B_FALSE when the file's label cannot be obtained.
 */
static boolean_t
do_rfs4_label_check(bslabel_t *clabel, vnode_t *vp, int flag)
{
	ts_label_t *file_tsl;
	bslabel_t *file_label;
	boolean_t ok;

	file_tsl = nfs4_getflabel(vp);
	if (file_tsl == NULL)
		return (B_FALSE);

	file_label = label2bslabel(file_tsl);
	DTRACE_PROBE4(tx__rfs4__log__info__labelcheck, char *,
	    "comparing server's file label(1) with client label(2) (vp(3))",
	    bslabel_t *, file_label, bslabel_t *, clabel, vnode_t *, vp);

	ok = (flag == EQUALITY_CHECK) ?
	    blequal(clabel, file_label) : bldominates(clabel, file_label);

	/* drop the hold taken by nfs4_getflabel() */
	label_rele(file_tsl);
	return (ok);
}
/*
 * ACCESS: report which of the requested access rights the caller's
 * credential has on the current filehandle.
 *
 * For every requested bit that is evaluated, the bit is set in
 * resp->supported; it is additionally set in resp->access only when
 * VOP_ACCESS() succeeds and the extra mandatory-lock / label conditions
 * below hold.  Fails with NFS4ERR_NOFILEHANDLE when there is no current
 * filehandle, or with the translated error when VOP_GETATTR() fails or
 * (on a labeled system) the file's label cannot be obtained.
 */
/* ARGSUSED */
static void
rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
struct compound_state *cs)
{
ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
int error;
vnode_t *vp;
struct vattr va;
int checkwriteperm;
cred_t *cr = cs->cr;
/*
 * clabel, slabel, tslabel and admin_low_client are assigned only on a
 * labeled system; every later use is guarded by is_system_labeled()
 * (and, for slabel/tslabel, by !admin_low_client) through short-circuit
 * evaluation.
 */
bslabel_t *clabel, *slabel;
ts_label_t *tslabel;
boolean_t admin_low_client;
#if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
if (cs->access == CS_ACCESS_DENIED) {
*cs->statusp = resp->status = NFS4ERR_ACCESS;
return;
}
#endif
if (cs->vp == NULL) {
*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
return;
}
ASSERT(cr != NULL);
vp = cs->vp;
/*
* If the file system is exported read only, it is not appropriate
* to check write permissions for regular files and directories.
* Special files are interpreted by the client, so the underlying
* permissions are sent back to the client for interpretation.
*/
if (rdonly4(cs->exi, cs->vp, req) &&
(vp->v_type == VREG || vp->v_type == VDIR))
checkwriteperm = 0;
else
checkwriteperm = 1;
/*
* XXX
* We need the mode so that we can correctly determine access
* permissions relative to a mandatory lock file. Access to
* mandatory lock files is denied on the server, so it might
* as well be reflected to the server during the open.
*/
va.va_mask = AT_MODE;
error = VOP_GETATTR(vp, &va, 0, cr);
if (error) {
*cs->statusp = resp->status = puterrno4(error);
return;
}
resp->access = 0;
resp->supported = 0;
/*
 * On a labeled system, fetch the client's label from the request.  A
 * client at admin_low skips the file-label lookup; otherwise the file's
 * label is fetched and held until the label_rele() at the end.
 */
if (is_system_labeled()) {
ASSERT(req->rq_label != NULL);
clabel = req->rq_label;
DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
"got client label from request(1)",
struct svc_req *, req);
if (!blequal(&l_admin_low->tsl_label, clabel)) {
if ((tslabel = nfs4_getflabel(vp)) == NULL) {
*cs->statusp = resp->status = puterrno4(EACCES);
return;
}
slabel = label2bslabel(tslabel);
DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
char *, "got server label(1) for vp(2)",
bslabel_t *, slabel, vnode_t *, vp);
admin_low_client = B_FALSE;
} else
admin_low_client = B_TRUE;
}
/* READ: needs VREAD, no mandatory locking, and label dominance */
if (args->access & ACCESS4_READ) {
error = VOP_ACCESS(vp, VREAD, 0, cr);
if (!error && !MANDLOCK(vp, va.va_mode) &&
(!is_system_labeled() || admin_low_client ||
bldominates(clabel, slabel)))
resp->access |= ACCESS4_READ;
resp->supported |= ACCESS4_READ;
}
/* LOOKUP: directories only; needs VEXEC and label dominance */
if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
error = VOP_ACCESS(vp, VEXEC, 0, cr);
if (!error && (!is_system_labeled() || admin_low_client ||
bldominates(clabel, slabel)))
resp->access |= ACCESS4_LOOKUP;
resp->supported |= ACCESS4_LOOKUP;
}
/*
 * MODIFY/EXTEND: skipped entirely when checkwriteperm is 0; needs
 * VWRITE, no mandatory locking, and label equality.  Only the bits the
 * client asked for are granted, but both are reported as supported.
 */
if (checkwriteperm &&
(args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
error = VOP_ACCESS(vp, VWRITE, 0, cr);
if (!error && !MANDLOCK(vp, va.va_mode) &&
(!is_system_labeled() || admin_low_client ||
blequal(clabel, slabel)))
resp->access |=
(args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND));
resp->supported |= (ACCESS4_MODIFY|ACCESS4_EXTEND);
}
/* DELETE: directories only; needs VWRITE and label equality */
if (checkwriteperm &&
(args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
error = VOP_ACCESS(vp, VWRITE, 0, cr);
if (!error && (!is_system_labeled() || admin_low_client ||
blequal(clabel, slabel)))
resp->access |= ACCESS4_DELETE;
resp->supported |= ACCESS4_DELETE;
}
/* EXECUTE: non-directories only; needs VEXEC, no mandatory locking, dominance */
if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
error = VOP_ACCESS(vp, VEXEC, 0, cr);
if (!error && !MANDLOCK(vp, va.va_mode) &&
(!is_system_labeled() || admin_low_client ||
bldominates(clabel, slabel)))
resp->access |= ACCESS4_EXECUTE;
resp->supported |= ACCESS4_EXECUTE;
}
/* release the file label obtained above (only taken for non-admin_low clients) */
if (is_system_labeled() && !admin_low_client)
label_rele(tslabel);
*cs->statusp = resp->status = NFS4_OK;
}
/*
 * COMMIT: push the byte range [offset, offset + count) of the current
 * filehandle out via VOP_PUTPAGE() followed by VOP_FSYNC(), and return
 * the write verifier on success.
 *
 * The checks are performed in a fixed order; the first one that fails
 * determines the status returned to the client.
 */
/* ARGSUSED */
static void
rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
	vnode_t *vp = cs->vp;
	cred_t *cr = cs->cr;
	vattr_t va;
	nfsstat4 status;
	int error;

	if (vp == NULL) {
		status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	if (cs->access == CS_ACCESS_DENIED) {
		status = NFS4ERR_ACCESS;
		goto out;
	}

	/* reject a range whose end wraps around */
	if (args->offset + args->count < args->offset) {
		status = NFS4ERR_INVAL;
		goto out;
	}

	/*
	 * Without the owner uid the access check below cannot be done
	 * correctly, so a failure to fetch attributes fails the request.
	 */
	va.va_mask = AT_UID;
	error = VOP_GETATTR(vp, &va, 0, cr);
	if (error) {
		status = puterrno4(error);
		goto out;
	}

	if (rdonly4(cs->exi, cs->vp, req)) {
		status = NFS4ERR_ROFS;
		goto out;
	}

	if (vp->v_type != VREG) {
		status = (vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
		goto out;
	}

	/* the owner may always commit; anyone else needs write permission */
	if (crgetuid(cr) != va.va_uid &&
	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr))) {
		status = puterrno4(error);
		goto out;
	}

	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
	if (!error)
		error = VOP_FSYNC(vp, FNODSYNC, cr);
	if (error) {
		status = puterrno4(error);
		goto out;
	}

	resp->writeverf = Write4verf;
	status = NFS4_OK;

out:
	*cs->statusp = resp->status = status;
}
/*
 * do_rfs4_op_mknod is called from rfs4_op_create once the initial
 * verification is complete.  It performs the nfsv4 create for special
 * files (character/block devices, sockets and FIFOs).
 *
 * Returns the newly created vnode, or NULL after storing the failure
 * status in both *cs->statusp and resp->status.
 */
/* ARGSUSED */
static vnode_t *
do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
	struct compound_state *cs, vattr_t *vap, char *nm)
{
	vnode_t *newvp = NULL;
	int error;

	if (args->type == NF4CHR || args->type == NF4BLK) {
		/* creating device nodes requires the sys_devices policy */
		if (secpolicy_sys_devices(cs->cr) != 0) {
			*cs->statusp = resp->status = NFS4ERR_PERM;
			return (NULL);
		}
		vap->va_type = (args->type == NF4CHR) ? VCHR : VBLK;
		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
		    args->ftype4_u.devdata.specdata2);
		vap->va_mask |= AT_RDEV;
	} else if (args->type == NF4SOCK) {
		vap->va_type = VSOCK;
	} else if (args->type == NF4FIFO) {
		vap->va_type = VFIFO;
	} else {
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		return (NULL);
	}

	/*
	 * The caller must have supplied a mode.
	 */
	if ((vap->va_mask & AT_MODE) == 0) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return (NULL);
	}

	/* exclusive create, open mode 0 */
	error = VOP_CREATE(cs->vp, nm, vap, EXCL, 0, &newvp, cs->cr, 0);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		return (NULL);
	}

	return (newvp);
}
/*
 * nfsv4 create is used to create non-regular files. For regular files,
 * use nfsv4 open.
 *
 * Creates a directory (VOP_MKDIR), a symbolic link (VOP_SYMLINK followed
 * by VOP_LOOKUP) or a special file (via do_rfs4_op_mknod) named
 * args->objname inside the current filehandle's directory.  On success
 * the new object becomes the current filehandle (cs->vp / cs->fh), and
 * resp carries the change_info for the directory and the set of
 * createattrs reported as applied.  On failure the status is stored in
 * both *cs->statusp and resp->status and resp->attrset is cleared.
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
struct compound_state *cs)
{
CREATE4args *args = &argop->nfs_argop4_u.opcreate;
CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
int error;
/* bva/iva/ava: directory attrs before, immediately after, and at the end */
struct vattr bva, iva, iva2, ava, *vap;
cred_t *cr = cs->cr;
vnode_t *dvp = cs->vp;
vnode_t *vp = NULL;
char *nm, *lnm;
uint_t len, llen;
int syncval = 0;
struct nfs4_svgetit_arg sarg;
struct nfs4_ntov_table ntov;
struct statvfs64 sb;
nfsstat4 status;
resp->attrset = 0;
if (dvp == NULL) {
*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
return;
}
/*
* If there is an unshared filesystem mounted on this vnode,
* do not allow to create an object in this directory.
*/
if (vn_ismntpt(dvp)) {
*cs->statusp = resp->status = NFS4ERR_ACCESS;
return;
}
/* Verify that type is correct */
switch (args->type) {
case NF4LNK:
case NF4BLK:
case NF4CHR:
case NF4SOCK:
case NF4FIFO:
case NF4DIR:
break;
default:
*cs->statusp = resp->status = NFS4ERR_BADTYPE;
return;
};
if (cs->access == CS_ACCESS_DENIED) {
*cs->statusp = resp->status = NFS4ERR_ACCESS;
return;
}
if (dvp->v_type != VDIR) {
*cs->statusp = resp->status = NFS4ERR_NOTDIR;
return;
}
if (!utf8_dir_verify(&args->objname)) {
*cs->statusp = resp->status = NFS4ERR_INVAL;
return;
}
if (rdonly4(cs->exi, cs->vp, req)) {
*cs->statusp = resp->status = NFS4ERR_ROFS;
return;
}
/*
* Name of newly created object
*/
/* nm is a kmem buffer of length len; every exit path below frees it */
nm = utf8_to_fn(&args->objname, &len, NULL);
if (nm == NULL) {
*cs->statusp = resp->status = NFS4ERR_INVAL;
return;
}
if (len > MAXNAMELEN) {
*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
kmem_free(nm, len);
return;
}
/* attrset was already cleared on entry; this second clear is redundant */
resp->attrset = 0;
sarg.sbp = &sb;
nfs4_ntov_table_init(&ntov);
/*
* Decode createattrs.  From here on, every exit path must call
* nfs4_ntov_table_free(&ntov, &sarg).
* NOTE(review): presumably do_rfs4_set_attrs() fills in sarg.vap, which
* is read immediately below -- confirm against its definition.
*/
status = do_rfs4_set_attrs(&resp->attrset,
&args->createattrs, cs, &sarg,
&ntov, NFS4ATTR_SETIT);
/* a create that specifies no vattr-backed attributes is rejected */
if (sarg.vap->va_mask == 0 && status == NFS4_OK)
status = NFS4ERR_INVAL;
if (status != NFS4_OK) {
*cs->statusp = resp->status = status;
kmem_free(nm, len);
nfs4_ntov_table_free(&ntov, &sarg);
resp->attrset = 0;
return;
}
/* Get "before" change value */
bva.va_mask = AT_CTIME|AT_SEQ;
error = VOP_GETATTR(dvp, &bva, 0, cr);
if (error) {
*cs->statusp = resp->status = puterrno4(error);
kmem_free(nm, len);
nfs4_ntov_table_free(&ntov, &sarg);
resp->attrset = 0;
return;
}
NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
vap = sarg.vap;
/*
* Set default initial values for attributes when not specified
* in createattrs.
*/
if ((vap->va_mask & AT_UID) == 0) {
vap->va_uid = crgetuid(cr);
vap->va_mask |= AT_UID;
}
if ((vap->va_mask & AT_GID) == 0) {
vap->va_gid = crgetgid(cr);
vap->va_mask |= AT_GID;
}
vap->va_mask |= AT_TYPE;
/*
* Perform the type-specific create.  Each case leaves the result in
* "error" (or returns directly) and records in iva the directory's
* sequence number right after the create.
*/
switch (args->type) {
case NF4DIR:
vap->va_type = VDIR;
if ((vap->va_mask & AT_MODE) == 0) {
vap->va_mode = 0700; /* default: owner rwx only */
vap->va_mask |= AT_MODE;
}
error = VOP_MKDIR(dvp, nm, vap, &vp, cr);
if (error)
break;
/*
* Get the initial "after" sequence number, if it fails,
* set to zero
*/
iva.va_mask = AT_SEQ;
if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
iva.va_seq = 0;
break;
case NF4LNK:
vap->va_type = VLNK;
if ((vap->va_mask & AT_MODE) == 0) {
vap->va_mode = 0700; /* default: owner rwx only */
vap->va_mask |= AT_MODE;
}
/*
* symlink names must be treated as data
*/
lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);
if (lnm == NULL) {
*cs->statusp = resp->status = NFS4ERR_INVAL;
kmem_free(nm, len);
nfs4_ntov_table_free(&ntov, &sarg);
resp->attrset = 0;
return;
}
if (llen > MAXPATHLEN) {
*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
kmem_free(nm, len);
kmem_free(lnm, llen);
nfs4_ntov_table_free(&ntov, &sarg);
resp->attrset = 0;
return;
}
error = VOP_SYMLINK(dvp, nm, vap, lnm, cr);
/* lnm was already verified non-NULL above; the check is defensive */
if (lnm != NULL)
kmem_free(lnm, llen);
if (error)
break;
/*
* Get the initial "after" sequence number, if it fails,
* set to zero
*/
iva.va_mask = AT_SEQ;
if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
iva.va_seq = 0;
/* VOP_SYMLINK returns no vnode here, so look the new link up by name */
error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr);
if (error)
break;
/*
* va_seq is not safe over VOP calls, check it again
* if it has changed zero out iva to force atomic = FALSE.
*/
iva2.va_mask = AT_SEQ;
if (VOP_GETATTR(dvp, &iva2, 0, cs->cr) ||
iva2.va_seq != iva.va_seq)
iva.va_seq = 0;
break;
default:
/*
* probably a special file.
*/
if ((vap->va_mask & AT_MODE) == 0) {
vap->va_mode = 0600; /* default: owner rw only */
vap->va_mask |= AT_MODE;
}
syncval = FNODSYNC;
/*
* We know this will only generate one VOP call
*/
/* on failure do_rfs4_op_mknod has already stored the status */
vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm);
if (vp == NULL) {
kmem_free(nm, len);
nfs4_ntov_table_free(&ntov, &sarg);
resp->attrset = 0;
return;
}
/*
* Get the initial "after" sequence number, if it fails,
* set to zero
*/
iva.va_mask = AT_SEQ;
if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
iva.va_seq = 0;
break;
}
kmem_free(nm, len);
if (error) {
*cs->statusp = resp->status = puterrno4(error);
}
/*
* Force modified data and metadata out to stable storage.
*/
(void) VOP_FSYNC(dvp, 0, cr);
/*
* NOTE(review): on the success path nothing in this function has
* assigned resp->status yet, so this test appears to rely on the
* caller having initialised it to NFS4_OK -- confirm.
*/
if (resp->status != NFS4_OK) {
if (vp != NULL)
VN_RELE(vp);
nfs4_ntov_table_free(&ntov, &sarg);
resp->attrset = 0;
return;
}
/*
* Finish setup of cinfo response, "before" value already set.
* Get "after" change value, if it fails, simply return the
* before value.
*/
ava.va_mask = AT_CTIME|AT_SEQ;
if (VOP_GETATTR(dvp, &ava, 0, cr)) {
ava.va_ctime = bva.va_ctime;
ava.va_seq = 0;
}
NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
/*
* True verification that object was created with correct
* attrs is impossible. The attrs could have been changed
* immediately after object creation. If attributes did
* not verify, the only recourse for the server is to
* destroy the object. Maybe if some attrs (like gid)
* are set incorrectly, the object should be destroyed;
* however, seems bad as a default policy. Do we really
* want to destroy an object over one of the times not
* verifying correctly? For these reasons, the server
* currently sets bits in attrset for createattrs
* that were set; however, no verification is done.
*
* vmask_to_nmask accounts for vattr bits set on create
* [do_rfs4_set_attrs() only sets resp bits for
* non-vattr/vfs bits.]
* Mask off any bits set by default so as not to return
* more attrset bits than were requested in createattrs
*/
nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
resp->attrset &= args->createattrs.attrmask;
nfs4_ntov_table_free(&ntov, &sarg);
/* build the filehandle for the new object in place of the current fh */
error = makefh4(&cs->fh, vp, cs->exi);
if (error) {
*cs->statusp = resp->status = puterrno4(error);
}
/*
* The cinfo.atomic = TRUE only if we got no errors, we have
* non-zero va_seq's, and it has incremented by exactly one
* during the creation and it didn't change during the VOP_LOOKUP
* or VOP_FSYNC.
*/
if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
iva.va_seq == (bva.va_seq + 1) &&
iva.va_seq == ava.va_seq)
resp->cinfo.atomic = TRUE;
else
resp->cinfo.atomic = FALSE;
/* syncval is FNODSYNC for special files and 0 otherwise */
(void) VOP_FSYNC(vp, syncval, cr);
if (resp->status != NFS4_OK) {
VN_RELE(vp);
return;
}
/* the new object replaces the directory as the current filehandle */
if (cs->vp)
VN_RELE(cs->vp);
cs->vp = vp;
*cs->statusp = resp->status = NFS4_OK;
}
/*
 * DELEGRETURN: look up the delegation state named by the stateid and,
 * provided it belongs to the current filehandle, return the delegation.
 * The client's lease is renewed and the state reference dropped whether
 * or not the filehandle matched.
 */
/*ARGSUSED*/
static void
rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
	rfs4_deleg_state_t *deleg;
	nfsstat4 status;

	status = rfs4_get_deleg_state(&args->deleg_stateid, &deleg);
	resp->status = *cs->statusp = status;
	if (status != NFS4_OK)
		return;

	/* The delegation must refer to the file named by the current fh */
	if (cs->vp == deleg->finfo->vp)
		rfs4_return_deleg(deleg, FALSE);
	else
		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;

	rfs4_update_lease(deleg->client);
	rfs4_deleg_state_rele(deleg);
}
/*
 * Check to see if a given "flavor" is an explicitly shared flavor.
 * The assumption of this routine is the "flavor" is already a valid
 * flavor in the secinfo list of "exi".
 *
 * e.g.
 * # share -o sec=flavor1 /export
 * # share -o sec=flavor2 /export/home
 *
 * flavor2 is not an explicitly shared flavor for /export,
 * however it is in the secinfo list for /export thru the
 * server namespace setup.
 *
 * The first secinfo entry that either matches "flavor" or is AUTH_NONE
 * decides the answer: the value of SEC_REF_EXPORTED() for that entry is
 * returned.  0 is returned when no entry qualifies.
 */
int
is_exported_sec(int flavor, struct exportinfo *exi)
{
	struct secinfo *secs = exi->exi_export.ex_secinfo;
	int idx;

	for (idx = 0; idx < exi->exi_export.ex_seccnt; idx++) {
		struct secinfo *cur = &secs[idx];

		if (cur->s_secinfo.sc_nfsnum != flavor &&
		    cur->s_secinfo.sc_nfsnum != AUTH_NONE)
			continue;

		return (SEC_REF_EXPORTED(cur));
	}

	/* Should not reach this point based on the assumption */
	return (0);
}
/*
 * Check if the security flavor used in the request matches what is
 * required at the export point or at the root pseudo node (exi_root).
 *
 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
 */
static int
secinfo_match_or_authnone(struct compound_state *cs)
{
	/*
	 * Compare cs->nfsflavor (taken from the request) with each
	 * flavor in the current export's secinfo list (cs->exi).
	 */
	struct secinfo *secs = cs->exi->exi_export.ex_secinfo;
	int idx = 0;

	while (idx < cs->exi->exi_export.ex_seccnt) {
		int exported_flavor = secs[idx].s_secinfo.sc_nfsnum;

		if (exported_flavor == cs->nfsflavor ||
		    exported_flavor == AUTH_NONE)
			return (1);
		idx++;
	}

	return (0);
}
/*
 * Check the access authority for the client and return the correct error.
 *
 * The status is stored in *cs->statusp and also returned.  When
 * checkauth4() grants access and CS_ACCESS_LIMITED is not set in
 * cs->access, cs->access is set to CS_ACCESS_OK.
 */
nfsstat4
call_checkauth4(struct compound_state *cs, struct svc_req *req)
{
	nfsstat4 status;
	int authres;

	/*
	 * The flavor used by the request must be one of the flavors
	 * set in the server namespace before anything else is checked.
	 */
	if (!secinfo_match_or_authnone(cs)) {
		*cs->statusp = NFS4ERR_WRONGSEC;
		return (*cs->statusp);
	}

	authres = checkauth4(cs, req);

	if (authres > 0) {
		status = NFS4_OK;
	} else if (authres == 0) {
		status = NFS4ERR_ACCESS;
	} else if (authres == -2) {
		status = NFS4ERR_WRONGSEC;
	} else {
		/* any other negative result asks the client to retry */
		status = NFS4ERR_DELAY;
	}

	*cs->statusp = status;
	if (authres > 0 && !(cs->access & CS_ACCESS_LIMITED))
		cs->access = CS_ACCESS_OK;

	return (*cs->statusp);
}
/*
 * bitmap4_to_attrmask is called by getattr and readdir.
 * It sets up the vattr mask and determines whether vfsstat call is needed
 * based on the input bitmap.
 * Returns nfsv4 status (always NFS4_OK in this implementation).
 *
 * On return sargp->sbp is the caller's statvfs buffer if file system
 * attributes were requested and NULL otherwise.
 */
static nfsstat4
bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
{
	struct statvfs64 *caller_sbp = sargp->sbp;
	uint_t va_mask;
	int idx;

	sargp->sbp = NULL;
	sargp->flag = 0;
	sargp->rdattr_error = NFS4_OK;
	sargp->mntdfid_set = FALSE;
	sargp->xattr = sargp->cs->vp ?
	    get_fh4_flag(&sargp->cs->fh, FH4_ATTRDIR | FH4_NAMEDATTR) : 0;

	/*
	 * rdattr_error_req is true when the client wants errors reported
	 * per failed entry rather than failing the whole readdir.
	 */
	sargp->rdattr_error_req = (breq & FATTR4_RDATTR_ERROR_MASK) ? 1 : 0;

	/*
	 * Generate the va_mask.  Well-known request bitmaps map directly
	 * to a precomputed mask; anything else is built bit by bit.
	 */
	if (breq == (NFS4_NTOV_ATTR_MASK)) {
		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
	} else if (breq == (NFS4_FS_ATTR_MASK)) {
		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
		sargp->sbp = caller_sbp;
	} else if (breq == (NFS4_NTOV_ATTR_CACHE_MASK)) {
		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
	} else if (breq == (FATTR4_LEASE_TIME_MASK)) {
		sargp->vap->va_mask = 0;
	} else {
		va_mask = 0;
		for (idx = 0; idx < nfs4_ntov_map_size; idx++) {
			if (!(breq & nfs4_ntov_map[idx].fbit))
				continue;
			if (nfs4_ntov_map[idx].vbit)
				va_mask |= nfs4_ntov_map[idx].vbit;
		}

		/*
		 * Check if vfsstat is needed
		 */
		if (breq & NFS4_FS_ATTR_MASK)
			sargp->sbp = caller_sbp;

		sargp->vap->va_mask = va_mask;
	}

	return (NFS4_OK);
}
/*
* bitmap4_get_sysattrs is called by getattr and readdir.
* It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
* Returns nfsv4 status.
*/
static nfsstat4
bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
{
int error;
struct compound_state *cs = sargp->cs;
vnode_t *vp = cs->vp;
if (sargp->sbp != NULL) {
if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
sargp->sbp = NULL; /* to identify error */
return (puterrno4(error));
}
}
return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
}
static void
nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
{
ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
KM_SLEEP);
ntovp->attrcnt = 0;
ntovp->vfsstat = FALSE;
}
/*
 * Release everything held by an attribute conversion table set up with
 * nfs4_ntov_table_init().
 *
 * Each recorded attribute first gets its sv_getit routine called with
 * NFS4ATTR_FREEIT.  For set/verify operations the decoded values are
 * then released with xdr_free().  Finally the value array itself is
 * freed.
 */
static void
nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
	struct nfs4_svgetit_arg *sargp)
{
	int idx;

	/*
	 * XXX Should do the same checks for whether the bit is set
	 */
	for (idx = 0; idx < ntovp->attrcnt; idx++) {
		(void) (*nfs4_ntov_map[ntovp->amap[idx]].sv_getit)(
		    NFS4ATTR_FREEIT, sargp, &ntovp->na[idx]);
	}

	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
		/*
		 * xdr_free for getattr will be done later
		 */
		for (idx = 0; idx < ntovp->attrcnt; idx++) {
			xdr_free(nfs4_ntov_map[ntovp->amap[idx]].xfunc,
			    (caddr_t)&ntovp->na[idx]);
		}
	}

	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
}
/*
 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
 *
 * breq is the bitmap of requested attributes; fattrp receives the
 * resulting attrmask and the XDR-encoded attribute list; sargp carries
 * the already-fetched system attributes and the rdattr_error state.
 *
 * Returns NFS4_OK on success, NFS4ERR_INVAL if a write-only attribute is
 * requested, NFS4ERR_SERVERFAULT if XDR encoding of an attribute fails,
 * or the puterrno4() translation of an sv_getit failure when per-entry
 * error reporting (rdattr_error_req) was not requested.
 *
 * NOTE(review): on the NFS4ERR_INVAL return and on the "goto done" error
 * path fattrp->attrlist4 and attrlist4_len are not assigned here;
 * callers presumably pre-zero the fattr4 -- confirm.
 */
static nfsstat4
do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
struct nfs4_svgetit_arg *sargp)
{
int error = 0;
int i, k;
struct nfs4_ntov_table ntov;
XDR xdr;
ulong_t xdr_size;
char *xdr_attrs;
nfsstat4 status = NFS4_OK;
/* remembered so a change to rdattr_error during the loop can be detected */
nfsstat4 prev_rdattr_error = sargp->rdattr_error;
union nfs4_attr_u *na;
uint8_t *amap;
sargp->op = NFS4ATTR_GETIT;
sargp->flag = 0;
fattrp->attrmask = 0;
/* if no bits requested, then return empty fattr4 */
if (breq == 0) {
fattrp->attrlist4_len = 0;
fattrp->attrlist4 = NULL;
return (NFS4_OK);
}
/*
* return NFS4ERR_INVAL when client requests write-only attrs
*/
if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
return (NFS4ERR_INVAL);
nfs4_ntov_table_init(&ntov);
/* na/amap walk the table in step: one value slot and one map index per attr */
na = ntov.na;
amap = ntov.amap;
/*
* Now loop to get or verify the attrs
*/
for (i = 0; i < nfs4_ntov_map_size; i++) {
if (breq & nfs4_ntov_map[i].fbit) {
/* requested attributes that are not supported are silently skipped */
if ((*nfs4_ntov_map[i].sv_getit)(
NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
error = (*nfs4_ntov_map[i].sv_getit)(
NFS4ATTR_GETIT, sargp, na);
/*
* Possible error values:
* >0 if sv_getit failed to
* get the attr; 0 if succeeded;
* <0 if rdattr_error and the
* attribute cannot be returned.
*/
if (error && !(sargp->rdattr_error_req))
goto done;
/*
* If error then just for entry
*/
if (error == 0) {
fattrp->attrmask |=
nfs4_ntov_map[i].fbit;
*amap++ =
(uint8_t)nfs4_ntov_map[i].nval;
na++;
(ntov.attrcnt)++;
} else if ((error > 0) &&
(sargp->rdattr_error == NFS4_OK)) {
/* only the first failure is recorded in rdattr_error */
sargp->rdattr_error = puterrno4(error);
}
/* per-entry errors must not leak into the final status at "done" */
error = 0;
}
}
}
/*
* If rdattr_error was set after the return value for it was assigned,
* update it.
*/
if (prev_rdattr_error != sargp->rdattr_error) {
na = ntov.na;
amap = ntov.amap;
/*
* Scan the recorded attributes; entries numbered below
* FATTR4_RDATTR_ERROR are skipped, and the scan stops at the first
* entry that is not below it, re-fetching that entry if it is
* FATTR4_RDATTR_ERROR itself.
*/
for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
k = *amap;
if (k < FATTR4_RDATTR_ERROR) {
continue;
}
if ((k == FATTR4_RDATTR_ERROR) &&
((*nfs4_ntov_map[k].sv_getit)(
NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
(void) (*nfs4_ntov_map[k].sv_getit)(
NFS4ATTR_GETIT, sargp, na);
}
break;
}
}
/* first pass: total the encoded size of every collected attribute */
xdr_size = 0;
na = ntov.na;
amap = ntov.amap;
for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
}
fattrp->attrlist4_len = xdr_size;
if (xdr_size) {
/* freed by rfs4_op_getattr_free() */
fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
/* second pass: encode each attribute into the freshly allocated buffer */
xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
na = ntov.na;
amap = ntov.amap;
for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
cmn_err(CE_WARN, "do_rfs4_op_getattr: xdr "
"encode of attribute %d failed\n",
*amap);
/* the buffer stays attached to fattrp; it is not freed here */
status = NFS4ERR_SERVERFAULT;
break;
}
}
/* xdrmem_destroy(&xdrs); */ /* NO-OP */
} else {
fattrp->attrlist4 = NULL;
}
done:
nfs4_ntov_table_free(&ntov, sargp);
/* non-zero only when arriving via "goto done" from the fetch loop */
if (error != 0)
status = puterrno4(error);
return (status);
}
/*
 * GETATTR: return the requested attributes of the current filehandle.
 * The request bitmap is turned into a vattr mask, the system attributes
 * are fetched, and the result is encoded into resp->obj_attributes.
 * The first step that fails supplies the status.
 */
/* ARGSUSED */
static void
rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
	struct nfs4_svgetit_arg sarg;
	struct statvfs64 sb;
	nfsstat4 status;

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		return;
	}

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		return;
	}

	sarg.cs = cs;
	sarg.sbp = &sb;

	status = bitmap4_to_attrmask(args->attr_request, &sarg);
	if (status == NFS4_OK)
		status = bitmap4_get_sysattrs(&sarg);
	if (status == NFS4_OK) {
		status = do_rfs4_op_getattr(args->attr_request,
		    &resp->obj_attributes, &sarg);
	}

	*cs->statusp = resp->status = status;
}
/*
 * Release the attribute data attached to a GETATTR result.
 */
static void
rfs4_op_getattr_free(nfs_resop4 *resop)
{
	nfs4_fattr4_free(&resop->nfs_resop4_u.opgetattr.obj_attributes);
}
/*
 * GETFH: return a copy of the current filehandle.  The copy lives in a
 * buffer allocated here and released by rfs4_op_getfh_free().
 */
/* ARGSUSED */
static void
rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
	nfsstat4 status;

	if (cs->vp == NULL) {
		status = NFS4ERR_NOFILEHANDLE;
	} else if (cs->access == CS_ACCESS_DENIED) {
		status = NFS4ERR_ACCESS;
	} else {
		resp->object.nfs_fh4_val =
		    kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
		nfs_fh4_copy(&cs->fh, &resp->object);
		status = NFS4_OK;
	}

	*cs->statusp = resp->status = status;
}
/*
 * Free the filehandle buffer attached to a successful GETFH result and
 * reset the result's filehandle to empty.  Results with a non-OK status
 * or a NULL buffer are left alone.
 */
static void
rfs4_op_getfh_free(nfs_resop4 *resop)
{
	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;

	if (resp->status != NFS4_OK || resp->object.nfs_fh4_val == NULL)
		return;

	kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
	resp->object.nfs_fh4_val = NULL;
	resp->object.nfs_fh4_len = 0;
}
/*
 * illegal: args: void
 *	    res : status (NFS4ERR_OP_ILLEGAL)
 *
 * Marks the result as OP_ILLEGAL and fails the operation.
 */
/* ARGSUSED */
static void
rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
	struct svc_req *req, struct compound_state *cs)
{
	resop->resop = OP_ILLEGAL;
	*cs->statusp = resop->nfs_resop4_u.opillegal.status =
	    NFS4ERR_OP_ILLEGAL;
}
/*
 * link: args: SAVED_FH: file, CURRENT_FH: target directory
 *	 res: status. If success - CURRENT_FH unchanged, return change_info
 *
 * The checks run in a fixed order and the first failure supplies the
 * status.  Both the source object and the target directory are synced
 * after VOP_LINK(), whether or not the link succeeded.
 */
/* ARGSUSED */
static void
rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	LINK4args *args = &argop->nfs_argop4_u.oplink;
	LINK4res *resp = &resop->nfs_resop4_u.oplink;
	vnode_t *srcvp = cs->saved_vp;	/* SAVED_FH: source object */
	vnode_t *tdvp = cs->vp;		/* CURRENT_FH: target directory */
	struct vattr before, mid, after;
	char *name;
	uint_t namelen;
	int error;

	if (srcvp == NULL || tdvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		return;
	}

	/*
	 * Linking into a directory that has a non-shared filesystem
	 * mounted on it is refused, as is any request already denied
	 * access.
	 */
	if (vn_ismntpt(tdvp) || cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		return;
	}

	/* The source may not be a directory */
	if (srcvp->v_type == VDIR) {
		*cs->statusp = resp->status = NFS4ERR_ISDIR;
		return;
	}

	/* The target must be a directory */
	if (tdvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		return;
	}

	/* Source and target must belong to the same export */
	if (cs->saved_exi != cs->exi) {
		*cs->statusp = resp->status = NFS4ERR_XDEV;
		return;
	}

	if (!utf8_dir_verify(&args->newname)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return;
	}

	name = utf8_to_fn(&args->newname, &namelen, NULL);
	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return;
	}

	if (namelen > MAXNAMELEN) {
		kmem_free(name, namelen);
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		return;
	}

	if (rdonly4(cs->exi, cs->vp, req)) {
		kmem_free(name, namelen);
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		return;
	}

	/* Get "before" change value */
	before.va_mask = AT_CTIME|AT_SEQ;
	error = VOP_GETATTR(tdvp, &before, 0, cs->cr);
	if (error) {
		kmem_free(name, namelen);
		*cs->statusp = resp->status = puterrno4(error);
		return;
	}

	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, before.va_ctime)

	error = VOP_LINK(tdvp, srcvp, name, cs->cr);
	kmem_free(name, namelen);

	/*
	 * Sequence number right after the link; zero if it cannot be read.
	 */
	mid.va_mask = AT_SEQ;
	if (VOP_GETATTR(tdvp, &mid, 0, cs->cr))
		mid.va_seq = 0;

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(srcvp, FNODSYNC, cs->cr);
	(void) VOP_FSYNC(tdvp, 0, cs->cr);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		return;
	}

	/*
	 * Get "after" change value; fall back to the "before" value
	 * if the attributes cannot be read.
	 */
	after.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(tdvp, &after, 0, cs->cr)) {
		after.va_ctime = before.va_ctime;
		after.va_seq = 0;
	}

	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, after.va_ctime)

	/*
	 * cinfo.atomic is TRUE only when all three sequence numbers are
	 * non-zero, the number advanced by exactly one across VOP_LINK,
	 * and it did not move again across the VOP_FSYNC calls.
	 */
	resp->cinfo.atomic = (before.va_seq && mid.va_seq && after.va_seq &&
	    mid.va_seq == (before.va_seq + 1) &&
	    mid.va_seq == after.va_seq) ? TRUE : FALSE;

	*cs->statusp = resp->status = NFS4_OK;
}
/*
 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
 *
 * Looks up the component "nm" in the directory cs->vp.  On success the
 * resulting vnode becomes the current filehandle: cs->vp, cs->fh and,
 * when an export boundary was crossed, cs->exi and cs->cr are updated.
 * When the lookup crosses into a different export, call_checkauth4() is
 * re-run against the new export with a fresh copy of cs->basecr.
 *
 * Returns NFS4_OK or the nfsstat4 describing the failure.  buflen is not
 * used here.
 */
/* ARGSUSED */
static nfsstat4
do_rfs4_op_lookup(char *nm, uint_t buflen, struct svc_req *req,
	struct compound_state *cs)
{
	int error;
	int different_export = 0;
	vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
	struct exportinfo *exi = NULL, *pre_exi = NULL;
	nfsstat4 stat;
	fid_t fid;
	int attrdir, dotdot, walk;
	bool_t is_newvp = FALSE;

	if (cs->vp->v_flag & V_XATTRDIR) {
		attrdir = 1;
		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
	} else {
		attrdir = 0;
		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
	}

	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');

	/*
	 * If dotdotting, then need to check whether it's
	 * above the root of a filesystem, or above an
	 * export point.
	 */
	if (dotdot) {
		/*
		 * If dotdotting at the root of a filesystem, then
		 * need to traverse back to the mounted-on filesystem
		 * and do the dotdot lookup there.
		 */
		if (cs->vp->v_flag & VROOT) {
			/*
			 * If at the system root, then can
			 * go up no further.
			 */
			if (VN_CMP(cs->vp, rootdir))
				return (puterrno4(ENOENT));

			/*
			 * Traverse back to the mounted-on filesystem
			 */
			cs->vp = untraverse(cs->vp);

			/*
			 * Set the different_export flag so we remember
			 * to pick up a new exportinfo entry for
			 * this new filesystem.
			 */
			different_export = 1;
		} else {
			/*
			 * If dotdotting above an export point then set
			 * the different_export to get new export info.
			 */
			different_export = nfs_exported(cs->exi, cs->vp);
		}
	}

	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr);
	if (error)
		return (puterrno4(error));

	/*
	 * If the vnode is in a pseudo filesystem, check whether it is visible.
	 *
	 * XXX if the vnode is a symlink and it is not visible in
	 * a pseudo filesystem, return ENOENT (not following symlink).
	 * V4 client can not mount such symlink. This is a regression
	 * from V2/V3.
	 *
	 * In the same exported filesystem, if the security flavor used
	 * is not an explicitly shared flavor, limit the view to the visible
	 * list entries only. This is not a WRONGSEC case because it's already
	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
	 */
	if (!different_export &&
	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
	    cs->access & CS_ACCESS_LIMITED)) {
		if (! nfs_visible(cs->exi, vp, &different_export)) {
			VN_RELE(vp);
			return (puterrno4(ENOENT));
		}
	}

	/*
	 * If it's a mountpoint, then traverse it.
	 */
	if (vn_ismntpt(vp)) {
		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
		pre_tvp = vp;		/* save pre-traversed vnode */

		/*
		 * hold pre_tvp to counteract rele by traverse. We will
		 * need pre_tvp below if checkexport4 fails
		 */
		VN_HOLD(pre_tvp);
		tvp = vp;
		if ((error = traverse(&tvp)) != 0) {
			VN_RELE(vp);
			VN_RELE(pre_tvp);
			return (puterrno4(error));
		}
		vp = tvp;
		different_export = 1;
	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
		/*
		 * The vfsp comparison is to handle the case where
		 * a LOFS mount is shared. lo_lookup traverses mount points,
		 * and NFS is unaware of local fs transitions because
		 * v_vfsmountedhere isn't set. For this special LOFS case,
		 * the dir and the obj returned by lookup will have different
		 * vfs ptrs.
		 */
		different_export = 1;
	}

	if (different_export) {
		bzero(&fid, sizeof (fid));
		fid.fid_len = MAXFIDSZ;
		error = vop_fid_pseudo(vp, &fid);
		if (error) {
			VN_RELE(vp);
			if (pre_tvp)
				VN_RELE(pre_tvp);
			return (puterrno4(error));
		}

		if (dotdot)
			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
		else
			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);

		if (exi == NULL) {
			if (pre_tvp) {
				/*
				 * If this vnode is a mounted-on vnode,
				 * but the mounted-on file system is not
				 * exported, send back the filehandle for
				 * the mounted-on vnode, not the root of
				 * the mounted-on file system.
				 */
				VN_RELE(vp);
				vp = pre_tvp;
				exi = pre_exi;
			} else {
				VN_RELE(vp);
				return (puterrno4(EACCES));
			}
		} else if (pre_tvp) {
			/* we're done with pre_tvp now. release extra hold */
			VN_RELE(pre_tvp);
		}

		cs->exi = exi;

		/*
		 * Now we do a checkauth4. The reason is that
		 * this client/user may not have access to the new
		 * exported file system, and if he does,
		 * the client/user may be mapped to a different uid.
		 *
		 * We start with a new cr, because the checkauth4 done
		 * in the PUT*FH operation over wrote the cred's uid,
		 * gid, etc, and we want the real thing before calling
		 * checkauth4()
		 */
		crfree(cs->cr);
		cs->cr = crdup(cs->basecr);

		if (cs->vp)
			oldvp = cs->vp;
		cs->vp = vp;
		is_newvp = TRUE;

		stat = call_checkauth4(cs, req);
		if (stat != NFS4_OK) {
			VN_RELE(cs->vp);
			cs->vp = oldvp;
			return (stat);
		}
	}

	/*
	 * After various NFS checks, do a label check on the path
	 * component. The label on this path should either be the
	 * global zone's label or a zone's label. We are only
	 * interested in the zone's label because exported files
	 * in global zone is accessible (though read-only) to
	 * clients. The exportability/visibility check is already
	 * done before reaching this code.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel;

		ASSERT(req->rq_label != NULL);
		clabel = req->rq_label;
		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs4_label_check(clabel, vp, DOMINANCE_CHECK)) {
				error = EACCES;
				goto err_out;
			}
		} else {
			/*
			 * We grant access to admin_low label clients
			 * only if the client is trusted, i.e. also
			 * running Solaris Trusted Extension.
			 */
			struct sockaddr *ca;
			int addr_type;
			void *ipaddr;
			tsol_tpc_t *tp;

			ca = (struct sockaddr *)svc_getrpccaller(
			    req->rq_xprt)->buf;
			if (ca->sa_family == AF_INET) {
				addr_type = IPV4_VERSION;
				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
			} else if (ca->sa_family == AF_INET6) {
				addr_type = IPV6_VERSION;
				ipaddr = &((struct sockaddr_in6 *)
				    ca)->sin6_addr;
			} else {
				/*
				 * Neither IPv4 nor IPv6: there is no
				 * address to look the client up by, so it
				 * cannot be shown to be trusted.  Deny
				 * access rather than hand uninitialized
				 * values to find_tpc().
				 */
				error = EACCES;
				goto err_out;
			}

			/*
			 * NOTE(review): if find_tpc() returns a held
			 * template entry, the reference is never released
			 * here -- confirm against find_tpc() and release
			 * it if so.
			 */
			tp = find_tpc(ipaddr, addr_type, B_FALSE);
			if (tp == NULL || tp->tpc_tp.tp_doi !=
			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
			    SUN_CIPSO) {
				error = EACCES;
				goto err_out;
			}
		}
	}

	error = makefh4(&cs->fh, vp, cs->exi);

err_out:
	if (error) {
		/*
		 * Undo the vnode switch: if cs->vp was already replaced,
		 * release the new vnode and restore the old one;
		 * otherwise just drop the lookup result.
		 */
		if (is_newvp) {
			VN_RELE(cs->vp);
			cs->vp = oldvp;
		} else
			VN_RELE(vp);
		return (puterrno4(error));
	}

	if (!is_newvp) {
		if (cs->vp)
			VN_RELE(cs->vp);
		cs->vp = vp;
	} else if (oldvp)
		VN_RELE(oldvp);

	/*
	 * if did lookup on attrdir and didn't lookup .., set named
	 * attr fh flag
	 */
	if (attrdir && ! dotdot)
		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);

	/* Assume false for now, open proc will set this */
	cs->mandlock = FALSE;

	return (NFS4_OK);
}
/*
 * LOOKUP: resolve args->objname in the directory named by the current
 * filehandle.  The current filehandle must be a directory (a symlink
 * yields NFS4ERR_SYMLINK, anything else NFS4ERR_NOTDIR); the real work
 * is done by do_rfs4_op_lookup().
 */
/* ARGSUSED */
static void
rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
	char *name;
	uint_t namelen;
	nfsstat4 status;

	if (cs->vp == NULL) {
		status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	if (cs->vp->v_type == VLNK) {
		status = NFS4ERR_SYMLINK;
		goto out;
	}

	if (cs->vp->v_type != VDIR) {
		status = NFS4ERR_NOTDIR;
		goto out;
	}

	if (!utf8_dir_verify(&args->objname)) {
		status = NFS4ERR_INVAL;
		goto out;
	}

	name = utf8_to_str(&args->objname, &namelen, NULL);
	if (name == NULL) {
		status = NFS4ERR_INVAL;
		goto out;
	}

	if (namelen > MAXNAMELEN) {
		kmem_free(name, namelen);
		status = NFS4ERR_NAMETOOLONG;
		goto out;
	}

	status = do_rfs4_op_lookup(name, namelen, req, cs);
	*cs->statusp = resp->status = status;
	kmem_free(name, namelen);
	return;

out:
	*cs->statusp = resp->status = status;
}
/*
 * rfs4_op_lookupp: handler for the NFSv4 LOOKUPP operation.
 *
 * args  - operation arguments (unused)
 * resop - operation result; only the status field is filled in here
 * req   - the RPC request, handed through to do_rfs4_op_lookup()
 * cs    - compound state; cs->vp is the directory whose parent is wanted
 *
 * The status is stored both in the result and in *cs->statusp:
 *	NFS4ERR_NOFILEHANDLE	no current vnode (cs->vp is NULL)
 *	NFS4ERR_NOTDIR		the current vnode is not a directory
 * otherwise it is the result of looking up ".." via do_rfs4_op_lookup(),
 * with NFS4ERR_WRONGSEC mapped to NFS4_OK.
 */
/* ARGSUSED */
static void
rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	LOOKUPP4res *lpres = &resop->nfs_resop4_u.oplookupp;

	if (cs->vp == NULL) {
		*cs->statusp = lpres->status = NFS4ERR_NOFILEHANDLE;
	} else if (cs->vp->v_type != VDIR) {
		*cs->statusp = lpres->status = NFS4ERR_NOTDIR;
	} else {
		/* 3 is the size of ".." including its terminating NUL. */
		lpres->status = do_rfs4_op_lookup("..", 3, req, cs);

		/*
		 * From the NFSv4 specification, LOOKUPP should not check
		 * for NFS4ERR_WRONGSEC.  Return NFS4_OK instead.
		 */
		if (lpres->status == NFS4ERR_WRONGSEC)
			lpres->status = NFS4_OK;

		*cs->statusp = lpres->status;
	}
}
/*ARGSUSED2*/
static void
rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
struct compound_state *cs)
{
OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
vnode_t *avp = NULL;
int lookup_flags = LOOKUP_XATTR, error;
int exp_ro = 0;
if (cs->vp == NULL) {
*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
return;
}
/*
* Make a couple of checks made by copen()
*
* Check to make sure underlying fs supports xattrs. This
* is required because solaris filesystem implementations
* (UFS/TMPFS) don't enforce the noxattr mount option
* in VOP_LOOKUP(LOOKUP_XATTR). If fs doesn't support this
* pathconf cmd or if fs supports cmd but doesn't claim
* support for xattr, return NOTSUPP. It would be better
* to use VOP_PATHCONF( _PC_XATTR_ENABLED) for this; however,
* that cmd is not available to VOP_PATHCONF interface
* (it's only implemented inside pathconf syscall)...
*
* Verify permission to put attributes on files (access
* checks from copen).
*/
if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0) {
error = ENOTSUP;
goto error_out;
}
if ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr) != 0) &&<