| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| |
| /* |
| * Copyright (c) 2016 STRATO AG. All rights reserved. |
| */ |
| |
| /* |
| * Copyright 2015 Nexenta Systems, Inc. All rights reserved. |
| */ |
| |
| /* |
| * Copyright 2010 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| */ |
| |
| /* |
| * Copyright 1983,1984,1985,1986,1987,1988,1989 AT&T. |
| * All Rights Reserved |
| */ |
| |
| /* |
| * Copyright (c) 2013, Joyent, Inc. All rights reserved. |
| * Copyright 2022 Oxide Computer Company |
| */ |
| |
| #include <sys/param.h> |
| #include <sys/types.h> |
| #include <sys/systm.h> |
| #include <sys/cred.h> |
| #include <sys/time.h> |
| #include <sys/vnode.h> |
| #include <sys/vfs.h> |
| #include <sys/vfs_opreg.h> |
| #include <sys/file.h> |
| #include <sys/filio.h> |
| #include <sys/uio.h> |
| #include <sys/buf.h> |
| #include <sys/mman.h> |
| #include <sys/pathname.h> |
| #include <sys/dirent.h> |
| #include <sys/debug.h> |
| #include <sys/vmsystm.h> |
| #include <sys/fcntl.h> |
| #include <sys/flock.h> |
| #include <sys/swap.h> |
| #include <sys/errno.h> |
| #include <sys/strsubr.h> |
| #include <sys/sysmacros.h> |
| #include <sys/kmem.h> |
| #include <sys/cmn_err.h> |
| #include <sys/pathconf.h> |
| #include <sys/utsname.h> |
| #include <sys/dnlc.h> |
| #include <sys/acl.h> |
| #include <sys/systeminfo.h> |
| #include <sys/policy.h> |
| #include <sys/sdt.h> |
| #include <sys/list.h> |
| #include <sys/stat.h> |
| #include <sys/zone.h> |
| |
| #include <rpc/types.h> |
| #include <rpc/auth.h> |
| #include <rpc/clnt.h> |
| |
| #include <nfs/nfs.h> |
| #include <nfs/nfs_clnt.h> |
| #include <nfs/nfs_acl.h> |
| #include <nfs/lm.h> |
| #include <nfs/nfs4.h> |
| #include <nfs/nfs4_kprot.h> |
| #include <nfs/rnode4.h> |
| #include <nfs/nfs4_clnt.h> |
| |
| #include <vm/hat.h> |
| #include <vm/as.h> |
| #include <vm/page.h> |
| #include <vm/pvn.h> |
| #include <vm/seg.h> |
| #include <vm/seg_map.h> |
| #include <vm/seg_kpm.h> |
| #include <vm/seg_vn.h> |
| |
| #include <fs/fs_subr.h> |
| |
| #include <sys/ddi.h> |
| #include <sys/int_fmtio.h> |
| #include <sys/fs/autofs.h> |
| |
/*
 * Directory attribute bundle; a pointer to one of these is the last
 * argument of nfs4_update_dircaches().
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names and types only -- confirm against the code that fills them in.
 */
typedef struct {
	nfs4_ga_res_t	*di_garp;	/* presumably GETATTR results */
	cred_t		*di_cred;	/* presumably cred used for the call */
	hrtime_t	di_time_call;	/* presumably when the call was made */
} dirattr_info_t;
| |
/*
 * Identifies the ACL operation in progress; this is the type of the
 * second argument of nfs4_is_acl_mask_valid().
 */
typedef enum nfs4_acl_op {
	NFS4_ACL_GET,
	NFS4_ACL_SET
} nfs4_acl_op_t;
| |
| static struct lm_sysid *nfs4_find_sysid(mntinfo4_t *); |
| |
| static void nfs4_update_dircaches(change_info4 *, vnode_t *, vnode_t *, |
| char *, dirattr_info_t *); |
| |
| static void nfs4close_otw(rnode4_t *, cred_t *, nfs4_open_owner_t *, |
| nfs4_open_stream_t *, int *, int *, nfs4_close_type_t, |
| nfs4_error_t *, int *); |
| static int nfs4_rdwrlbn(vnode_t *, page_t *, u_offset_t, size_t, int, |
| cred_t *); |
| static int nfs4write(vnode_t *, caddr_t, u_offset_t, int, cred_t *, |
| stable_how4 *); |
| static int nfs4read(vnode_t *, caddr_t, offset_t, int, size_t *, |
| cred_t *, bool_t, struct uio *); |
| static int nfs4setattr(vnode_t *, struct vattr *, int, cred_t *, |
| vsecattr_t *); |
| static int nfs4openattr(vnode_t *, vnode_t **, int, cred_t *); |
| static int nfs4lookup(vnode_t *, char *, vnode_t **, cred_t *, int); |
| static int nfs4lookup_xattr(vnode_t *, char *, vnode_t **, int, cred_t *); |
| static int nfs4lookupvalidate_otw(vnode_t *, char *, vnode_t **, cred_t *); |
| static int nfs4lookupnew_otw(vnode_t *, char *, vnode_t **, cred_t *); |
| static int nfs4mknod(vnode_t *, char *, struct vattr *, enum vcexcl, |
| int, vnode_t **, cred_t *); |
| static int nfs4open_otw(vnode_t *, char *, struct vattr *, vnode_t **, |
| cred_t *, int, int, enum createmode4, int); |
| static int nfs4rename(vnode_t *, char *, vnode_t *, char *, cred_t *, |
| caller_context_t *); |
| static int nfs4rename_persistent_fh(vnode_t *, char *, vnode_t *, |
| vnode_t *, char *, cred_t *, nfsstat4 *); |
| static int nfs4rename_volatile_fh(vnode_t *, char *, vnode_t *, |
| vnode_t *, char *, cred_t *, nfsstat4 *); |
| static int do_nfs4readdir(vnode_t *, rddir4_cache *, cred_t *); |
| static void nfs4readdir(vnode_t *, rddir4_cache *, cred_t *); |
| static int nfs4_bio(struct buf *, stable_how4 *, cred_t *, bool_t); |
| static int nfs4_getapage(vnode_t *, u_offset_t, size_t, uint_t *, |
| page_t *[], size_t, struct seg *, caddr_t, |
| enum seg_rw, cred_t *); |
| static void nfs4_readahead(vnode_t *, u_offset_t, caddr_t, struct seg *, |
| cred_t *); |
| static int nfs4_sync_putapage(vnode_t *, page_t *, u_offset_t, size_t, |
| int, cred_t *); |
| static int nfs4_sync_pageio(vnode_t *, page_t *, u_offset_t, size_t, |
| int, cred_t *); |
| static int nfs4_commit(vnode_t *, offset4, count4, cred_t *); |
| static void nfs4_set_mod(vnode_t *); |
| static void nfs4_get_commit(vnode_t *); |
| static void nfs4_get_commit_range(vnode_t *, u_offset_t, size_t); |
| static int nfs4_putpage_commit(vnode_t *, offset_t, size_t, cred_t *); |
| static int nfs4_commit_vp(vnode_t *, u_offset_t, size_t, cred_t *, int); |
| static int nfs4_sync_commit(vnode_t *, page_t *, offset3, count3, |
| cred_t *); |
| static void do_nfs4_async_commit(vnode_t *, page_t *, offset3, count3, |
| cred_t *); |
| static int nfs4_update_attrcache(nfsstat4, nfs4_ga_res_t *, |
| hrtime_t, vnode_t *, cred_t *); |
| static int nfs4_open_non_reg_file(vnode_t **, int, cred_t *); |
| static int nfs4_safelock(vnode_t *, const struct flock64 *, cred_t *); |
| static void nfs4_register_lock_locally(vnode_t *, struct flock64 *, int, |
| u_offset_t); |
| static int nfs4_lockrelease(vnode_t *, int, offset_t, cred_t *); |
| static int nfs4_block_and_wait(clock_t *, rnode4_t *); |
| static cred_t *state_to_cred(nfs4_open_stream_t *); |
| static void denied_to_flk(LOCK4denied *, flock64_t *, LOCKT4args *); |
| static pid_t lo_to_pid(lock_owner4 *); |
| static void nfs4_reinstitute_local_lock_state(vnode_t *, flock64_t *, |
| cred_t *, nfs4_lock_owner_t *); |
| static void push_reinstate(vnode_t *, int, flock64_t *, cred_t *, |
| nfs4_lock_owner_t *); |
| static int open_and_get_osp(vnode_t *, cred_t *, nfs4_open_stream_t **); |
| static void nfs4_delmap_callback(struct as *, void *, uint_t); |
| static void nfs4_free_delmapcall(nfs4_delmapcall_t *); |
| static nfs4_delmapcall_t *nfs4_init_delmapcall(); |
| static int nfs4_find_and_delete_delmapcall(rnode4_t *, int *); |
| static int nfs4_is_acl_mask_valid(uint_t, nfs4_acl_op_t); |
| static int nfs4_create_getsecattr_return(vsecattr_t *, vsecattr_t *, |
| uid_t, gid_t, int); |
| |
| /* |
| * Routines that implement the setting of v4 args for the misc. ops |
| */ |
| static void nfs4args_lock_free(nfs_argop4 *); |
| static void nfs4args_lockt_free(nfs_argop4 *); |
| static void nfs4args_setattr(nfs_argop4 *, vattr_t *, vsecattr_t *, |
| int, rnode4_t *, cred_t *, bitmap4, int *, |
| nfs4_stateid_types_t *); |
| static void nfs4args_setattr_free(nfs_argop4 *); |
| static int nfs4args_verify(nfs_argop4 *, vattr_t *, enum nfs_opnum4, |
| bitmap4); |
| static void nfs4args_verify_free(nfs_argop4 *); |
| static void nfs4args_write(nfs_argop4 *, stable_how4, rnode4_t *, cred_t *, |
| WRITE4args **, nfs4_stateid_types_t *); |
| |
| /* |
| * These are the vnode ops functions that implement the vnode interface to |
| * the networked file system. See more comments below at nfs4_vnodeops. |
| */ |
| static int nfs4_open(vnode_t **, int, cred_t *, caller_context_t *); |
| static int nfs4_close(vnode_t *, int, int, offset_t, cred_t *, |
| caller_context_t *); |
| static int nfs4_read(vnode_t *, struct uio *, int, cred_t *, |
| caller_context_t *); |
| static int nfs4_write(vnode_t *, struct uio *, int, cred_t *, |
| caller_context_t *); |
| static int nfs4_ioctl(vnode_t *, int, intptr_t, int, cred_t *, int *, |
| caller_context_t *); |
| static int nfs4_setattr(vnode_t *, struct vattr *, int, cred_t *, |
| caller_context_t *); |
| static int nfs4_access(vnode_t *, int, int, cred_t *, caller_context_t *); |
| static int nfs4_readlink(vnode_t *, struct uio *, cred_t *, |
| caller_context_t *); |
| static int nfs4_fsync(vnode_t *, int, cred_t *, caller_context_t *); |
| static int nfs4_create(vnode_t *, char *, struct vattr *, enum vcexcl, |
| int, vnode_t **, cred_t *, int, caller_context_t *, |
| vsecattr_t *); |
| static int nfs4_remove(vnode_t *, char *, cred_t *, caller_context_t *, |
| int); |
| static int nfs4_link(vnode_t *, vnode_t *, char *, cred_t *, |
| caller_context_t *, int); |
| static int nfs4_rename(vnode_t *, char *, vnode_t *, char *, cred_t *, |
| caller_context_t *, int); |
| static int nfs4_mkdir(vnode_t *, char *, struct vattr *, vnode_t **, |
| cred_t *, caller_context_t *, int, vsecattr_t *); |
| static int nfs4_rmdir(vnode_t *, char *, vnode_t *, cred_t *, |
| caller_context_t *, int); |
| static int nfs4_symlink(vnode_t *, char *, struct vattr *, char *, |
| cred_t *, caller_context_t *, int); |
| static int nfs4_readdir(vnode_t *, struct uio *, cred_t *, int *, |
| caller_context_t *, int); |
| static int nfs4_seek(vnode_t *, offset_t, offset_t *, caller_context_t *); |
| static int nfs4_getpage(vnode_t *, offset_t, size_t, uint_t *, |
| page_t *[], size_t, struct seg *, caddr_t, |
| enum seg_rw, cred_t *, caller_context_t *); |
| static int nfs4_putpage(vnode_t *, offset_t, size_t, int, cred_t *, |
| caller_context_t *); |
| static int nfs4_map(vnode_t *, offset_t, struct as *, caddr_t *, size_t, |
| uchar_t, uchar_t, uint_t, cred_t *, caller_context_t *); |
| static int nfs4_addmap(vnode_t *, offset_t, struct as *, caddr_t, size_t, |
| uchar_t, uchar_t, uint_t, cred_t *, caller_context_t *); |
| static int nfs4_cmp(vnode_t *, vnode_t *, caller_context_t *); |
| static int nfs4_frlock(vnode_t *, int, struct flock64 *, int, offset_t, |
| struct flk_callback *, cred_t *, caller_context_t *); |
| static int nfs4_space(vnode_t *, int, struct flock64 *, int, offset_t, |
| cred_t *, caller_context_t *); |
| static int nfs4_delmap(vnode_t *, offset_t, struct as *, caddr_t, size_t, |
| uint_t, uint_t, uint_t, cred_t *, caller_context_t *); |
| static int nfs4_pageio(vnode_t *, page_t *, u_offset_t, size_t, int, |
| cred_t *, caller_context_t *); |
| static void nfs4_dispose(vnode_t *, page_t *, int, int, cred_t *, |
| caller_context_t *); |
| static int nfs4_setsecattr(vnode_t *, vsecattr_t *, int, cred_t *, |
| caller_context_t *); |
| /* |
| * These vnode ops are required to be called from outside this source file, |
| * e.g. by ephemeral mount stub vnode ops, and so may not be declared |
| * as static. |
| */ |
| int nfs4_getattr(vnode_t *, struct vattr *, int, cred_t *, |
| caller_context_t *); |
| void nfs4_inactive(vnode_t *, cred_t *, caller_context_t *); |
| int nfs4_lookup(vnode_t *, char *, vnode_t **, |
| struct pathname *, int, vnode_t *, cred_t *, |
| caller_context_t *, int *, pathname_t *); |
| int nfs4_fid(vnode_t *, fid_t *, caller_context_t *); |
| int nfs4_rwlock(vnode_t *, int, caller_context_t *); |
| void nfs4_rwunlock(vnode_t *, int, caller_context_t *); |
| int nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *); |
| int nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *, |
| caller_context_t *); |
| int nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *, |
| caller_context_t *); |
| int nfs4_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *, |
| caller_context_t *); |
| |
/*
 * Used for nfs4_commit_vp() to indicate if we should
 * wait on pending writes.
 */
#define	NFS4_WRITE_NOWAIT	0
#define	NFS4_WRITE_WAIT		1

/* Base wait interval, expressed in seconds. */
#define	NFS4_BASE_WAIT_TIME	1	/* 1 second */
| |
/*
 * Error flags used to pass information about certain special errors
 * which need to be handled specially.
 *
 * The values are negative; they are parenthesized so that the macros
 * expand to a single primary expression wherever they are used.
 */
#define	NFS_EOF			(-98)
#define	NFS_VERF_MISMATCH	(-97)
| |
/*
 * Flags used to differentiate between which operation drove the
 * potential CLOSE OTW. (see nfs4_close_otw_if_necessary)
 *
 * NOTE(review): NFS4_INACTIVE_OP (0x3) equals
 * NFS4_CLOSE_OP | NFS4_DELMAP_OP, so these values can only be told apart
 * by equality comparison and must not be combined or tested as a bitmask.
 */
#define	NFS4_CLOSE_OP		0x1
#define	NFS4_DELMAP_OP		0x2
#define	NFS4_INACTIVE_OP	0x3
| |
/*
 * True when vnode type t is a block device, character device or FIFO.
 * The argument is parenthesized so that expression arguments (for example
 * a conditional expression) compare as a whole; note that t is still
 * evaluated up to three times, so it must be free of side effects.
 */
#define	ISVDEV(t) \
	(((t) == VBLK) || ((t) == VCHR) || ((t) == VFIFO))
| |
/*
 * ALIGN64 aligns the given buffer pointer to a 64-bit boundary and
 * adjusts the buffer size to match.
 *
 * x	scratch lvalue; receives the adjustment that was applied (0 when
 *	ptr was already aligned, otherwise 1..7).
 * ptr	buffer pointer lvalue; advanced by x.
 * sz	buffer size lvalue; reduced by x.
 *
 * NOTE(review): this expands to two statements (an assignment followed by
 * a bare "if"), so it must not be used as the unbraced body of an
 * if/else or loop.  sz is not checked against x before the subtraction.
 */
#define	ALIGN64(x, ptr, sz)						\
	x = ((uintptr_t)(ptr)) & (sizeof (uint64_t) - 1);		\
	if (x) {							\
		x = sizeof (uint64_t) - (x);				\
		sz -= (x);						\
		ptr += (x);						\
	}
| |
/*
 * Debug-only switches and counters; none of these exist unless the file
 * is compiled with DEBUG defined.  The *_debug switches are handed to
 * NFS4_DEBUG() as its first argument (for example
 * nfs4_client_state_debug in nfs4_open() and nfs4_lost_rqst_debug in
 * nfs4open_save_lost_rqst()).  Variables without "static" are visible to
 * other source files.
 */
#ifdef DEBUG
int nfs4_client_attr_debug = 0;
int nfs4_client_state_debug = 0;
int nfs4_client_shadow_debug = 0;
int nfs4_client_lock_debug = 0;
int nfs4_seqid_sync = 0;
int nfs4_client_map_debug = 0;
static int nfs4_pageio_debug = 0;
int nfs4_client_inactive_debug = 0;
int nfs4_client_recov_debug = 0;
int nfs4_client_failover_debug = 0;
int nfs4_client_call_debug = 0;
int nfs4_client_lookup_debug = 0;
int nfs4_client_zone_debug = 0;
int nfs4_lost_rqst_debug = 0;
int nfs4_rdattrerr_debug = 0;
int nfs4_open_stream_debug = 0;

int nfs4read_error_inject;

static int nfs4_create_misses = 0;

static int nfs4_readdir_cache_shorts = 0;
static int nfs4_readdir_readahead = 0;

static int nfs4_bio_do_stop = 0;

static int nfs4_lostpage = 0;	/* number of times we lost original page */

int nfs4_mmap_debug = 0;

static int nfs4_pathconf_cache_hits = 0;
static int nfs4_pathconf_cache_misses = 0;

int nfs4close_all_cnt;
int nfs4close_one_debug = 0;
int nfs4close_notw_debug = 0;

int denied_to_flk_debug = 0;
void *lockt_denied_debug;

#endif
| |
/*
 * How long to wait before trying again if OPEN_CONFIRM gets ETIMEDOUT
 * or NFS4ERR_RESOURCE.
 */
static int confirm_retry_sec = 30;

/* NOTE(review): presumably non-zero enables negative lookup caching. */
static int nfs4_lookup_neg_cache = 1;

/*
 * number of pages to read ahead
 * optimized for 100 base-T.
 */
static int nfs4_nra = 4;

/* NOTE(review): presumably non-zero enables caching of symlink contents. */
static int nfs4_do_symlink_cache = 1;

/* NOTE(review): presumably non-zero bypasses the pathconf cache. */
static int nfs4_pathconf_disable_cache = 0;
| |
| /* |
| * These are the vnode ops routines which implement the vnode interface to |
| * the networked file system. These routines just take their parameters, |
| * make them look networkish by putting the right info into interface structs, |
| * and then calling the appropriate remote routine(s) to do the work. |
| * |
| * Note on directory name lookup cacheing: If we detect a stale fhandle, |
| * we purge the directory cache relative to that vnode. This way, the |
| * user won't get burned by the cache repeatedly. See <nfs/rnode4.h> for |
| * more details on rnode locking. |
| */ |
| |
/* The NFSv4 vnode operations vector; returned by nfs4_getvnodeops(). */
struct vnodeops *nfs4_vnodeops;

/*
 * Template pairing each VOPNAME_* operation name with the NFSv4 routine
 * that implements it.  The table is terminated by a NULL, NULL entry.
 * Two entries do not point at nfs4_* routines: VOPNAME_DUMP uses
 * nfs_dump and VOPNAME_VNEVENT uses fs_vnevent_support.
 *
 * NOTE(review): presumably this template is what nfs4_vnodeops is built
 * from at initialization time; that code is not in this part of the file.
 */
const fs_operation_def_t nfs4_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = nfs4_open },
	VOPNAME_CLOSE,		{ .vop_close = nfs4_close },
	VOPNAME_READ,		{ .vop_read = nfs4_read },
	VOPNAME_WRITE,		{ .vop_write = nfs4_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = nfs4_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = nfs4_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = nfs4_setattr },
	VOPNAME_ACCESS,		{ .vop_access = nfs4_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = nfs4_lookup },
	VOPNAME_CREATE,		{ .vop_create = nfs4_create },
	VOPNAME_REMOVE,		{ .vop_remove = nfs4_remove },
	VOPNAME_LINK,		{ .vop_link = nfs4_link },
	VOPNAME_RENAME,		{ .vop_rename = nfs4_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = nfs4_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = nfs4_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = nfs4_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = nfs4_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = nfs4_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = nfs4_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = nfs4_inactive },
	VOPNAME_FID,		{ .vop_fid = nfs4_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = nfs4_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = nfs4_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = nfs4_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = nfs4_frlock },
	VOPNAME_SPACE,		{ .vop_space = nfs4_space },
	VOPNAME_REALVP,		{ .vop_realvp = nfs4_realvp },
	VOPNAME_GETPAGE,	{ .vop_getpage = nfs4_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = nfs4_putpage },
	VOPNAME_MAP,		{ .vop_map = nfs4_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = nfs4_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = nfs4_delmap },
	/* no separate nfs4_dump */
	VOPNAME_DUMP,		{ .vop_dump = nfs_dump },
	VOPNAME_PATHCONF,	{ .vop_pathconf = nfs4_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = nfs4_pageio },
	VOPNAME_DISPOSE,	{ .vop_dispose = nfs4_dispose },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = nfs4_setsecattr },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = nfs4_getsecattr },
	VOPNAME_SHRLOCK,	{ .vop_shrlock = nfs4_shrlock },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL,			NULL
};
| |
| /* |
| * The following are subroutines and definitions to set args or get res |
| * for the different nfsv4 ops |
| */ |
| |
| void |
| nfs4args_lookup_free(nfs_argop4 *argop, int arglen) |
| { |
| int i; |
| |
| for (i = 0; i < arglen; i++) { |
| if (argop[i].argop == OP_LOOKUP) { |
| kmem_free( |
| argop[i].nfs_argop4_u.oplookup. |
| objname.utf8string_val, |
| argop[i].nfs_argop4_u.oplookup. |
| objname.utf8string_len); |
| } |
| } |
| } |
| |
| static void |
| nfs4args_lock_free(nfs_argop4 *argop) |
| { |
| locker4 *locker = &argop->nfs_argop4_u.oplock.locker; |
| |
| if (locker->new_lock_owner == TRUE) { |
| open_to_lock_owner4 *open_owner; |
| |
| open_owner = &locker->locker4_u.open_owner; |
| if (open_owner->lock_owner.owner_val != NULL) { |
| kmem_free(open_owner->lock_owner.owner_val, |
| open_owner->lock_owner.owner_len); |
| } |
| } |
| } |
| |
| static void |
| nfs4args_lockt_free(nfs_argop4 *argop) |
| { |
| lock_owner4 *lowner = &argop->nfs_argop4_u.oplockt.owner; |
| |
| if (lowner->owner_val != NULL) { |
| kmem_free(lowner->owner_val, lowner->owner_len); |
| } |
| } |
| |
| static void |
| nfs4args_setattr(nfs_argop4 *argop, vattr_t *vap, vsecattr_t *vsap, int flags, |
| rnode4_t *rp, cred_t *cr, bitmap4 supp, int *error, |
| nfs4_stateid_types_t *sid_types) |
| { |
| fattr4 *attr = &argop->nfs_argop4_u.opsetattr.obj_attributes; |
| mntinfo4_t *mi; |
| |
| argop->argop = OP_SETATTR; |
| /* |
| * The stateid is set to 0 if client is not modifying the size |
| * and otherwise to whatever nfs4_get_stateid() returns. |
| * |
| * XXX Note: nfs4_get_stateid() returns 0 if no lockowner and/or no |
| * state struct could be found for the process/file pair. We may |
| * want to change this in the future (by OPENing the file). See |
| * bug # 4474852. |
| */ |
| if (vap->va_mask & AT_SIZE) { |
| |
| ASSERT(rp != NULL); |
| mi = VTOMI4(RTOV4(rp)); |
| |
| argop->nfs_argop4_u.opsetattr.stateid = |
| nfs4_get_stateid(cr, rp, curproc->p_pidp->pid_id, mi, |
| OP_SETATTR, sid_types, FALSE); |
| } else { |
| bzero(&argop->nfs_argop4_u.opsetattr.stateid, |
| sizeof (stateid4)); |
| } |
| |
| *error = vattr_to_fattr4(vap, vsap, attr, flags, OP_SETATTR, supp); |
| if (*error) |
| bzero(attr, sizeof (*attr)); |
| } |
| |
| static void |
| nfs4args_setattr_free(nfs_argop4 *argop) |
| { |
| nfs4_fattr4_free(&argop->nfs_argop4_u.opsetattr.obj_attributes); |
| } |
| |
| static int |
| nfs4args_verify(nfs_argop4 *argop, vattr_t *vap, enum nfs_opnum4 op, |
| bitmap4 supp) |
| { |
| fattr4 *attr; |
| int error = 0; |
| |
| argop->argop = op; |
| switch (op) { |
| case OP_VERIFY: |
| attr = &argop->nfs_argop4_u.opverify.obj_attributes; |
| break; |
| case OP_NVERIFY: |
| attr = &argop->nfs_argop4_u.opnverify.obj_attributes; |
| break; |
| default: |
| return (EINVAL); |
| } |
| if (!error) |
| error = vattr_to_fattr4(vap, NULL, attr, 0, op, supp); |
| if (error) |
| bzero(attr, sizeof (*attr)); |
| return (error); |
| } |
| |
| static void |
| nfs4args_verify_free(nfs_argop4 *argop) |
| { |
| switch (argop->argop) { |
| case OP_VERIFY: |
| nfs4_fattr4_free(&argop->nfs_argop4_u.opverify.obj_attributes); |
| break; |
| case OP_NVERIFY: |
| nfs4_fattr4_free(&argop->nfs_argop4_u.opnverify.obj_attributes); |
| break; |
| default: |
| break; |
| } |
| } |
| |
| static void |
| nfs4args_write(nfs_argop4 *argop, stable_how4 stable, rnode4_t *rp, cred_t *cr, |
| WRITE4args **wargs_pp, nfs4_stateid_types_t *sid_tp) |
| { |
| WRITE4args *wargs = &argop->nfs_argop4_u.opwrite; |
| mntinfo4_t *mi = VTOMI4(RTOV4(rp)); |
| |
| argop->argop = OP_WRITE; |
| wargs->stable = stable; |
| wargs->stateid = nfs4_get_w_stateid(cr, rp, curproc->p_pidp->pid_id, |
| mi, OP_WRITE, sid_tp); |
| wargs->mblk = NULL; |
| *wargs_pp = wargs; |
| } |
| |
| void |
| nfs4args_copen_free(OPEN4cargs *open_args) |
| { |
| if (open_args->owner.owner_val) { |
| kmem_free(open_args->owner.owner_val, |
| open_args->owner.owner_len); |
| } |
| if ((open_args->opentype == OPEN4_CREATE) && |
| (open_args->mode != EXCLUSIVE4)) { |
| nfs4_fattr4_free(&open_args->createhow4_u.createattrs); |
| } |
| } |
| |
| /* |
| * XXX: This is referenced in modstubs.s |
| */ |
| struct vnodeops * |
| nfs4_getvnodeops(void) |
| { |
| return (nfs4_vnodeops); |
| } |
| |
| /* |
| * The OPEN operation opens a regular file. |
| */ |
| /*ARGSUSED3*/ |
| static int |
| nfs4_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) |
| { |
| vnode_t *dvp = NULL; |
| rnode4_t *rp, *drp; |
| int error; |
| int just_been_created; |
| char fn[MAXNAMELEN]; |
| |
| NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "nfs4_open: ")); |
| if (nfs_zone() != VTOMI4(*vpp)->mi_zone) |
| return (EIO); |
| rp = VTOR4(*vpp); |
| |
| /* |
| * Check to see if opening something besides a regular file; |
| * if so skip the OTW call |
| */ |
| if ((*vpp)->v_type != VREG) { |
| error = nfs4_open_non_reg_file(vpp, flag, cr); |
| return (error); |
| } |
| |
| /* |
| * XXX - would like a check right here to know if the file is |
| * executable or not, so as to skip OTW |
| */ |
| |
| if ((error = vtodv(*vpp, &dvp, cr, TRUE)) != 0) |
| return (error); |
| |
| drp = VTOR4(dvp); |
| if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR4(dvp))) |
| return (EINTR); |
| |
| if ((error = vtoname(*vpp, fn, MAXNAMELEN)) != 0) { |
| nfs_rw_exit(&drp->r_rwlock); |
| return (error); |
| } |
| |
| /* |
| * See if this file has just been CREATEd. |
| * If so, clear the flag and update the dnlc, which was previously |
| * skipped in nfs4_create. |
| * XXX need better serilization on this. |
| * XXX move this into the nf4open_otw call, after we have |
| * XXX acquired the open owner seqid sync. |
| */ |
| mutex_enter(&rp->r_statev4_lock); |
| if (rp->created_v4) { |
| rp->created_v4 = 0; |
| mutex_exit(&rp->r_statev4_lock); |
| |
| dnlc_update(dvp, fn, *vpp); |
| /* This is needed so we don't bump the open ref count */ |
| just_been_created = 1; |
| } else { |
| mutex_exit(&rp->r_statev4_lock); |
| just_been_created = 0; |
| } |
| |
| /* |
| * If caller specified O_TRUNC/FTRUNC, then be sure to set |
| * FWRITE (to drive successful setattr(size=0) after open) |
| */ |
| if (flag & FTRUNC) |
| flag |= FWRITE; |
| |
| error = nfs4open_otw(dvp, fn, NULL, vpp, cr, 0, flag, 0, |
| just_been_created); |
| |
| if (!error && !((*vpp)->v_flag & VROOT)) |
| dnlc_update(dvp, fn, *vpp); |
| |
| nfs_rw_exit(&drp->r_rwlock); |
| |
| /* release the hold from vtodv */ |
| VN_RELE(dvp); |
| |
| /* exchange the shadow for the master vnode, if needed */ |
| |
| if (error == 0 && IS_SHADOW(*vpp, rp)) |
| sv_exchange(vpp); |
| |
| return (error); |
| } |
| |
| /* |
| * See if there's a "lost open" request to be saved and recovered. |
| */ |
| static void |
| nfs4open_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp, |
| nfs4_open_owner_t *oop, cred_t *cr, vnode_t *vp, |
| vnode_t *dvp, OPEN4cargs *open_args) |
| { |
| vfs_t *vfsp; |
| char *srccfp; |
| |
| vfsp = (dvp ? dvp->v_vfsp : vp->v_vfsp); |
| |
| if (error != ETIMEDOUT && error != EINTR && |
| !NFS4_FRC_UNMT_ERR(error, vfsp)) { |
| lost_rqstp->lr_op = 0; |
| return; |
| } |
| |
| NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, |
| "nfs4open_save_lost_rqst: error %d", error)); |
| |
| lost_rqstp->lr_op = OP_OPEN; |
| |
| /* |
| * The vp (if it is not NULL) and dvp are held and rele'd via |
| * the recovery code. See nfs4_save_lost_rqst. |
| */ |
| lost_rqstp->lr_vp = vp; |
| lost_rqstp->lr_dvp = dvp; |
| lost_rqstp->lr_oop = oop; |
| lost_rqstp->lr_osp = NULL; |
| lost_rqstp->lr_lop = NULL; |
| lost_rqstp->lr_cr = cr; |
| lost_rqstp->lr_flk = NULL; |
| lost_rqstp->lr_oacc = open_args->share_access; |
| lost_rqstp->lr_odeny = open_args->share_deny; |
| lost_rqstp->lr_oclaim = open_args->claim; |
| if (open_args->claim == CLAIM_DELEGATE_CUR) { |
| lost_rqstp->lr_ostateid = |
| open_args->open_claim4_u.delegate_cur_info.delegate_stateid; |
| srccfp = open_args->open_claim4_u.delegate_cur_info.cfile; |
| } else { |
| srccfp = open_args->open_claim4_u.cfile; |
| } |
| lost_rqstp->lr_ofile.utf8string_len = 0; |
| lost_rqstp->lr_ofile.utf8string_val = NULL; |
| (void) str_to_utf8(srccfp, &lost_rqstp->lr_ofile); |
| lost_rqstp->lr_putfirst = FALSE; |
| } |
| |
/*
 * Two 32-bit words used by nfs4open_otw() to build the verifier for an
 * EXCLUSIVE4 create; the pair is copied into the OPEN arguments as a
 * single uint64_t, so the structure must stay exactly 64 bits wide.
 * nfs4open_otw() fills it from the zone's hostid and newnum(), or from
 * the current time when the hostid is zero.
 */
struct nfs4_excl_time {
	uint32 seconds;
	uint32 nseconds;
};
| |
| /* |
| * The OPEN operation creates and/or opens a regular file |
| * |
| * ARGSUSED |
| */ |
| static int |
| nfs4open_otw(vnode_t *dvp, char *file_name, struct vattr *in_va, |
| vnode_t **vpp, cred_t *cr, int create_flag, int open_flag, |
| enum createmode4 createmode, int file_just_been_created) |
| { |
| rnode4_t *rp; |
| rnode4_t *drp = VTOR4(dvp); |
| vnode_t *vp = NULL; |
| vnode_t *vpi = *vpp; |
| bool_t needrecov = FALSE; |
| |
| int doqueue = 1; |
| |
| COMPOUND4args_clnt args; |
| COMPOUND4res_clnt res; |
| nfs_argop4 *argop; |
| nfs_resop4 *resop; |
| int argoplist_size; |
| int idx_open, idx_fattr; |
| |
| GETFH4res *gf_res = NULL; |
| OPEN4res *op_res = NULL; |
| nfs4_ga_res_t *garp; |
| fattr4 *attr = NULL; |
| struct nfs4_excl_time verf; |
| bool_t did_excl_setup = FALSE; |
| int created_osp; |
| |
| OPEN4cargs *open_args; |
| nfs4_open_owner_t *oop = NULL; |
| nfs4_open_stream_t *osp = NULL; |
| seqid4 seqid = 0; |
| bool_t retry_open = FALSE; |
| nfs4_recov_state_t recov_state; |
| nfs4_lost_rqst_t lost_rqst; |
| nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; |
| hrtime_t t; |
| int acc = 0; |
| cred_t *cred_otw = NULL; /* cred used to do the RPC call */ |
| cred_t *ncr = NULL; |
| |
| nfs4_sharedfh_t *otw_sfh; |
| nfs4_sharedfh_t *orig_sfh; |
| int fh_differs = 0; |
| int numops, setgid_flag; |
| int num_bseqid_retry = NFS4_NUM_RETRY_BAD_SEQID + 1; |
| |
| /* |
| * Make sure we properly deal with setting the right gid on |
| * a newly created file to reflect the parent's setgid bit |
| */ |
| setgid_flag = 0; |
| if (create_flag && in_va) { |
| |
| /* |
| * If there is grpid mount flag used or |
| * the parent's directory has the setgid bit set |
| * _and_ the client was able to get a valid mapping |
| * for the parent dir's owner_group, we want to |
| * append NVERIFY(owner_group == dva.va_gid) and |
| * SETATTR to the CREATE compound. |
| */ |
| mutex_enter(&drp->r_statelock); |
| if ((VTOMI4(dvp)->mi_flags & MI4_GRPID || |
| drp->r_attr.va_mode & VSGID) && |
| drp->r_attr.va_gid != GID_NOBODY) { |
| in_va->va_mask |= AT_GID; |
| in_va->va_gid = drp->r_attr.va_gid; |
| setgid_flag = 1; |
| } |
| mutex_exit(&drp->r_statelock); |
| } |
| |
| /* |
| * Normal/non-create compound: |
| * PUTFH(dfh) + OPEN(create) + GETFH + GETATTR(new) |
| * |
| * Open(create) compound no setgid: |
| * PUTFH(dfh) + SAVEFH + OPEN(create) + GETFH + GETATTR(new) + |
| * RESTOREFH + GETATTR |
| * |
| * Open(create) setgid: |
| * PUTFH(dfh) + OPEN(create) + GETFH + GETATTR(new) + |
| * SAVEFH + PUTFH(dfh) + GETATTR(dvp) + RESTOREFH + |
| * NVERIFY(grp) + SETATTR |
| */ |
| if (setgid_flag) { |
| numops = 10; |
| idx_open = 1; |
| idx_fattr = 3; |
| } else if (create_flag) { |
| numops = 7; |
| idx_open = 2; |
| idx_fattr = 4; |
| } else { |
| numops = 4; |
| idx_open = 1; |
| idx_fattr = 3; |
| } |
| |
| args.array_len = numops; |
| argoplist_size = numops * sizeof (nfs_argop4); |
| argop = kmem_alloc(argoplist_size, KM_SLEEP); |
| |
| NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "nfs4open_otw: " |
| "open %s open flag 0x%x cred %p", file_name, open_flag, |
| (void *)cr)); |
| |
| ASSERT(nfs_zone() == VTOMI4(dvp)->mi_zone); |
| if (create_flag) { |
| /* |
| * We are to create a file. Initialize the passed in vnode |
| * pointer. |
| */ |
| vpi = NULL; |
| } else { |
| /* |
| * Check to see if the client owns a read delegation and is |
| * trying to open for write. If so, then return the delegation |
| * to avoid the server doing a cb_recall and returning DELAY. |
| * NB - we don't use the statev4_lock here because we'd have |
| * to drop the lock anyway and the result would be stale. |
| */ |
| if ((open_flag & FWRITE) && |
| VTOR4(vpi)->r_deleg_type == OPEN_DELEGATE_READ) |
| (void) nfs4delegreturn(VTOR4(vpi), NFS4_DR_REOPEN); |
| |
| /* |
| * If the file has a delegation, then do an access check up |
| * front. This avoids having to an access check later after |
| * we've already done start_op, which could deadlock. |
| */ |
| if (VTOR4(vpi)->r_deleg_type != OPEN_DELEGATE_NONE) { |
| if (open_flag & FREAD && |
| nfs4_access(vpi, VREAD, 0, cr, NULL) == 0) |
| acc |= VREAD; |
| if (open_flag & FWRITE && |
| nfs4_access(vpi, VWRITE, 0, cr, NULL) == 0) |
| acc |= VWRITE; |
| } |
| } |
| |
| drp = VTOR4(dvp); |
| |
| recov_state.rs_flags = 0; |
| recov_state.rs_num_retry_despite_err = 0; |
| cred_otw = cr; |
| |
| recov_retry: |
| fh_differs = 0; |
| nfs4_error_zinit(&e); |
| |
| e.error = nfs4_start_op(VTOMI4(dvp), dvp, vpi, &recov_state); |
| if (e.error) { |
| if (ncr != NULL) |
| crfree(ncr); |
| kmem_free(argop, argoplist_size); |
| return (e.error); |
| } |
| |
| args.ctag = TAG_OPEN; |
| args.array_len = numops; |
| args.array = argop; |
| |
| /* putfh directory fh */ |
| argop[0].argop = OP_CPUTFH; |
| argop[0].nfs_argop4_u.opcputfh.sfh = drp->r_fh; |
| |
| /* OPEN: either op 1 or op 2 depending upon create/setgid flags */ |
| argop[idx_open].argop = OP_COPEN; |
| open_args = &argop[idx_open].nfs_argop4_u.opcopen; |
| open_args->claim = CLAIM_NULL; |
| |
| /* name of file */ |
| open_args->open_claim4_u.cfile = file_name; |
| open_args->owner.owner_len = 0; |
| open_args->owner.owner_val = NULL; |
| |
| if (create_flag) { |
| /* CREATE a file */ |
| open_args->opentype = OPEN4_CREATE; |
| open_args->mode = createmode; |
| if (createmode == EXCLUSIVE4) { |
| if (did_excl_setup == FALSE) { |
| verf.seconds = zone_get_hostid(NULL); |
| if (verf.seconds != 0) |
| verf.nseconds = newnum(); |
| else { |
| timestruc_t now; |
| |
| gethrestime(&now); |
| verf.seconds = now.tv_sec; |
| verf.nseconds = now.tv_nsec; |
| } |
| /* |
| * Since the server will use this value for the |
| * mtime, make sure that it can't overflow. Zero |
| * out the MSB. The actual value does not matter |
| * here, only its uniqeness. |
| */ |
| verf.seconds &= INT32_MAX; |
| did_excl_setup = TRUE; |
| } |
| |
| /* Now copy over verifier to OPEN4args. */ |
| open_args->createhow4_u.createverf = *(uint64_t *)&verf; |
| } else { |
| int v_error; |
| bitmap4 supp_attrs; |
| servinfo4_t *svp; |
| |
| attr = &open_args->createhow4_u.createattrs; |
| |
| svp = drp->r_server; |
| (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); |
| supp_attrs = svp->sv_supp_attrs; |
| nfs_rw_exit(&svp->sv_lock); |
| |
| /* GUARDED4 or UNCHECKED4 */ |
| v_error = vattr_to_fattr4(in_va, NULL, attr, 0, OP_OPEN, |
| supp_attrs); |
| if (v_error) { |
| bzero(attr, sizeof (*attr)); |
| nfs4args_copen_free(open_args); |
| nfs4_end_op(VTOMI4(dvp), dvp, vpi, |
| &recov_state, FALSE); |
| if (ncr != NULL) |
| crfree(ncr); |
| kmem_free(argop, argoplist_size); |
| return (v_error); |
| } |
| } |
| } else { |
| /* NO CREATE */ |
| open_args->opentype = OPEN4_NOCREATE; |
| } |
| |
| if (recov_state.rs_sp != NULL) { |
| mutex_enter(&recov_state.rs_sp->s_lock); |
| open_args->owner.clientid = recov_state.rs_sp->clientid; |
| mutex_exit(&recov_state.rs_sp->s_lock); |
| } else { |
| /* XXX should we just fail here? */ |
| open_args->owner.clientid = 0; |
| } |
| |
| /* |
| * This increments oop's ref count or creates a temporary 'just_created' |
| * open owner that will become valid when this OPEN/OPEN_CONFIRM call |
| * completes. |
| */ |
| mutex_enter(&VTOMI4(dvp)->mi_lock); |
| |
| /* See if a permanent or just created open owner exists */ |
| oop = find_open_owner_nolock(cr, NFS4_JUST_CREATED, VTOMI4(dvp)); |
| if (!oop) { |
| /* |
| * This open owner does not exist so create a temporary |
| * just created one. |
| */ |
| oop = create_open_owner(cr, VTOMI4(dvp)); |
| ASSERT(oop != NULL); |
| } |
| mutex_exit(&VTOMI4(dvp)->mi_lock); |
| |
| /* this length never changes, do alloc before seqid sync */ |
| open_args->owner.owner_len = sizeof (oop->oo_name); |
| open_args->owner.owner_val = |
| kmem_alloc(open_args->owner.owner_len, KM_SLEEP); |
| |
| e.error = nfs4_start_open_seqid_sync(oop, VTOMI4(dvp)); |
| if (e.error == EAGAIN) { |
| open_owner_rele(oop); |
| nfs4args_copen_free(open_args); |
| nfs4_end_op(VTOMI4(dvp), dvp, vpi, &recov_state, TRUE); |
| if (ncr != NULL) { |
| crfree(ncr); |
| ncr = NULL; |
| } |
| goto recov_retry; |
| } |
| |
| /* Check to see if we need to do the OTW call */ |
| if (!create_flag) { |
| if (!nfs4_is_otw_open_necessary(oop, open_flag, vpi, |
| file_just_been_created, &e.error, acc, &recov_state)) { |
| |
| /* |
| * The OTW open is not necessary. Either |
| * the open can succeed without it (eg. |
| * delegation, error == 0) or the open |
| * must fail due to an access failure |
| * (error != 0). In either case, tidy |
| * up and return. |
| */ |
| |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| nfs4args_copen_free(open_args); |
| nfs4_end_op(VTOMI4(dvp), dvp, vpi, &recov_state, FALSE); |
| if (ncr != NULL) |
| crfree(ncr); |
| kmem_free(argop, argoplist_size); |
| return (e.error); |
| } |
| } |
| |
| bcopy(&oop->oo_name, open_args->owner.owner_val, |
| open_args->owner.owner_len); |
| |
| seqid = nfs4_get_open_seqid(oop) + 1; |
| open_args->seqid = seqid; |
| open_args->share_access = 0; |
| if (open_flag & FREAD) |
| open_args->share_access |= OPEN4_SHARE_ACCESS_READ; |
| if (open_flag & FWRITE) |
| open_args->share_access |= OPEN4_SHARE_ACCESS_WRITE; |
| open_args->share_deny = OPEN4_SHARE_DENY_NONE; |
| |
| |
| |
| /* |
| * getfh w/sanity check for idx_open/idx_fattr |
| */ |
| ASSERT((idx_open + 1) == (idx_fattr - 1)); |
| argop[idx_open + 1].argop = OP_GETFH; |
| |
| /* getattr */ |
| argop[idx_fattr].argop = OP_GETATTR; |
| argop[idx_fattr].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK; |
| argop[idx_fattr].nfs_argop4_u.opgetattr.mi = VTOMI4(dvp); |
| |
| if (setgid_flag) { |
| vattr_t _v; |
| servinfo4_t *svp; |
| bitmap4 supp_attrs; |
| |
| svp = drp->r_server; |
| (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); |
| supp_attrs = svp->sv_supp_attrs; |
| nfs_rw_exit(&svp->sv_lock); |
| |
| /* |
| * For setgid case, we need to: |
| * 4:savefh(new) 5:putfh(dir) 6:getattr(dir) 7:restorefh(new) |
| */ |
| argop[4].argop = OP_SAVEFH; |
| |
| argop[5].argop = OP_CPUTFH; |
| argop[5].nfs_argop4_u.opcputfh.sfh = drp->r_fh; |
| |
| argop[6].argop = OP_GETATTR; |
| argop[6].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK; |
| argop[6].nfs_argop4_u.opgetattr.mi = VTOMI4(dvp); |
| |
| argop[7].argop = OP_RESTOREFH; |
| |
| /* |
| * nverify |
| */ |
| _v.va_mask = AT_GID; |
| _v.va_gid = in_va->va_gid; |
| if (!(e.error = nfs4args_verify(&argop[8], &_v, OP_NVERIFY, |
| supp_attrs))) { |
| |
| /* |
| * setattr |
| * |
| * We _know_ we're not messing with AT_SIZE or |
| * AT_XTIME, so no need for stateid or flags. |
| * Also we specify NULL rp since we're only |
| * interested in setting owner_group attributes. |
| */ |
| nfs4args_setattr(&argop[9], &_v, NULL, 0, NULL, cr, |
| supp_attrs, &e.error, 0); |
| if (e.error) |
| nfs4args_verify_free(&argop[8]); |
| } |
| |
| if (e.error) { |
| /* |
| * XXX - Revisit the last argument to nfs4_end_op() |
| * once 5020486 is fixed. |
| */ |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| nfs4args_copen_free(open_args); |
| nfs4_end_op(VTOMI4(dvp), dvp, vpi, &recov_state, TRUE); |
| if (ncr != NULL) |
| crfree(ncr); |
| kmem_free(argop, argoplist_size); |
| return (e.error); |
| } |
| } else if (create_flag) { |
| argop[1].argop = OP_SAVEFH; |
| |
| argop[5].argop = OP_RESTOREFH; |
| |
| argop[6].argop = OP_GETATTR; |
| argop[6].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK; |
| argop[6].nfs_argop4_u.opgetattr.mi = VTOMI4(dvp); |
| } |
| |
| NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, |
| "nfs4open_otw: %s call, nm %s, rp %s", |
| needrecov ? "recov" : "first", file_name, |
| rnode4info(VTOR4(dvp)))); |
| |
| t = gethrtime(); |
| |
| rfs4call(VTOMI4(dvp), &args, &res, cred_otw, &doqueue, 0, &e); |
| |
| if (!e.error && nfs4_need_to_bump_seqid(&res)) |
| nfs4_set_open_seqid(seqid, oop, args.ctag); |
| |
| needrecov = nfs4_needs_recovery(&e, TRUE, dvp->v_vfsp); |
| |
| if (e.error || needrecov) { |
| bool_t abort = FALSE; |
| |
| if (needrecov) { |
| nfs4_bseqid_entry_t *bsep = NULL; |
| |
| nfs4open_save_lost_rqst(e.error, &lost_rqst, oop, |
| cred_otw, vpi, dvp, open_args); |
| |
| if (!e.error && res.status == NFS4ERR_BAD_SEQID) { |
| bsep = nfs4_create_bseqid_entry(oop, NULL, |
| vpi, 0, args.ctag, open_args->seqid); |
| num_bseqid_retry--; |
| } |
| |
| abort = nfs4_start_recovery(&e, VTOMI4(dvp), dvp, vpi, |
| NULL, lost_rqst.lr_op == OP_OPEN ? |
| &lost_rqst : NULL, OP_OPEN, bsep, NULL, NULL); |
| |
| if (bsep) |
| kmem_free(bsep, sizeof (*bsep)); |
| /* give up if we keep getting BAD_SEQID */ |
| if (num_bseqid_retry == 0) |
| abort = TRUE; |
| if (abort == TRUE && e.error == 0) |
| e.error = geterrno4(res.status); |
| } |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| nfs4_end_op(VTOMI4(dvp), dvp, vpi, &recov_state, needrecov); |
| nfs4args_copen_free(open_args); |
| if (setgid_flag) { |
| nfs4args_verify_free(&argop[8]); |
| nfs4args_setattr_free(&argop[9]); |
| } |
| if (!e.error) |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| if (ncr != NULL) { |
| crfree(ncr); |
| ncr = NULL; |
| } |
| if (!needrecov || abort == TRUE || e.error == EINTR || |
| NFS4_FRC_UNMT_ERR(e.error, dvp->v_vfsp)) { |
| kmem_free(argop, argoplist_size); |
| return (e.error); |
| } |
| goto recov_retry; |
| } |
| |
| /* |
| * Will check and update lease after checking the rflag for |
| * OPEN_CONFIRM in the successful OPEN call. |
| */ |
| if (res.status != NFS4_OK && res.array_len <= idx_fattr + 1) { |
| |
| /* |
| * XXX what if we're crossing mount points from server1:/drp |
| * to server2:/drp/rp. |
| */ |
| |
| /* Signal our end of use of the open seqid */ |
| nfs4_end_open_seqid_sync(oop); |
| |
| /* |
| * This will destroy the open owner if it was just created, |
| * and no one else has put a reference on it. |
| */ |
| open_owner_rele(oop); |
| if (create_flag && (createmode != EXCLUSIVE4) && |
| res.status == NFS4ERR_BADOWNER) |
| nfs4_log_badowner(VTOMI4(dvp), OP_OPEN); |
| |
| e.error = geterrno4(res.status); |
| nfs4args_copen_free(open_args); |
| if (setgid_flag) { |
| nfs4args_verify_free(&argop[8]); |
| nfs4args_setattr_free(&argop[9]); |
| } |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs4_end_op(VTOMI4(dvp), dvp, vpi, &recov_state, needrecov); |
| /* |
| * If the reply is NFS4ERR_ACCESS, it may be because |
| * we are root (no root net access). If the real uid |
| * is not root, then retry with the real uid instead. |
| */ |
| if (ncr != NULL) { |
| crfree(ncr); |
| ncr = NULL; |
| } |
| if (res.status == NFS4ERR_ACCESS && |
| (ncr = crnetadjust(cred_otw)) != NULL) { |
| cred_otw = ncr; |
| goto recov_retry; |
| } |
| kmem_free(argop, argoplist_size); |
| return (e.error); |
| } |
| |
| resop = &res.array[idx_open]; /* open res */ |
| op_res = &resop->nfs_resop4_u.opopen; |
| |
| #ifdef DEBUG |
| /* |
| * verify attrset bitmap |
| */ |
| if (create_flag && |
| (createmode == UNCHECKED4 || createmode == GUARDED4)) { |
| /* make sure attrset returned is what we asked for */ |
| /* XXX Ignore this 'error' for now */ |
| if (attr->attrmask != op_res->attrset) |
| /* EMPTY */; |
| } |
| #endif |
| |
| if (op_res->rflags & OPEN4_RESULT_LOCKTYPE_POSIX) { |
| mutex_enter(&VTOMI4(dvp)->mi_lock); |
| VTOMI4(dvp)->mi_flags |= MI4_POSIX_LOCK; |
| mutex_exit(&VTOMI4(dvp)->mi_lock); |
| } |
| |
| resop = &res.array[idx_open + 1]; /* getfh res */ |
| gf_res = &resop->nfs_resop4_u.opgetfh; |
| |
| otw_sfh = sfh4_get(&gf_res->object, VTOMI4(dvp)); |
| |
| /* |
| * The open stateid has been updated on the server but not |
| * on the client yet. There is a path: makenfs4node->nfs4_attr_cache-> |
| * flush_pages->VOP_PUTPAGE->...->nfs4write where we will issue an OTW |
| * WRITE call. That, however, will use the old stateid, so go ahead |
| * and update the open stateid now, before any call to makenfs4node. |
| */ |
| if (vpi) { |
| nfs4_open_stream_t *tmp_osp; |
| rnode4_t *tmp_rp = VTOR4(vpi); |
| |
| tmp_osp = find_open_stream(oop, tmp_rp); |
| if (tmp_osp) { |
| tmp_osp->open_stateid = op_res->stateid; |
| mutex_exit(&tmp_osp->os_sync_lock); |
| open_stream_rele(tmp_osp, tmp_rp); |
| } |
| |
| /* |
| * We must determine if the file handle given by the otw open |
| * is the same as the file handle which was passed in with |
| * *vpp. This case can be reached if the file we are trying |
| * to open has been removed and another file has been created |
| * having the same file name. The passed in vnode is released |
| * later. |
| */ |
| orig_sfh = VTOR4(vpi)->r_fh; |
| fh_differs = nfs4cmpfh(&orig_sfh->sfh_fh, &otw_sfh->sfh_fh); |
| } |
| |
| garp = &res.array[idx_fattr].nfs_resop4_u.opgetattr.ga_res; |
| |
| if (create_flag || fh_differs) { |
| int rnode_err = 0; |
| |
| vp = makenfs4node(otw_sfh, garp, dvp->v_vfsp, t, cr, |
| dvp, fn_get(VTOSV(dvp)->sv_name, file_name, otw_sfh)); |
| |
| if (e.error) |
| PURGE_ATTRCACHE4(vp); |
| /* |
| * For the newly created vp case, make sure the rnode |
| * isn't bad before using it. |
| */ |
| mutex_enter(&(VTOR4(vp))->r_statelock); |
| if (VTOR4(vp)->r_flags & R4RECOVERR) |
| rnode_err = EIO; |
| mutex_exit(&(VTOR4(vp))->r_statelock); |
| |
| if (rnode_err) { |
| nfs4_end_open_seqid_sync(oop); |
| nfs4args_copen_free(open_args); |
| if (setgid_flag) { |
| nfs4args_verify_free(&argop[8]); |
| nfs4args_setattr_free(&argop[9]); |
| } |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs4_end_op(VTOMI4(dvp), dvp, vpi, &recov_state, |
| needrecov); |
| open_owner_rele(oop); |
| VN_RELE(vp); |
| if (ncr != NULL) |
| crfree(ncr); |
| sfh4_rele(&otw_sfh); |
| kmem_free(argop, argoplist_size); |
| return (EIO); |
| } |
| } else { |
| vp = vpi; |
| } |
| sfh4_rele(&otw_sfh); |
| |
| /* |
| * It seems odd to get a full set of attrs and then not update |
| * the object's attrcache in the non-create case. Create case uses |
| * the attrs since makenfs4node checks to see if the attrs need to |
| * be updated (and then updates them). The non-create case should |
| * update attrs also. |
| */ |
| if (! create_flag && ! fh_differs && !e.error) { |
| nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL); |
| } |
| |
| nfs4_error_zinit(&e); |
| if (op_res->rflags & OPEN4_RESULT_CONFIRM) { |
| /* This does not do recovery for vp explicitly. */ |
| nfs4open_confirm(vp, &seqid, &op_res->stateid, cred_otw, FALSE, |
| &retry_open, oop, FALSE, &e, &num_bseqid_retry); |
| |
| if (e.error || e.stat) { |
| nfs4_end_open_seqid_sync(oop); |
| nfs4args_copen_free(open_args); |
| if (setgid_flag) { |
| nfs4args_verify_free(&argop[8]); |
| nfs4args_setattr_free(&argop[9]); |
| } |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs4_end_op(VTOMI4(dvp), dvp, vpi, &recov_state, |
| needrecov); |
| open_owner_rele(oop); |
| if (create_flag || fh_differs) { |
| /* rele the makenfs4node */ |
| VN_RELE(vp); |
| } |
| if (ncr != NULL) { |
| crfree(ncr); |
| ncr = NULL; |
| } |
| if (retry_open == TRUE) { |
| NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, |
| "nfs4open_otw: retry the open since OPEN " |
| "CONFIRM failed with error %d stat %d", |
| e.error, e.stat)); |
| if (create_flag && createmode == GUARDED4) { |
| NFS4_DEBUG(nfs4_client_recov_debug, |
| (CE_NOTE, "nfs4open_otw: switch " |
| "createmode from GUARDED4 to " |
| "UNCHECKED4")); |
| createmode = UNCHECKED4; |
| } |
| goto recov_retry; |
| } |
| if (!e.error) { |
| if (create_flag && (createmode != EXCLUSIVE4) && |
| e.stat == NFS4ERR_BADOWNER) |
| nfs4_log_badowner(VTOMI4(dvp), OP_OPEN); |
| |
| e.error = geterrno4(e.stat); |
| } |
| kmem_free(argop, argoplist_size); |
| return (e.error); |
| } |
| } |
| |
| rp = VTOR4(vp); |
| |
| mutex_enter(&rp->r_statev4_lock); |
| if (create_flag) |
| rp->created_v4 = 1; |
| mutex_exit(&rp->r_statev4_lock); |
| |
| mutex_enter(&oop->oo_lock); |
| /* Doesn't matter if 'oo_just_created' was already set to this */ |
| oop->oo_just_created = NFS4_PERM_CREATED; |
| if (oop->oo_cred_otw) |
| crfree(oop->oo_cred_otw); |
| oop->oo_cred_otw = cred_otw; |
| crhold(oop->oo_cred_otw); |
| mutex_exit(&oop->oo_lock); |
| |
| /* returns with 'os_sync_lock' held */ |
| osp = find_or_create_open_stream(oop, rp, &created_osp); |
| if (!osp) { |
| NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, |
| "nfs4open_otw: failed to create an open stream")); |
| NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, "nfs4open_otw: " |
| "signal our end of use of the open seqid")); |
| |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| nfs4args_copen_free(open_args); |
| if (setgid_flag) { |
| nfs4args_verify_free(&argop[8]); |
| nfs4args_setattr_free(&argop[9]); |
| } |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs4_end_op(VTOMI4(dvp), dvp, vpi, &recov_state, needrecov); |
| if (create_flag || fh_differs) |
| VN_RELE(vp); |
| if (ncr != NULL) |
| crfree(ncr); |
| |
| kmem_free(argop, argoplist_size); |
| return (EINVAL); |
| |
| } |
| |
| osp->open_stateid = op_res->stateid; |
| |
| if (open_flag & FREAD) |
| osp->os_share_acc_read++; |
| if (open_flag & FWRITE) |
| osp->os_share_acc_write++; |
| osp->os_share_deny_none++; |
| |
| /* |
| * Need to reset this bitfield for the possible case where we were |
| * going to OTW CLOSE the file, got a non-recoverable error, and before |
| * we could retry the CLOSE, OPENed the file again. |
| */ |
| ASSERT(osp->os_open_owner->oo_seqid_inuse); |
| osp->os_final_close = 0; |
| osp->os_force_close = 0; |
| #ifdef DEBUG |
| if (osp->os_failed_reopen) |
| NFS4_DEBUG(nfs4_open_stream_debug, (CE_NOTE, "nfs4open_otw:" |
| " clearing os_failed_reopen for osp %p, cr %p, rp %s", |
| (void *)osp, (void *)cr, rnode4info(rp))); |
| #endif |
| osp->os_failed_reopen = 0; |
| |
| mutex_exit(&osp->os_sync_lock); |
| |
| nfs4_end_open_seqid_sync(oop); |
| |
| if (created_osp && recov_state.rs_sp != NULL) { |
| mutex_enter(&recov_state.rs_sp->s_lock); |
| nfs4_inc_state_ref_count_nolock(recov_state.rs_sp, VTOMI4(dvp)); |
| mutex_exit(&recov_state.rs_sp->s_lock); |
| } |
| |
| /* get rid of our reference to find oop */ |
| open_owner_rele(oop); |
| |
| open_stream_rele(osp, rp); |
| |
| /* accept delegation, if any */ |
| nfs4_delegation_accept(rp, CLAIM_NULL, op_res, garp, cred_otw); |
| |
| nfs4_end_op(VTOMI4(dvp), dvp, vpi, &recov_state, needrecov); |
| |
| if (createmode == EXCLUSIVE4 && |
| (in_va->va_mask & ~(AT_GID | AT_SIZE))) { |
| NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "nfs4open_otw:" |
| " EXCLUSIVE4: sending a SETATTR")); |
| /* |
| * If doing an exclusive create, then generate |
| * a SETATTR to set the initial attributes. |
| * Try to set the mtime and the atime to the |
| * server's current time. It is somewhat |
| * expected that these fields will be used to |
| * store the exclusive create cookie. If not, |
| * server implementors will need to know that |
| * a SETATTR will follow an exclusive create |
| * and the cookie should be destroyed if |
| * appropriate. |
| * |
| * The AT_GID and AT_SIZE bits are turned off |
| * so that the SETATTR request will not attempt |
| * to process these. The gid will be set |
| * separately if appropriate. The size is turned |
| * off because it is assumed that a new file will |
| * be created empty and if the file wasn't empty, |
| * then the exclusive create will have failed |
| * because the file must have existed already. |
| * Therefore, no truncate operation is needed. |
| */ |
| in_va->va_mask &= ~(AT_GID | AT_SIZE); |
| in_va->va_mask |= (AT_MTIME | AT_ATIME); |
| |
| e.error = nfs4setattr(vp, in_va, 0, cr, NULL); |
| if (e.error) { |
| nfs4_error_t err; |
| |
| /* |
| * Couldn't correct the attributes of |
| * the newly created file and the |
| * attributes are wrong. Remove the |
| * file and return an error to the |
| * application. |
| */ |
| /* XXX will this take care of client state ? */ |
| NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, |
| "nfs4open_otw: EXCLUSIVE4: error %d on SETATTR:" |
| " remove file", e.error)); |
| |
| /* |
| * The file is currently open so try to close it first. |
| * |
| * If we do not close the file explicitly here then the |
| * VN_RELE() would do an (implicit and asynchronous) |
| * close for us. But such async close could race with |
| * the nfs4_remove() below. If the async close is |
| * slower than nfs4_remove() then nfs4_remove() |
| * wouldn't remove the file but rename it to .nfsXXXX |
| * instead. |
| */ |
| nfs4close_one(vp, NULL, cr, open_flag, NULL, &err, |
| CLOSE_NORM, 0, 0, 0); |
| VN_RELE(vp); |
| (void) nfs4_remove(dvp, file_name, cr, NULL, 0); |
| |
| /* |
| * Since we've reled the vnode and removed |
| * the file we now need to return the error. |
| * At this point we don't want to update the |
| * dircaches, call nfs4_waitfor_purge_complete |
| * or set vpp to vp so we need to skip these |
| * as well. |
| */ |
| goto skip_update_dircaches; |
| } |
| } |
| |
| /* |
| * If we created or found the correct vnode, due to create_flag or |
| * fh_differs being set, then update directory cache attribute, readdir |
| * and dnlc caches. |
| */ |
| if (create_flag || fh_differs) { |
| dirattr_info_t dinfo, *dinfop; |
| |
| /* |
| * Make sure getattr succeeded before using results. |
| * note: op 7 is getattr(dir) for both flavors of |
| * open(create). |
| */ |
| if (create_flag && res.status == NFS4_OK) { |
| dinfo.di_time_call = t; |
| dinfo.di_cred = cr; |
| dinfo.di_garp = |
| &res.array[6].nfs_resop4_u.opgetattr.ga_res; |
| dinfop = &dinfo; |
| } else { |
| dinfop = NULL; |
| } |
| |
| nfs4_update_dircaches(&op_res->cinfo, dvp, vp, file_name, |
| dinfop); |
| } |
| |
| /* |
| * If the page cache for this file was flushed from actions |
| * above, it was done asynchronously and if that is true, |
| * there is a need to wait here for it to complete. This must |
| * be done outside of start_fop/end_fop. |
| */ |
| (void) nfs4_waitfor_purge_complete(vp); |
| |
| /* |
| * It is implicit that we are in the open case (create_flag == 0) since |
| * fh_differs can only be set to a non-zero value in the open case. |
| */ |
| if (fh_differs != 0 && vpi != NULL) |
| VN_RELE(vpi); |
| |
| /* |
| * Be sure to set *vpp to the correct value before returning. |
| */ |
| *vpp = vp; |
| |
| skip_update_dircaches: |
| |
| nfs4args_copen_free(open_args); |
| if (setgid_flag) { |
| nfs4args_verify_free(&argop[8]); |
| nfs4args_setattr_free(&argop[9]); |
| } |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| |
| if (ncr) |
| crfree(ncr); |
| kmem_free(argop, argoplist_size); |
| return (e.error); |
| } |
| |
| /* |
| * Reopen an open instance. cf. nfs4open_otw(). |
| * |
| * Errors are returned by the nfs4_error_t parameter. |
| * - ep->error contains an errno value or zero. |
| * - if it is zero, ep->stat is set to an NFS status code, if any. |
| * If the file could not be reopened, but the caller should continue, the |
| * file is marked dead and no error values are returned. If the caller |
| * should stop recovering open files and start over, either the ep->error |
| * value or ep->stat will indicate an error (either something that requires |
| * recovery or EAGAIN). Note that some recovery (e.g., expired volatile |
| * filehandles) may be handled silently by this routine. |
| * - if it is EINTR, ETIMEDOUT, or NFS4_FRC_UNMT_ERR, recovery for lost state |
| * will be started, so the caller should not do it. |
| * |
| * Gotos: |
| * - kill_file : reopen failed in a way that requires marking the file |
| * dead and setting the open stream's 'os_failed_reopen' to 1. This is |
| * for cases where recovery is not possible. |
| * - failed_reopen : same as above, except that the file has already been |
| * marked dead, so no need to do it again. |
| * - bailout : reopen failed but we are able to recover and retry the reopen - |
| * either within this function immediately or via the calling function. |
| */ |
| |
| void |
| nfs4_reopen(vnode_t *vp, nfs4_open_stream_t *osp, nfs4_error_t *ep, |
| open_claim_type4 claim, bool_t frc_use_claim_previous, |
| bool_t is_recov) |
| { |
| COMPOUND4args_clnt args; |
| COMPOUND4res_clnt res; |
| nfs_argop4 argop[4]; |
| nfs_resop4 *resop; |
| OPEN4res *op_res = NULL; |
| OPEN4cargs *open_args; |
| GETFH4res *gf_res; |
| rnode4_t *rp = VTOR4(vp); |
| int doqueue = 1; |
| cred_t *cr = NULL, *cred_otw = NULL; |
| nfs4_open_owner_t *oop = NULL; |
| seqid4 seqid; |
| nfs4_ga_res_t *garp; |
| char fn[MAXNAMELEN]; |
| nfs4_recov_state_t recov = {NULL, 0}; |
| nfs4_lost_rqst_t lost_rqst; |
| mntinfo4_t *mi = VTOMI4(vp); |
| bool_t abort; |
| char *failed_msg = ""; |
| int fh_different; |
| hrtime_t t; |
| nfs4_bseqid_entry_t *bsep = NULL; |
| |
| ASSERT(nfs4_consistent_type(vp)); |
| ASSERT(nfs_zone() == mi->mi_zone); |
| |
| nfs4_error_zinit(ep); |
| |
| /* this is the cred used to find the open owner */ |
| cr = state_to_cred(osp); |
| if (cr == NULL) { |
| failed_msg = "Couldn't reopen: no cred"; |
| goto kill_file; |
| } |
| /* use this cred for OTW operations */ |
| cred_otw = nfs4_get_otw_cred(cr, mi, osp->os_open_owner); |
| |
| top: |
| nfs4_error_zinit(ep); |
| |
| if (mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) { |
| /* File system has been unmounted, quit */ |
| ep->error = EIO; |
| failed_msg = "Couldn't reopen: file system has been unmounted"; |
| goto kill_file; |
| } |
| |
| oop = osp->os_open_owner; |
| |
| ASSERT(oop != NULL); |
| if (oop == NULL) { /* be defensive in non-DEBUG */ |
| failed_msg = "can't reopen: no open owner"; |
| goto kill_file; |
| } |
| open_owner_hold(oop); |
| |
| ep->error = nfs4_start_open_seqid_sync(oop, mi); |
| if (ep->error) { |
| open_owner_rele(oop); |
| oop = NULL; |
| goto bailout; |
| } |
| |
| /* |
| * If the rnode has a delegation and the delegation has been |
| * recovered and the server didn't request a recall and the caller |
| * didn't specifically ask for CLAIM_PREVIOUS (nfs4frlock during |
| * recovery) and the rnode hasn't been marked dead, then install |
| * the delegation stateid in the open stream. Otherwise, proceed |
| * with a CLAIM_PREVIOUS or CLAIM_NULL OPEN. |
| */ |
| mutex_enter(&rp->r_statev4_lock); |
| if (rp->r_deleg_type != OPEN_DELEGATE_NONE && |
| !rp->r_deleg_return_pending && |
| (rp->r_deleg_needs_recovery == OPEN_DELEGATE_NONE) && |
| !rp->r_deleg_needs_recall && |
| claim != CLAIM_DELEGATE_CUR && !frc_use_claim_previous && |
| !(rp->r_flags & R4RECOVERR)) { |
| mutex_enter(&osp->os_sync_lock); |
| osp->os_delegation = 1; |
| osp->open_stateid = rp->r_deleg_stateid; |
| mutex_exit(&osp->os_sync_lock); |
| mutex_exit(&rp->r_statev4_lock); |
| goto bailout; |
| } |
| mutex_exit(&rp->r_statev4_lock); |
| |
| /* |
| * If the file failed recovery, just quit. This failure need not |
| * affect other reopens, so don't return an error. |
| */ |
| mutex_enter(&rp->r_statelock); |
| if (rp->r_flags & R4RECOVERR) { |
| mutex_exit(&rp->r_statelock); |
| ep->error = 0; |
| goto failed_reopen; |
| } |
| mutex_exit(&rp->r_statelock); |
| |
| /* |
| * argop is empty here |
| * |
| * PUTFH, OPEN, GETFH, GETATTR |
| */ |
| args.ctag = TAG_REOPEN; |
| args.array_len = 4; |
| args.array = argop; |
| |
| NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, |
| "nfs4_reopen: file is type %d, id %s", |
| vp->v_type, rnode4info(VTOR4(vp)))); |
| |
| argop[0].argop = OP_CPUTFH; |
| |
| if (claim != CLAIM_PREVIOUS) { |
| /* |
| * if this is a file mount then |
| * use the mntinfo parentfh |
| */ |
| argop[0].nfs_argop4_u.opcputfh.sfh = |
| (vp->v_flag & VROOT) ? mi->mi_srvparentfh : |
| VTOSV(vp)->sv_dfh; |
| } else { |
| /* putfh fh to reopen */ |
| argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh; |
| } |
| |
| argop[1].argop = OP_COPEN; |
| open_args = &argop[1].nfs_argop4_u.opcopen; |
| open_args->claim = claim; |
| |
| if (claim == CLAIM_NULL) { |
| |
| if ((ep->error = vtoname(vp, fn, MAXNAMELEN)) != 0) { |
| nfs_cmn_err(ep->error, CE_WARN, "nfs4_reopen: vtoname " |
| "failed for vp 0x%p for CLAIM_NULL with %m", |
| (void *)vp); |
| failed_msg = "Couldn't reopen: vtoname failed for " |
| "CLAIM_NULL"; |
| /* nothing allocated yet */ |
| goto kill_file; |
| } |
| |
| open_args->open_claim4_u.cfile = fn; |
| } else if (claim == CLAIM_PREVIOUS) { |
| |
| /* |
| * We have two cases to deal with here: |
| * 1) We're being called to reopen files in order to satisfy |
| * a lock operation request which requires us to explicitly |
| * reopen files which were opened under a delegation. If |
| * we're in recovery, we *must* use CLAIM_PREVIOUS. In |
| * that case, frc_use_claim_previous is TRUE and we must |
| * use the rnode's current delegation type (r_deleg_type). |
| * 2) We're reopening files during some form of recovery. |
| * In this case, frc_use_claim_previous is FALSE and we |
| * use the delegation type appropriate for recovery |
| * (r_deleg_needs_recovery). |
| */ |
| mutex_enter(&rp->r_statev4_lock); |
| open_args->open_claim4_u.delegate_type = |
| frc_use_claim_previous ? |
| rp->r_deleg_type : |
| rp->r_deleg_needs_recovery; |
| mutex_exit(&rp->r_statev4_lock); |
| |
| } else if (claim == CLAIM_DELEGATE_CUR) { |
| |
| if ((ep->error = vtoname(vp, fn, MAXNAMELEN)) != 0) { |
| nfs_cmn_err(ep->error, CE_WARN, "nfs4_reopen: vtoname " |
| "failed for vp 0x%p for CLAIM_DELEGATE_CUR " |
| "with %m", (void *)vp); |
| failed_msg = "Couldn't reopen: vtoname failed for " |
| "CLAIM_DELEGATE_CUR"; |
| /* nothing allocated yet */ |
| goto kill_file; |
| } |
| |
| mutex_enter(&rp->r_statev4_lock); |
| open_args->open_claim4_u.delegate_cur_info.delegate_stateid = |
| rp->r_deleg_stateid; |
| mutex_exit(&rp->r_statev4_lock); |
| |
| open_args->open_claim4_u.delegate_cur_info.cfile = fn; |
| } |
| open_args->opentype = OPEN4_NOCREATE; |
| open_args->owner.clientid = mi2clientid(mi); |
| open_args->owner.owner_len = sizeof (oop->oo_name); |
| open_args->owner.owner_val = |
| kmem_alloc(open_args->owner.owner_len, KM_SLEEP); |
| bcopy(&oop->oo_name, open_args->owner.owner_val, |
| open_args->owner.owner_len); |
| open_args->share_access = 0; |
| open_args->share_deny = 0; |
| |
| mutex_enter(&osp->os_sync_lock); |
| NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, "nfs4_reopen: osp %p rp " |
| "%p: read acc %"PRIu64" write acc %"PRIu64": open ref count %d: " |
| "mmap read %"PRIu64" mmap write %"PRIu64" claim %d ", |
| (void *)osp, (void *)rp, osp->os_share_acc_read, |
| osp->os_share_acc_write, osp->os_open_ref_count, |
| osp->os_mmap_read, osp->os_mmap_write, claim)); |
| |
| if (osp->os_share_acc_read || osp->os_mmap_read) |
| open_args->share_access |= OPEN4_SHARE_ACCESS_READ; |
| if (osp->os_share_acc_write || osp->os_mmap_write) |
| open_args->share_access |= OPEN4_SHARE_ACCESS_WRITE; |
| if (osp->os_share_deny_read) |
| open_args->share_deny |= OPEN4_SHARE_DENY_READ; |
| if (osp->os_share_deny_write) |
| open_args->share_deny |= OPEN4_SHARE_DENY_WRITE; |
| mutex_exit(&osp->os_sync_lock); |
| |
| seqid = nfs4_get_open_seqid(oop) + 1; |
| open_args->seqid = seqid; |
| |
| /* Construct the getfh part of the compound */ |
| argop[2].argop = OP_GETFH; |
| |
| /* Construct the getattr part of the compound */ |
| argop[3].argop = OP_GETATTR; |
| argop[3].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK; |
| argop[3].nfs_argop4_u.opgetattr.mi = mi; |
| |
| t = gethrtime(); |
| |
| rfs4call(mi, &args, &res, cred_otw, &doqueue, 0, ep); |
| |
| if (ep->error) { |
| if (!is_recov && !frc_use_claim_previous && |
| (ep->error == EINTR || ep->error == ETIMEDOUT || |
| NFS4_FRC_UNMT_ERR(ep->error, vp->v_vfsp))) { |
| nfs4open_save_lost_rqst(ep->error, &lost_rqst, oop, |
| cred_otw, vp, NULL, open_args); |
| abort = nfs4_start_recovery(ep, |
| VTOMI4(vp), vp, NULL, NULL, |
| lost_rqst.lr_op == OP_OPEN ? |
| &lost_rqst : NULL, OP_OPEN, NULL, NULL, NULL); |
| nfs4args_copen_free(open_args); |
| goto bailout; |
| } |
| |
| nfs4args_copen_free(open_args); |
| |
| if (ep->error == EACCES && cred_otw != cr) { |
| crfree(cred_otw); |
| cred_otw = cr; |
| crhold(cred_otw); |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| oop = NULL; |
| goto top; |
| } |
| if (ep->error == ETIMEDOUT) |
| goto bailout; |
| failed_msg = "Couldn't reopen: rpc error"; |
| goto kill_file; |
| } |
| |
| if (nfs4_need_to_bump_seqid(&res)) |
| nfs4_set_open_seqid(seqid, oop, args.ctag); |
| |
| switch (res.status) { |
| case NFS4_OK: |
| if (recov.rs_flags & NFS4_RS_DELAY_MSG) { |
| mutex_enter(&rp->r_statelock); |
| rp->r_delay_interval = 0; |
| mutex_exit(&rp->r_statelock); |
| } |
| break; |
| case NFS4ERR_BAD_SEQID: |
| bsep = nfs4_create_bseqid_entry(oop, NULL, vp, 0, |
| args.ctag, open_args->seqid); |
| |
| abort = nfs4_start_recovery(ep, VTOMI4(vp), vp, NULL, |
| NULL, lost_rqst.lr_op == OP_OPEN ? &lost_rqst : |
| NULL, OP_OPEN, bsep, NULL, NULL); |
| |
| nfs4args_copen_free(open_args); |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| oop = NULL; |
| kmem_free(bsep, sizeof (*bsep)); |
| |
| goto kill_file; |
| case NFS4ERR_NO_GRACE: |
| nfs4args_copen_free(open_args); |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| oop = NULL; |
| if (claim == CLAIM_PREVIOUS) { |
| /* |
| * Retry as a plain open. We don't need to worry about |
| * checking the changeinfo: it is acceptable for a |
| * client to re-open a file and continue processing |
| * (in the absence of locks). |
| */ |
| NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, |
| "nfs4_reopen: CLAIM_PREVIOUS: NFS4ERR_NO_GRACE; " |
| "will retry as CLAIM_NULL")); |
| claim = CLAIM_NULL; |
| nfs4_mi_kstat_inc_no_grace(mi); |
| goto top; |
| } |
| failed_msg = |
| "Couldn't reopen: tried reclaim outside grace period. "; |
| goto kill_file; |
| case NFS4ERR_GRACE: |
| nfs4_set_grace_wait(mi); |
| nfs4args_copen_free(open_args); |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| oop = NULL; |
| ep->error = nfs4_wait_for_grace(mi, &recov); |
| if (ep->error != 0) |
| goto bailout; |
| goto top; |
| case NFS4ERR_DELAY: |
| nfs4_set_delay_wait(vp); |
| nfs4args_copen_free(open_args); |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| oop = NULL; |
| ep->error = nfs4_wait_for_delay(vp, &recov); |
| nfs4_mi_kstat_inc_delay(mi); |
| if (ep->error != 0) |
| goto bailout; |
| goto top; |
| case NFS4ERR_FHEXPIRED: |
| /* recover filehandle and retry */ |
| abort = nfs4_start_recovery(ep, |
| mi, vp, NULL, NULL, NULL, OP_OPEN, NULL, NULL, NULL); |
| nfs4args_copen_free(open_args); |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| oop = NULL; |
| if (abort == FALSE) |
| goto top; |
| failed_msg = "Couldn't reopen: recovery aborted"; |
| goto kill_file; |
| case NFS4ERR_RESOURCE: |
| case NFS4ERR_STALE_CLIENTID: |
| case NFS4ERR_WRONGSEC: |
| case NFS4ERR_EXPIRED: |
| /* |
| * Do not mark the file dead and let the calling |
| * function initiate recovery. |
| */ |
| nfs4args_copen_free(open_args); |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| oop = NULL; |
| goto bailout; |
| case NFS4ERR_ACCESS: |
| if (cred_otw != cr) { |
| crfree(cred_otw); |
| cred_otw = cr; |
| crhold(cred_otw); |
| nfs4args_copen_free(open_args); |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| oop = NULL; |
| goto top; |
| } |
| /* fall through */ |
| default: |
| NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, |
| "nfs4_reopen: r_server 0x%p, mi_curr_serv 0x%p, rnode %s", |
| (void*)VTOR4(vp)->r_server, (void*)mi->mi_curr_serv, |
| rnode4info(VTOR4(vp)))); |
| failed_msg = "Couldn't reopen: NFSv4 error"; |
| nfs4args_copen_free(open_args); |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| goto kill_file; |
| } |
| |
| resop = &res.array[1]; /* open res */ |
| op_res = &resop->nfs_resop4_u.opopen; |
| |
| garp = &res.array[3].nfs_resop4_u.opgetattr.ga_res; |
| |
| /* |
| * Check if the path we reopened really is the same |
| * file. We could end up in a situation where the file |
| * was removed and a new file created with the same name. |
| */ |
| resop = &res.array[2]; |
| gf_res = &resop->nfs_resop4_u.opgetfh; |
| (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0); |
| fh_different = (nfs4cmpfh(&rp->r_fh->sfh_fh, &gf_res->object) != 0); |
| if (fh_different) { |
| if (mi->mi_fh_expire_type == FH4_PERSISTENT || |
| mi->mi_fh_expire_type & FH4_NOEXPIRE_WITH_OPEN) { |
| /* Oops, we don't have the same file */ |
| if (mi->mi_fh_expire_type == FH4_PERSISTENT) |
| failed_msg = "Couldn't reopen: Persistent " |
| "file handle changed"; |
| else |
| failed_msg = "Couldn't reopen: Volatile " |
| "(no expire on open) file handle changed"; |
| |
| nfs4args_copen_free(open_args); |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs_rw_exit(&mi->mi_fh_lock); |
| goto kill_file; |
| |
| } else { |
| /* |
| * We have volatile file handles that don't compare. |
| * If the fids are the same then we assume that the |
| * file handle expired but the rnode still refers to |
| * the same file object. |
| * |
| * First check that we have fids or not. |
| * If we don't we have a dumb server so we will |
| * just assume every thing is ok for now. |
| */ |
| if (!ep->error && garp->n4g_va.va_mask & AT_NODEID && |
| rp->r_attr.va_mask & AT_NODEID && |
| rp->r_attr.va_nodeid != garp->n4g_va.va_nodeid) { |
| /* |
| * We have fids, but they don't |
| * compare. So kill the file. |
| */ |
| failed_msg = |
| "Couldn't reopen: file handle changed" |
| " due to mismatched fids"; |
| nfs4args_copen_free(open_args); |
| xdr_free(xdr_COMPOUND4res_clnt, |
| (caddr_t)&res); |
| nfs_rw_exit(&mi->mi_fh_lock); |
| goto kill_file; |
| } else { |
| /* |
| * We have volatile file handles that refers |
| * to the same file (at least they have the |
| * same fid) or we don't have fids so we |
| * can't tell. :(. We'll be a kind and accepting |
| * client so we'll update the rnode's file |
| * handle with the otw handle. |
| * |
| * We need to drop mi->mi_fh_lock since |
| * sh4_update acquires it. Since there is |
| * only one recovery thread there is no |
| * race. |
| */ |
| nfs_rw_exit(&mi->mi_fh_lock); |
| sfh4_update(rp->r_fh, &gf_res->object); |
| } |
| } |
| } else { |
| nfs_rw_exit(&mi->mi_fh_lock); |
| } |
| |
| ASSERT(nfs4_consistent_type(vp)); |
| |
| /* |
| * If the server wanted an OPEN_CONFIRM but that fails, just start |
| * over. Presumably if there is a persistent error it will show up |
| * when we resend the OPEN. |
| */ |
| if (op_res->rflags & OPEN4_RESULT_CONFIRM) { |
| bool_t retry_open = FALSE; |
| |
| nfs4open_confirm(vp, &seqid, &op_res->stateid, |
| cred_otw, is_recov, &retry_open, |
| oop, FALSE, ep, NULL); |
| if (ep->error || ep->stat) { |
| nfs4args_copen_free(open_args); |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| oop = NULL; |
| goto top; |
| } |
| } |
| |
| mutex_enter(&osp->os_sync_lock); |
| osp->open_stateid = op_res->stateid; |
| osp->os_delegation = 0; |
| /* |
| * Need to reset this bitfield for the possible case where we were |
| * going to OTW CLOSE the file, got a non-recoverable error, and before |
| * we could retry the CLOSE, OPENed the file again. |
| */ |
| ASSERT(osp->os_open_owner->oo_seqid_inuse); |
| osp->os_final_close = 0; |
| osp->os_force_close = 0; |
| if (claim == CLAIM_DELEGATE_CUR || claim == CLAIM_PREVIOUS) |
| osp->os_dc_openacc = open_args->share_access; |
| mutex_exit(&osp->os_sync_lock); |
| |
| nfs4_end_open_seqid_sync(oop); |
| |
| /* accept delegation, if any */ |
| nfs4_delegation_accept(rp, claim, op_res, garp, cred_otw); |
| |
| nfs4args_copen_free(open_args); |
| |
| nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL); |
| |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| |
| ASSERT(nfs4_consistent_type(vp)); |
| |
| open_owner_rele(oop); |
| crfree(cr); |
| crfree(cred_otw); |
| return; |
| |
| kill_file: |
| nfs4_fail_recov(vp, failed_msg, ep->error, ep->stat); |
| failed_reopen: |
| NFS4_DEBUG(nfs4_open_stream_debug, (CE_NOTE, |
| "nfs4_reopen: setting os_failed_reopen for osp %p, cr %p, rp %s", |
| (void *)osp, (void *)cr, rnode4info(rp))); |
| mutex_enter(&osp->os_sync_lock); |
| osp->os_failed_reopen = 1; |
| mutex_exit(&osp->os_sync_lock); |
| bailout: |
| if (oop != NULL) { |
| nfs4_end_open_seqid_sync(oop); |
| open_owner_rele(oop); |
| } |
| if (cr != NULL) |
| crfree(cr); |
| if (cred_otw != NULL) |
| crfree(cred_otw); |
| } |
| |
| /* for . and .. OPENs */ |
| /* ARGSUSED */ |
| static int |
| nfs4_open_non_reg_file(vnode_t **vpp, int flag, cred_t *cr) |
| { |
| rnode4_t *rp; |
| nfs4_ga_res_t gar; |
| |
| ASSERT(nfs_zone() == VTOMI4(*vpp)->mi_zone); |
| |
| /* |
| * If close-to-open consistency checking is turned off or |
| * if there is no cached data, we can avoid |
| * the over the wire getattr. Otherwise, force a |
| * call to the server to get fresh attributes and to |
| * check caches. This is required for close-to-open |
| * consistency. |
| */ |
| rp = VTOR4(*vpp); |
| if (VTOMI4(*vpp)->mi_flags & MI4_NOCTO || |
| (rp->r_dir == NULL && !nfs4_has_pages(*vpp))) |
| return (0); |
| |
| return (nfs4_getattr_otw(*vpp, &gar, cr, 0)); |
| } |
| |
| /* |
| * CLOSE a file |
| */ |
| /* ARGSUSED */ |
| static int |
| nfs4_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, |
| caller_context_t *ct) |
| { |
| rnode4_t *rp; |
| int error = 0; |
| int r_error = 0; |
| int n4error = 0; |
| nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; |
| |
| /* |
| * Remove client state for this (lockowner, file) pair. |
| * Issue otw v4 call to have the server do the same. |
| */ |
| |
| rp = VTOR4(vp); |
| |
| /* |
| * zone_enter(2) prevents processes from changing zones with NFS files |
| * open; if we happen to get here from the wrong zone we can't do |
| * anything over the wire. |
| */ |
| if (VTOMI4(vp)->mi_zone != nfs_zone()) { |
| /* |
| * We could attempt to clean up locks, except we're sure |
| * that the current process didn't acquire any locks on |
| * the file: any attempt to lock a file belong to another zone |
| * will fail, and one can't lock an NFS file and then change |
| * zones, as that fails too. |
| * |
| * Returning an error here is the sane thing to do. A |
| * subsequent call to VN_RELE() which translates to a |
| * nfs4_inactive() will clean up state: if the zone of the |
| * vnode's origin is still alive and kicking, the inactive |
| * thread will handle the request (from the correct zone), and |
| * everything (minus the OTW close call) should be OK. If the |
| * zone is going away nfs4_async_inactive() will throw away |
| * delegations, open streams and cached pages inline. |
| */ |
| return (EIO); |
| } |
| |
| /* |
| * If we are using local locking for this filesystem, then |
| * release all of the SYSV style record locks. Otherwise, |
| * we are doing network locking and we need to release all |
| * of the network locks. All of the locks held by this |
| * process on this file are released no matter what the |
| * incoming reference count is. |
| */ |
| if (VTOMI4(vp)->mi_flags & MI4_LLOCK) { |
| cleanlocks(vp, ttoproc(curthread)->p_pid, 0); |
| cleanshares(vp, ttoproc(curthread)->p_pid); |
| } else |
| e.error = nfs4_lockrelease(vp, flag, offset, cr); |
| |
| if (e.error) { |
| struct lm_sysid *lmsid; |
| lmsid = nfs4_find_sysid(VTOMI4(vp)); |
| if (lmsid == NULL) { |
| DTRACE_PROBE2(unknown__sysid, int, e.error, |
| vnode_t *, vp); |
| } else { |
| cleanlocks(vp, ttoproc(curthread)->p_pid, |
| (lm_sysidt(lmsid) | LM_SYSID_CLIENT)); |
| |
| lm_rel_sysid(lmsid); |
| } |
| return (e.error); |
| } |
| |
| if (count > 1) |
| return (0); |
| |
| /* |
| * If the file has been `unlinked', then purge the |
| * DNLC so that this vnode will get reycled quicker |
| * and the .nfs* file on the server will get removed. |
| */ |
| if (rp->r_unldvp != NULL) |
| dnlc_purge_vp(vp); |
| |
| /* |
| * If the file was open for write and there are pages, |
| * do a synchronous flush and commit of all of the |
| * dirty and uncommitted pages. |
| */ |
| ASSERT(!e.error); |
| if ((flag & FWRITE) && nfs4_has_pages(vp)) |
| error = nfs4_putpage_commit(vp, 0, 0, cr); |
| |
| mutex_enter(&rp->r_statelock); |
| r_error = rp->r_error; |
| rp->r_error = 0; |
| mutex_exit(&rp->r_statelock); |
| |
| /* |
| * If this file type is one for which no explicit 'open' was |
| * done, then bail now (ie. no need for protocol 'close'). If |
| * there was an error w/the vm subsystem, return _that_ error, |
| * otherwise, return any errors that may've been reported via |
| * the rnode. |
| */ |
| if (vp->v_type != VREG) |
| return (error ? error : r_error); |
| |
| /* |
| * The sync putpage commit may have failed above, but since |
| * we're working w/a regular file, we need to do the protocol |
| * 'close' (nfs4close_one will figure out if an otw close is |
| * needed or not). Report any errors _after_ doing the protocol |
| * 'close'. |
| */ |
| nfs4close_one(vp, NULL, cr, flag, NULL, &e, CLOSE_NORM, 0, 0, 0); |
| n4error = e.error ? e.error : geterrno4(e.stat); |
| |
| /* |
| * Error reporting prio (Hi -> Lo) |
| * |
| * i) nfs4_putpage_commit (error) |
| * ii) rnode's (r_error) |
| * iii) nfs4close_one (n4error) |
| */ |
| return (error ? error : (r_error ? r_error : n4error)); |
| } |
| |
| /* |
| * Initialize *lost_rqstp. |
| */ |
| |
| static void |
| nfs4close_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp, |
| nfs4_open_owner_t *oop, nfs4_open_stream_t *osp, cred_t *cr, |
| vnode_t *vp) |
| { |
| if (error != ETIMEDOUT && error != EINTR && |
| !NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) { |
| lost_rqstp->lr_op = 0; |
| return; |
| } |
| |
| NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, |
| "nfs4close_save_lost_rqst: error %d", error)); |
| |
| lost_rqstp->lr_op = OP_CLOSE; |
| /* |
| * The vp is held and rele'd via the recovery code. |
| * See nfs4_save_lost_rqst. |
| */ |
| lost_rqstp->lr_vp = vp; |
| lost_rqstp->lr_dvp = NULL; |
| lost_rqstp->lr_oop = oop; |
| lost_rqstp->lr_osp = osp; |
| ASSERT(osp != NULL); |
| ASSERT(mutex_owned(&osp->os_sync_lock)); |
| osp->os_pending_close = 1; |
| lost_rqstp->lr_lop = NULL; |
| lost_rqstp->lr_cr = cr; |
| lost_rqstp->lr_flk = NULL; |
| lost_rqstp->lr_putfirst = FALSE; |
| } |
| |
| /* |
| * Assumes you already have the open seqid sync grabbed as well as the |
| * 'os_sync_lock'. Note: this will release the open seqid sync and |
| * 'os_sync_lock' if client recovery starts. Calling functions have to |
| * be prepared to handle this. |
| * |
| * 'recov' is returned as 1 if the CLOSE operation detected client recovery |
| * was needed and was started, and that the calling function should retry |
| * this function; otherwise it is returned as 0. |
| * |
| * Errors are returned via the nfs4_error_t parameter. |
| */ |
| static void |
| nfs4close_otw(rnode4_t *rp, cred_t *cred_otw, nfs4_open_owner_t *oop, |
| nfs4_open_stream_t *osp, int *recov, int *did_start_seqid_syncp, |
| nfs4_close_type_t close_type, nfs4_error_t *ep, int *have_sync_lockp) |
| { |
| COMPOUND4args_clnt args; |
| COMPOUND4res_clnt res; |
| CLOSE4args *close_args; |
| nfs_resop4 *resop; |
| nfs_argop4 argop[3]; |
| int doqueue = 1; |
| mntinfo4_t *mi; |
| seqid4 seqid; |
| vnode_t *vp; |
| bool_t needrecov = FALSE; |
| nfs4_lost_rqst_t lost_rqst; |
| hrtime_t t; |
| |
| ASSERT(nfs_zone() == VTOMI4(RTOV4(rp))->mi_zone); |
| |
| ASSERT(MUTEX_HELD(&osp->os_sync_lock)); |
| |
| NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "nfs4close_otw")); |
| |
| /* Only set this to 1 if recovery is started */ |
| *recov = 0; |
| |
| /* do the OTW call to close the file */ |
| |
| if (close_type == CLOSE_RESEND) |
| args.ctag = TAG_CLOSE_LOST; |
| else if (close_type == CLOSE_AFTER_RESEND) |
| args.ctag = TAG_CLOSE_UNDO; |
| else |
| args.ctag = TAG_CLOSE; |
| |
| args.array_len = 3; |
| args.array = argop; |
| |
| vp = RTOV4(rp); |
| |
| mi = VTOMI4(vp); |
| |
| /* putfh target fh */ |
| argop[0].argop = OP_CPUTFH; |
| argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh; |
| |
| argop[1].argop = OP_GETATTR; |
| argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK; |
| argop[1].nfs_argop4_u.opgetattr.mi = mi; |
| |
| argop[2].argop = OP_CLOSE; |
| close_args = &argop[2].nfs_argop4_u.opclose; |
| |
| seqid = nfs4_get_open_seqid(oop) + 1; |
| |
| close_args->seqid = seqid; |
| close_args->open_stateid = osp->open_stateid; |
| |
| NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, |
| "nfs4close_otw: %s call, rp %s", needrecov ? "recov" : "first", |
| rnode4info(rp))); |
| |
| t = gethrtime(); |
| |
| rfs4call(mi, &args, &res, cred_otw, &doqueue, 0, ep); |
| |
| if (!ep->error && nfs4_need_to_bump_seqid(&res)) { |
| nfs4_set_open_seqid(seqid, oop, args.ctag); |
| } |
| |
| needrecov = nfs4_needs_recovery(ep, TRUE, mi->mi_vfsp); |
| if (ep->error && !needrecov) { |
| /* |
| * if there was an error and no recovery is to be done |
| * then then set up the file to flush its cache if |
| * needed for the next caller. |
| */ |
| mutex_enter(&rp->r_statelock); |
| PURGE_ATTRCACHE4_LOCKED(rp); |
| rp->r_flags &= ~R4WRITEMODIFIED; |
| mutex_exit(&rp->r_statelock); |
| return; |
| } |
| |
| if (needrecov) { |
| bool_t abort; |
| nfs4_bseqid_entry_t *bsep = NULL; |
| |
| if (close_type != CLOSE_RESEND) |
| nfs4close_save_lost_rqst(ep->error, &lost_rqst, oop, |
| osp, cred_otw, vp); |
| |
| if (!ep->error && res.status == NFS4ERR_BAD_SEQID) |
| bsep = nfs4_create_bseqid_entry(oop, NULL, vp, |
| 0, args.ctag, close_args->seqid); |
| |
| NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, |
| "nfs4close_otw: initiating recovery. error %d " |
| "res.status %d", ep->error, res.status)); |
| |
| /* |
| * Drop the 'os_sync_lock' here so we don't hit |
| * a potential recursive mutex_enter via an |
| * 'open_stream_hold()'. |
| */ |
| mutex_exit(&osp->os_sync_lock); |
| *have_sync_lockp = 0; |
| abort = nfs4_start_recovery(ep, VTOMI4(vp), vp, NULL, NULL, |
| (close_type != CLOSE_RESEND && |
| lost_rqst.lr_op == OP_CLOSE) ? &lost_rqst : NULL, |
| OP_CLOSE, bsep, NULL, NULL); |
| |
| /* drop open seq sync, and let the calling function regrab it */ |
| nfs4_end_open_seqid_sync(oop); |
| *did_start_seqid_syncp = 0; |
| |
| if (bsep) |
| kmem_free(bsep, sizeof (*bsep)); |
| /* |
| * For signals, the caller wants to quit, so don't say to |
| * retry. For forced unmount, if it's a user thread, it |
| * wants to quit. If it's a recovery thread, the retry |
| * will happen higher-up on the call stack. Either way, |
| * don't say to retry. |
| */ |
| if (abort == FALSE && ep->error != EINTR && |
| !NFS4_FRC_UNMT_ERR(ep->error, mi->mi_vfsp) && |
| close_type != CLOSE_RESEND && |
| close_type != CLOSE_AFTER_RESEND) |
| *recov = 1; |
| else |
| *recov = 0; |
| |
| if (!ep->error) |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| return; |
| } |
| |
| if (res.status) { |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| return; |
| } |
| |
| mutex_enter(&rp->r_statev4_lock); |
| rp->created_v4 = 0; |
| mutex_exit(&rp->r_statev4_lock); |
| |
| resop = &res.array[2]; |
| osp->open_stateid = resop->nfs_resop4_u.opclose.open_stateid; |
| osp->os_valid = 0; |
| |
| /* |
| * This removes the reference obtained at OPEN; ie, when the |
| * open stream structure was created. |
| * |
| * We don't have to worry about calling 'open_stream_rele' |
| * since we our currently holding a reference to the open |
| * stream which means the count cannot go to 0 with this |
| * decrement. |
| */ |
| ASSERT(osp->os_ref_count >= 2); |
| osp->os_ref_count--; |
| |
| if (ep->error == 0) { |
| mutex_exit(&osp->os_sync_lock); |
| *have_sync_lockp = 0; |
| |
| nfs4_attr_cache(vp, |
| &res.array[1].nfs_resop4_u.opgetattr.ga_res, |
| t, cred_otw, TRUE, NULL); |
| } |
| |
| NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "nfs4close_otw:" |
| " returning %d", ep->error)); |
| |
| xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); |
| } |
| |
| /* ARGSUSED */ |
| static int |
| nfs4_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr, |
| caller_context_t *ct) |
| { |
| rnode4_t *rp; |
| u_offset_t off; |
| offset_t diff; |
| uint_t on; |
| uint_t n; |
| caddr_t base; |
| uint_t flags; |
| int error; |
| mntinfo4_t *mi; |
| |
| rp = VTOR4(vp); |
| |
| ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER)); |
| |
| if (IS_SHADOW(vp, rp)) |
| vp = RTOV4(rp); |
| |
| if (vp->v_type != VREG) |
| return (EISDIR); |
| |
| mi = VTOMI4(vp); |
| |
| if (nfs_zone() != mi->mi_zone) |
| return (EIO); |
| |
| if (uiop->uio_resid == 0) |
| return (0); |
| |
| if (uiop->uio_loffset < 0 || uiop->uio_loffset + uiop->uio_resid < 0) |
| return (EINVAL); |
| |
| mutex_enter(&rp->r_statelock); |
| if (rp->r_flags & R4RECOVERRP) |
| error = (rp->r_error ? rp->r_error : EIO); |
| else |
| error = 0; |
| mutex_exit(&rp->r_statelock); |
| if (error) |
| return (error); |
| |
| /* |
| * Bypass VM if caching has been disabled (e.g., locking) or if |
| * using client-side direct I/O and the file is not mmap'd and |
| * there are no cached pages. |
| */ |
| if ((vp->v_flag & VNOCACHE) || |
| (((rp->r_flags & R4DIRECTIO) || (mi->mi_flags & MI4_DIRECTIO)) && |
| rp->r_mapcnt == 0 && rp->r_inmap == 0 && !nfs4_has_pages(vp))) { |
| size_t resid = 0; |
| |
| return (nfs4read(vp, NULL, uiop->uio_loffset, |
| uiop->uio_resid, &resid, cr, FALSE, uiop)); |
| } |
| |
| error = 0; |
| |
| do { |
| off = uiop->uio_loffset & MAXBMASK; /* mapping offset */ |
| on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */ |
| n = MIN(MAXBSIZE - on, uiop->uio_resid); |
| |
| if (error = nfs4_validate_caches(vp, cr)) |
| break; |
| |
| mutex_enter(&rp->r_statelock); |
| while (rp->r_flags & R4INCACHEPURGE) { |
| if (!cv_wait_sig(&rp->r_cv, &rp->r_statelock)) { |
| mutex_exit(&rp->r_statelock); |
| return (EINTR); |
| } |
| } |
| diff = rp->r_size - uiop->uio_loffset; |
| mutex_exit(&rp->r_statelock); |
| if (diff <= 0) |
| break; |
| if (diff < n) |
| n = (uint_t)diff; |
| |
| if (vpm_enable) { |
| /* |
| * Copy data. |
| */ |
| error = vpm_data_copy(vp, off + on, n, uiop, |
| 1, NULL, 0, S_READ); |
| } else { |
| base = segmap_getmapflt(segkmap, vp, off + on, n, 1, |
| S_READ); |
| |
| error = uiomove(base + on, n, UIO_READ, uiop); |
| } |
| |
| if (!error) { |
| /* |
| * If read a whole block or read to eof, |
| |