| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright 2010 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| */ |
| |
| /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ |
| /* All Rights Reserved */ |
| |
| #ifndef _NFS_RNODE_H |
| #define _NFS_RNODE_H |
| |
| #include <sys/avl.h> |
| #include <sys/list.h> |
| #include <nfs/nfs.h> |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| typedef enum nfs_access_type { /* result type of nfs_access_check() */ |
| NFS_ACCESS_UNKNOWN, /* presumably: no cached answer available - TODO confirm */ |
| NFS_ACCESS_ALLOWED, /* presumably: cached answer grants the access - TODO confirm */ |
| NFS_ACCESS_DENIED /* presumably: cached answer denies the access - TODO confirm */ |
| } nfs_access_type_t; |
| |
| typedef struct acache_hash { /* appears to be the head of a chain of acache entries */ |
| struct acache *next; /* next and prev must be first */ |
| struct acache *prev; /* same leading member order as struct acache */ |
| krwlock_t lock; /* NOTE(review): presumably guards this chain - confirm */ |
| } acache_hash_t; |
| |
| typedef struct acache { /* one access cache entry; the rnode keeps these on r_acache */ |
| struct acache *next; /* next and prev must be first */ |
| struct acache *prev; |
| uint32_t known; /* presumably mask of access bits already checked - confirm */ |
| uint32_t allowed; /* presumably mask of access bits that were granted - confirm */ |
| struct rnode *rnode; /* rnode this entry refers to */ |
| cred_t *cred; /* credential this entry refers to */ |
| struct acache *list; /* presumably next entry on the rnode's r_acache list - confirm */ |
| struct acache_hash *hashq; /* hash bucket associated with this entry */ |
| } acache_t; |
| |
| #define NFS_FHANDLE_LEN 72 /* size in bytes of nfs_fhandle.fh_buf */ |
| |
| typedef struct nfs_fhandle { /* file handle as stored in the rnode r_fh member */ |
| int fh_len; /* presumably number of valid bytes in fh_buf - confirm */ |
| char fh_buf[NFS_FHANDLE_LEN]; /* file handle bytes */ |
| } nfs_fhandle; |
| |
| typedef struct rddir_cache { /* one cached readdir response; kept in the rnode r_dir tree */ |
| lloff_t _cookie; /* cookie used to find this cache entry */ |
| lloff_t _ncookie; /* cookie used to find the next cache entry */ |
| char *entries; /* buffer containing dirent entries */ |
| int eof; /* EOF reached after this request */ |
| int entlen; /* size of dirent entries in buf */ |
| int buflen; /* size of the buffer used to store entries */ |
| int flags; /* control flags (RDDIR* values), see below */ |
| kcondvar_t cv; /* cv for blocking */ |
| int error; /* error from RPC operation */ |
| kmutex_t lock; /* NOTE(review): presumably protects this entry - confirm */ |
| uint_t count; /* reference count; see rddir_cache_hold() and rddir_cache_rele() */ |
| avl_node_t tree; /* AVL tree links */ |
| } rddir_cache; |
| |
| #define nfs_cookie _cookie._p._l /* rddir_cache _cookie via lloff_t _p._l; presumably the NFSv2 view - confirm */ |
| #define nfs_ncookie _ncookie._p._l /* rddir_cache _ncookie via lloff_t _p._l */ |
| #define nfs3_cookie _cookie._f /* rddir_cache _cookie via lloff_t _f; presumably the NFSv3 view - confirm */ |
| #define nfs3_ncookie _ncookie._f /* rddir_cache _ncookie via lloff_t _f */ |
| |
| #define RDDIR 0x1 /* rddir_cache flags value: readdir operation in progress */ |
| #define RDDIRWAIT 0x2 /* rddir_cache flags value: waiting on readdir in progress */ |
| #define RDDIRREQ 0x4 /* rddir_cache flags value: a new readdir is required */ |
| #define RDDIRCACHED 0x8 /* rddir_cache flags value: entry is in the cache */ |
| |
| #define HAVE_RDDIR_CACHE(rp) (avl_numnodes(&(rp)->r_dir) > 0) /* nonzero when the r_dir AVL tree of rp holds at least one node */ |
| |
| typedef struct symlink_cache { /* cached readlink response; embedded in the rnode as r_symlink */ |
| char *contents; /* contents of the symbolic link */ |
| int len; /* length of the contents */ |
| int size; /* size of the allocated buffer */ |
| } symlink_cache; |
| |
| typedef struct commit { /* commit information; embedded in the rnode as r_commit */ |
| page_t *c_pages; /* list of pages to commit */ |
| offset3 c_commbase; /* base offset to do commit from */ |
| count3 c_commlen; /* len to commit */ |
| kcondvar_t c_cv; /* condvar for waiting for commit */ |
| } commit_t; |
| |
| /* |
| * The various values for the commit states. These are stored in |
| * the p_fsdata byte in the page struct. |
| * NFSv3,4 can use asynchronous writes - the NFS server can send a response |
| * before storing the data to the stable store (disk). The response indicates |
| * whether the data are on disk or not. The NFS client marks pages |
| * which are already on the stable store as C_NOCOMMIT. The pages which were |
| * sent but are not yet on the stable store are only partially 'safe' and are |
| * marked as C_DELAYCOMMIT, which can be later changed to C_COMMIT if the |
| * commit operation is in progress. If the NFS server is e.g. rebooted, the |
| * client needs to resend all the uncommitted data. The client walks all the |
| * vp->v_pages and if C_DELAYCOMMIT or C_COMMIT is set, the page is marked as |
| * dirty and thus will be written to the server again. |
| */ |
| #define C_NOCOMMIT 0 /* no commit is required (page is already on the stable store) */ |
| #define C_COMMIT 1 /* a commit is required so do it now */ |
| #define C_DELAYCOMMIT 2 /* a commit is required, but can be delayed (sent, not yet on stable store) */ |
| |
| /* |
| * The lock manager holds state making it possible for the client |
| * and server to be out of sync. For example, if the response from |
| * the server granting a lock request is lost, the server will think |
| * the lock is granted and the client will think the lock is lost. |
| * To deal with this, a list of processes for which the client is |
| * not sure if the server holds a lock is attached to the rnode. |
| * When such a process closes the rnode, an unlock request is sent |
| * to the server to unlock the entire file. |
| * |
| * The list is kept as a singly linked NULL terminated list. |
| * Because it is only added to under extreme error conditions, the |
| * list shouldn't get very big. DEBUG kernels print a console warning |
| * when the number of entries on a list goes beyond nfs_lmpl_high_water, |
| * an arbitrary number defined in nfs_add_locking_id(). |
| */ |
| #define RLMPL_PID 1 /* presumably an lmpl_type value: entry identified by pid - confirm */ |
| #define RLMPL_OWNER 2 /* presumably an lmpl_type value: entry identified by owner - confirm */ |
| typedef struct lock_manager_pid_list { /* one entry on the rnode r_lmpl list */ |
| int lmpl_type; /* presumably RLMPL_PID or RLMPL_OWNER - confirm */ |
| pid_t lmpl_pid; |
| union { |
| pid_t _pid; /* accessed through the lmpl_opid macro */ |
| struct { |
| int len; /* accessed through the lmpl_own_len macro */ |
| char *owner; /* accessed through the lmpl_owner macro */ |
| } _own; |
| } un; |
| struct lock_manager_pid_list *lmpl_next; /* next entry; NULL terminates the list */ |
| } lmpl_t; |
| |
| #define lmpl_opid un._pid /* shorthand for the union member un._pid */ |
| #define lmpl_own_len un._own.len /* shorthand for the union member un._own.len */ |
| #define lmpl_owner un._own.owner /* shorthand for the union member un._own.owner */ |
| |
| /* |
| * A homegrown reader/writer lock implementation. It addresses |
| * two requirements not addressed by the system primitives. They |
| * are that the `enter' operation is optionally interruptible and |
| * that they can be re`enter'ed by writers without deadlock. |
| */ |
| typedef struct nfs_rwlock { /* lock object taken by the nfs_rw_*() routines declared below */ |
| int count; /* NOTE(review): presumably encodes reader/writer holds - confirm */ |
| int waiters; /* NOTE(review): presumably number of blocked threads - confirm */ |
| kthread_t *owner; /* NOTE(review): presumably the thread holding it as writer - confirm */ |
| kmutex_t lock; /* NOTE(review): presumably protects the other members - confirm */ |
| kcondvar_t cv; |
| kcondvar_t cv_rd; /* NOTE(review): presumably the condvar readers wait on - confirm */ |
| } nfs_rwlock_t; |
| |
| /* |
| * The format of the hash bucket used to lookup rnodes from a file handle. |
| */ |
| typedef struct rhashq { /* the leading members of struct rnode must match this layout */ |
| struct rnode *r_hashf; /* hash queue forward pointer */ |
| struct rnode *r_hashb; /* hash queue back pointer */ |
| krwlock_t r_lock; /* presumably the hash bucket rwlock that protects r_hashf/r_hashb - confirm */ |
| } rhashq_t; |
| |
| /* |
| * Remote file information structure. |
| * |
| * The rnode is the "inode" for remote files. It contains all the |
| * information necessary to handle remote file on the client side. |
| * |
| * Note on file sizes: we keep two file sizes in the rnode: the size |
| * according to the client (r_size) and the size according to the server |
| * (r_attr.va_size). They can differ because we modify r_size during a |
| * write system call (nfs_rdwr), before the write request goes over the |
| * wire (before the file is actually modified on the server). If an OTW |
| * request occurs before the cached data is written to the server the file |
| * size returned from the server (r_attr.va_size) may not match r_size. |
| * r_size is the one we use, in general. r_attr.va_size is only used to |
| * determine whether or not our cached data is valid. |
| * |
| * Each rnode has 3 locks associated with it (not including the rnode |
| * hash table and free list locks): |
| * |
| * r_rwlock: Serializes nfs_write and nfs_setattr requests |
| * and allows nfs_read requests to proceed in parallel. |
| * Serializes reads/updates to directories. |
| * |
| * r_lkserlock: Serializes lock requests with map, write, and |
| * readahead operations. |
| * |
| * r_statelock: Protects all fields in the rnode except for |
| * those listed below. This lock is intended |
| * to be held for relatively short periods of |
| * time (not across entire putpage operations, |
| * for example). |
| * |
| * The following members are protected by the mutex rpfreelist_lock: |
| * r_freef |
| * r_freeb |
| * |
| * The following members are protected by the hash bucket rwlock: |
| * r_hashf |
| * r_hashb |
| * |
| * Note: r_modaddr is only accessed when the r_statelock mutex is held. |
| * Its value is also controlled via r_rwlock. It is assumed that |
| * there will be only 1 writer active at a time, so it is safe to |
| * set r_modaddr and release r_statelock as long as the r_rwlock |
| * writer lock is held. |
| * |
| * r_inmap informs nfsX_read()/write() that there is a call to nfsX_map() |
| * in progress. nfsX_read()/write() check r_inmap to decide whether |
| * to perform directio on the file or not. r_inmap is atomically |
| * incremented in nfsX_map() before the address space routines are |
| * called and atomically decremented just before nfsX_map() exits. |
| * r_inmap is not protected by any lock. |
| * |
| * r_mapcnt tells that the rnode has mapped pages. r_inmap can be 0 |
| * while the rnode has mapped pages. |
| * |
| * 64-bit offsets: the code formerly assumed that atomic reads of |
| * r_size were safe and reliable; on 32-bit architectures, this is |
| * not true since an intervening bus cycle from another processor |
| * could update half of the size field. The r_statelock must now |
| * be held whenever any kind of access of r_size is made. |
| * |
| * Lock ordering: |
| * r_rwlock > r_lkserlock > r_statelock |
| */ |
| struct exportinfo; /* defined in nfs/export.h */ |
| struct servinfo; /* defined in nfs/nfs_clnt.h */ |
| struct failinfo; /* defined in nfs/nfs_clnt.h */ |
| struct mntinfo; /* defined in nfs/nfs_clnt.h */ |
| |
| #ifdef _KERNEL |
| |
| typedef struct rnode { /* client-side state for one remote file */ |
| /* the hash fields must be first to match the rhashq_t */ |
| struct rnode *r_hashf; /* hash queue forward pointer */ |
| struct rnode *r_hashb; /* hash queue back pointer */ |
| struct rnode *r_freef; /* free list forward pointer (under rpfreelist_lock) */ |
| struct rnode *r_freeb; /* free list back pointer (under rpfreelist_lock) */ |
| rhashq_t *r_hashq; /* pointer to the hash bucket */ |
| vnode_t *r_vnode; /* vnode for remote file */ |
| nfs_rwlock_t r_rwlock; /* serializes write/setattr requests */ |
| nfs_rwlock_t r_lkserlock; /* serialize lock with other ops */ |
| kmutex_t r_statelock; /* protects (most of) rnode contents */ |
| nfs_fhandle r_fh; /* file handle; see RTOFH() and RTOFH3() */ |
| struct servinfo *r_server; /* current server */ |
| char *r_path; /* path to this rnode */ |
| u_offset_t r_nextr; /* next byte read offset (read-ahead) */ |
| cred_t *r_cred; /* current credentials */ |
| cred_t *r_unlcred; /* unlinked credentials */ |
| char *r_unlname; /* unlinked file name */ |
| vnode_t *r_unldvp; /* parent dir of unlinked file */ |
| len_t r_size; /* client's view of file size; hold r_statelock for any access */ |
| struct vattr r_attr; /* cached vnode attributes; va_size is the server's view of size */ |
| hrtime_t r_attrtime; /* time attributes become invalid */ |
| hrtime_t r_mtime; /* client time file last modified */ |
| long r_mapcnt; /* count of mmapped pages */ |
| uint_t r_count; /* # of refs not reflected in v_count */ |
| uint_t r_awcount; /* # of outstanding async writes */ |
| uint_t r_gcount; /* getattrs waiting to flush pages */ |
| ushort_t r_flags; /* flags (R* values), see below */ |
| short r_error; /* async write error */ |
| kcondvar_t r_cv; /* condvar for blocked threads */ |
| int (*r_putapage) /* address of putapage routine */ |
| (vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *); |
| avl_tree_t r_dir; /* cache of readdir responses */ |
| rddir_cache *r_direof; /* pointer to the EOF entry */ |
| symlink_cache r_symlink; /* cached readlink response */ |
| writeverf3 r_verf; /* version 3 write verifier */ |
| u_offset_t r_modaddr; /* address for page in writerp */ |
| commit_t r_commit; /* commit information */ |
| u_offset_t r_truncaddr; /* base for truncate operation */ |
| vsecattr_t *r_secattr; /* cached security attributes (acls) */ |
| cookieverf3 r_cookieverf; /* version 3 readdir cookie verifier */ |
| lmpl_t *r_lmpl; /* pids that may be holding locks */ |
| nfs3_pathconf_info *r_pathconf; /* cached pathconf information */ |
| acache_t *r_acache; /* list of access cache entries */ |
| kthread_t *r_serial; /* id of purging thread */ |
| list_t r_indelmap; /* list of delmap callers */ |
| uint_t r_inmap; /* count of nfsX_map() calls in progress; updated atomically, no lock */ |
| list_node_t r_mi_link; /* linkage into list of rnodes for */ |
| /* this mntinfo */ |
| } rnode_t; |
| #endif /* _KERNEL */ |
| |
| /* |
| * Flags: bit values for the rnode r_flags member (0x1 through 0x8000 are all in use). |
| */ |
| #define RREADDIRPLUS 0x1 /* issue a READDIRPLUS instead of READDIR */ |
| #define RDIRTY 0x2 /* dirty pages from write operation */ |
| #define RSTALE 0x4 /* file handle is stale */ |
| #define RMODINPROGRESS 0x8 /* page modification happening */ |
| #define RTRUNCATE 0x10 /* truncating, don't commit */ |
| #define RHAVEVERF 0x20 /* have a write verifier to compare against */ |
| #define RCOMMIT 0x40 /* commit in progress */ |
| #define RCOMMITWAIT 0x80 /* someone is waiting to do a commit */ |
| #define RHASHED 0x100 /* rnode is in hash queues */ |
| #define ROUTOFSPACE 0x200 /* an out of space error has happened */ |
| #define RDIRECTIO 0x400 /* bypass the buffer cache */ |
| #define RLOOKUP 0x800 /* a lookup has been performed */ |
| #define RWRITEATTR 0x1000 /* attributes came from WRITE */ |
| #define RINDNLCPURGE 0x2000 /* in the process of purging DNLC references */ |
| #define RDELMAPLIST 0x4000 /* delmap callers tracking for as callback */ |
| #define RINCACHEPURGE 0x8000 /* purging caches due to file size change */ |
| |
| /* |
| * Convert between vnode and rnode |
| */ |
| #define RTOV(rp) ((rp)->r_vnode) /* rnode pointer -> its r_vnode */ |
| #define VTOR(vp) ((rnode_t *)((vp)->v_data)) /* vnode pointer -> v_data cast to rnode_t * */ |
| |
| #define VTOFH(vp) (RTOFH(VTOR(vp))) /* vnode pointer -> fhandle_t *, via VTOR and RTOFH */ |
| #define RTOFH(rp) ((fhandle_t *)(&(rp)->r_fh.fh_buf)) /* address of the handle bytes only; fh_len is skipped */ |
| #define VTOFH3(vp) (RTOFH3(VTOR(vp))) /* vnode pointer -> nfs_fh3 *, via VTOR and RTOFH3 */ |
| #define RTOFH3(rp) ((nfs_fh3 *)(&(rp)->r_fh)) /* address of the whole r_fh; assumes nfs_fh3 layout matches nfs_fhandle - confirm */ |
| |
| #ifdef _KERNEL |
| extern int nfs_async_readahead(vnode_t *, u_offset_t, caddr_t, |
| struct seg *, cred_t *, |
| void (*)(vnode_t *, u_offset_t, /* callback; takes the same arguments as listed above */ |
| caddr_t, struct seg *, cred_t *)); |
| extern int nfs_async_putapage(vnode_t *, page_t *, u_offset_t, size_t, |
| int, cred_t *, int (*)(vnode_t *, page_t *, /* callback; takes the same arguments */ |
| u_offset_t, size_t, int, cred_t *)); |
| extern int nfs_async_pageio(vnode_t *, page_t *, u_offset_t, size_t, |
| int, cred_t *, int (*)(vnode_t *, page_t *, /* callback; takes the same arguments */ |
| u_offset_t, size_t, int, cred_t *)); |
| extern void nfs_async_readdir(vnode_t *, rddir_cache *, |
| cred_t *, int (*)(vnode_t *, /* callback; takes the same arguments */ |
| rddir_cache *, cred_t *)); |
| extern void nfs_async_commit(vnode_t *, page_t *, offset3, count3, |
| cred_t *, void (*)(vnode_t *, page_t *, /* callback; takes the same arguments */ |
| offset3, count3, cred_t *)); |
| extern void nfs_async_inactive(vnode_t *, cred_t *, void (*)(vnode_t *, /* callback also takes a caller_context_t * */ |
| cred_t *, caller_context_t *)); |
| extern int writerp(rnode_t *, caddr_t, int, struct uio *, int); |
| extern int nfs_putpages(vnode_t *, u_offset_t, size_t, int, cred_t *); |
| extern void nfs_invalidate_pages(vnode_t *, u_offset_t, cred_t *); |
| extern int rfs2call(struct mntinfo *, rpcproc_t, xdrproc_t, caddr_t, |
| xdrproc_t, caddr_t, cred_t *, int *, enum nfsstat *, /* takes enum nfsstat *, unlike rfs3call */ |
| int, struct failinfo *); |
| extern int rfs3call(struct mntinfo *, rpcproc_t, xdrproc_t, caddr_t, |
| xdrproc_t, caddr_t, cred_t *, int *, nfsstat3 *, /* takes nfsstat3 *, unlike rfs2call */ |
| int, struct failinfo *); |
| extern void nfs_setswaplike(vnode_t *, vattr_t *); |
| extern vnode_t *makenfsnode(fhandle_t *, struct nfsfattr *, struct vfs *, /* attributes given as struct nfsfattr */ |
| hrtime_t, cred_t *, char *, char *); |
| extern vnode_t *makenfs3node_va(nfs_fh3 *, vattr_t *, struct vfs *, hrtime_t, /* attributes given as vattr_t */ |
| cred_t *, char *, char *); |
| extern vnode_t *makenfs3node(nfs_fh3 *, fattr3 *, struct vfs *, hrtime_t, /* attributes given as fattr3 */ |
| cred_t *, char *, char *); |
| extern void rp_addfree(rnode_t *, cred_t *); |
| extern void rp_rmhash(rnode_t *); |
| extern int check_rtable(struct vfs *); |
| extern void destroy_rtable(struct vfs *, cred_t *); |
| extern void rflush(struct vfs *, cred_t *); |
| extern nfs_access_type_t nfs_access_check(rnode_t *, uint32_t, cred_t *); /* returns an nfs_access_type_t verdict */ |
| extern void nfs_access_cache(rnode_t *rp, uint32_t, uint32_t, cred_t *); |
| extern int nfs_access_purge_rp(rnode_t *); |
| extern int nfs_putapage(vnode_t *, page_t *, u_offset_t *, size_t *, /* same signature as the rnode r_putapage member */ |
| int, cred_t *); |
| extern int nfs3_putapage(vnode_t *, page_t *, u_offset_t *, size_t *, /* same signature as the rnode r_putapage member */ |
| int, cred_t *); |
| extern void nfs_printfhandle(nfs_fhandle *); |
| extern void nfs_write_error(vnode_t *, int, cred_t *); |
| extern rddir_cache *rddir_cache_alloc(int); |
| extern void rddir_cache_hold(rddir_cache *); /* NOTE(review): presumably takes a reference (rddir_cache count) - confirm */ |
| extern void rddir_cache_rele(rddir_cache *); /* NOTE(review): presumably drops a reference (rddir_cache count) - confirm */ |
| #ifdef DEBUG /* the two buffer helpers below are declared only when DEBUG is defined */ |
| extern char *rddir_cache_buf_alloc(size_t, int); |
| extern void rddir_cache_buf_free(void *, size_t); |
| #endif |
| extern int nfs_rw_enter_sig(nfs_rwlock_t *, krw_t, int); /* NOTE(review): int presumably selects an interruptible wait - confirm */ |
| extern int nfs_rw_tryenter(nfs_rwlock_t *, krw_t); |
| extern void nfs_rw_exit(nfs_rwlock_t *); |
| extern int nfs_rw_lock_held(nfs_rwlock_t *, krw_t); |
| extern void nfs_rw_init(nfs_rwlock_t *, char *, krw_type_t, void *); |
| extern void nfs_rw_destroy(nfs_rwlock_t *); |
| extern int nfs_directio(vnode_t *, int, cred_t *); |
| extern int nfs3_rddir_compar(const void *, const void *); /* comparator-shaped; presumably orders rddir_cache entries by nfs3_cookie - confirm */ |
| extern int nfs_rddir_compar(const void *, const void *); /* comparator-shaped; presumably orders rddir_cache entries by nfs_cookie - confirm */ |
| extern struct zone *nfs_zone(void); |
| extern zoneid_t nfs_zoneid(void); |
| |
| #endif |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| #endif /* _NFS_RNODE_H */ |