| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| */ |
| |
| #pragma ident "%Z%%M% %I% %E% SMI" |
| |
| #include <sys/param.h> |
| #include <sys/errno.h> |
| #include <sys/vfs.h> |
| #include <sys/vnode.h> |
| #include <sys/uio.h> |
| #include <sys/pathname.h> |
| #include <sys/kmem.h> |
| #include <sys/cred.h> |
| #include <sys/statvfs.h> |
| #include <sys/fs/lofs_info.h> |
| #include <sys/fs/lofs_node.h> |
| #include <sys/mount.h> |
| #include <sys/mntent.h> |
| #include <sys/mkdev.h> |
| #include <sys/priv.h> |
| #include <sys/sysmacros.h> |
| #include <sys/systm.h> |
| #include <sys/cmn_err.h> |
| #include <sys/policy.h> |
| #include <sys/tsol/label.h> |
| #include "fs/fs_subr.h" |
| |
| /* |
| * This is the loadable module wrapper. |
| */ |
| #include <sys/modctl.h> |
| |
| static mntopts_t lofs_mntopts; |
| |
| static int lofsinit(int, char *); |
| |
| static vfsdef_t vfw = { |
| VFSDEF_VERSION, |
| "lofs", |
| lofsinit, |
| VSW_HASPROTO|VSW_STATS, |
| &lofs_mntopts |
| }; |
| |
| /* |
| * Stuff needed to support "zonedevfs" mode. |
| */ |
| static major_t lofs_major; |
| static minor_t lofs_minor; |
| static kmutex_t lofs_minor_lock; |
| |
| /* |
| * LOFS mount options table |
| */ |
| static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; |
| static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; |
| static char *zonedevfs_cancel[] = { MNTOPT_LOFS_NOZONEDEVFS, NULL }; |
| static char *nozonedevfs_cancel[] = { MNTOPT_LOFS_ZONEDEVFS, NULL }; |
| static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL }; |
| static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL }; |
| |
| static mntopt_t mntopts[] = { |
| /* |
| * option name cancel option default arg flags |
| * private data |
| */ |
| { MNTOPT_XATTR, xattr_cancel, NULL, 0, |
| (void *)0 }, |
| { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, |
| (void *)0 }, |
| { MNTOPT_LOFS_ZONEDEVFS, zonedevfs_cancel, NULL, 0, |
| (void *)0 }, |
| { MNTOPT_LOFS_NOZONEDEVFS, nozonedevfs_cancel, NULL, 0, |
| (void *)0 }, |
| { MNTOPT_LOFS_SUB, sub_cancel, NULL, 0, |
| (void *)0 }, |
| { MNTOPT_LOFS_NOSUB, nosub_cancel, NULL, 0, |
| (void *)0 }, |
| }; |
| |
| static mntopts_t lofs_mntopts = { |
| sizeof (mntopts) / sizeof (mntopt_t), |
| mntopts |
| }; |
| |
| /* |
| * Module linkage information for the kernel. |
| */ |
| |
| static struct modlfs modlfs = { |
| &mod_fsops, "filesystem for lofs", &vfw |
| }; |
| |
| static struct modlinkage modlinkage = { |
| MODREV_1, (void *)&modlfs, NULL |
| }; |
| |
| /* |
| * This is the module initialization routine. |
| */ |
| |
| int |
| _init(void) |
| { |
| int status; |
| |
| lofs_subrinit(); |
| status = mod_install(&modlinkage); |
| if (status != 0) { |
| /* |
| * Cleanup previously initialized work. |
| */ |
| lofs_subrfini(); |
| } |
| |
| return (status); |
| } |
| |
/*
 * Module unload routine.
 *
 * Unloading lofs is refused for now because there is a memory leak if the
 * module gets unloaded; always returns EBUSY.
 */
int
_fini(void)
{
	return (EBUSY);
}
| |
| int |
| _info(struct modinfo *modinfop) |
| { |
| return (mod_info(&modlinkage, modinfop)); |
| } |
| |
| |
| static int lofsfstype; |
| vfsops_t *lo_vfsops; |
| |
| /* |
| * lo mount vfsop |
| * Set up mount info record and attach it to vfs struct. |
| */ |
| /*ARGSUSED*/ |
| static int |
| lo_mount(struct vfs *vfsp, |
| struct vnode *vp, |
| struct mounta *uap, |
| struct cred *cr) |
| { |
| int error; |
| struct vnode *srootvp = NULL; /* the server's root */ |
| struct vnode *realrootvp; |
| struct loinfo *li; |
| int is_zonedevfs = 0; |
| int nodev; |
| |
| nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL); |
| |
| if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0) |
| return (EPERM); |
| |
| /* |
| * Loopback devices which get "nodevices" added can be done without |
| * "nodevices" set because we cannot import devices into a zone |
| * with loopback. Note that we have all zone privileges when |
| * this happens; if not, we'd have gotten "nosuid". |
| */ |
| if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) |
| vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY); |
| |
| /* |
| * We must ensure that only the global zone applies the 'zonedevfs' |
| * option; we don't want non-global zones to be able to establish |
| * lofs mounts using the special dev_t we use to ensure that the |
| * contents of a zone's /dev cannot be victim to link(2) or rename(2). |
| * See below, where we set all of this up. |
| * |
| * Since this is more like a privilege check, we use crgetzoneid(cr) |
| * instead of getzoneid(). |
| */ |
| is_zonedevfs = vfs_optionisset(vfsp, MNTOPT_LOFS_ZONEDEVFS, NULL); |
| if (crgetzoneid(cr) != GLOBAL_ZONEID && is_zonedevfs) |
| return (EPERM); |
| |
| mutex_enter(&vp->v_lock); |
| if (!(uap->flags & MS_OVERLAY) && |
| (vp->v_count != 1 || (vp->v_flag & VROOT))) { |
| mutex_exit(&vp->v_lock); |
| return (EBUSY); |
| } |
| mutex_exit(&vp->v_lock); |
| |
| /* |
| * Find real root, and make vfs point to real vfs |
| */ |
| if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ? |
| UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, |
| &realrootvp)) |
| return (error); |
| |
| /* |
| * Enforce MAC policy if needed. |
| * |
| * Loopback mounts must not allow writing up. The dominance test |
| * is intended to prevent a global zone caller from accidentally |
| * creating write-up conditions between two labeled zones. |
| * Local zones can't violate MAC on their own without help from |
| * the global zone because they can't name a pathname that |
| * they don't already have. |
| * |
| * The special case check for the NET_MAC_AWARE process flag is |
| * to support the case of the automounter in the global zone. We |
| * permit automounting of local zone directories such as home |
| * directories, into the global zone as required by setlabel, |
| * zonecopy, and saving of desktop sessions. Such mounts are |
| * trusted not to expose the contents of one zone's directories |
| * to another by leaking them through the global zone. |
| */ |
| if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) { |
| void *specname; |
| zone_t *from_zptr; |
| zone_t *to_zptr; |
| |
| if (uap->flags & MS_SYSSPACE) { |
| specname = uap->spec; |
| } else { |
| specname = kmem_alloc(MAXPATHLEN, KM_SLEEP); |
| error = copyinstr(uap->spec, specname, MAXPATHLEN, |
| NULL); |
| if (error) { |
| kmem_free(specname, MAXPATHLEN); |
| return (error); |
| } |
| } |
| from_zptr = zone_find_by_path(specname); |
| if (!(uap->flags & MS_SYSSPACE)) |
| kmem_free(specname, MAXPATHLEN); |
| |
| to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); |
| |
| /* |
| * Special case for zone devfs: the zone for /dev will |
| * incorrectly appear as the global zone since it's not |
| * under the zone rootpath. So for zone devfs check allow |
| * read-write mounts. |
| */ |
| |
| if (from_zptr != to_zptr && !is_zonedevfs) { |
| /* |
| * We know at this point that the labels aren't equal |
| * because the zone pointers aren't equal, and zones |
| * can't share a label. |
| * |
| * If the source is the global zone then making |
| * it available to a local zone must be done in |
| * read-only mode as the label will become admin_low. |
| * |
| * If it is a mount between local zones then if |
| * the current process is in the global zone and has |
| * the NET_MAC_AWARE flag, then regular read-write |
| * access is allowed. If it's in some other zone, but |
| * the label on the mount point dominates the original |
| * source, then allow the mount as read-only |
| * ("read-down"). |
| */ |
| if (from_zptr->zone_id == GLOBAL_ZONEID) { |
| /* make the mount read-only */ |
| vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); |
| } else { /* cross-zone mount */ |
| if (to_zptr->zone_id == GLOBAL_ZONEID && |
| /* LINTED: no consequent */ |
| getpflags(NET_MAC_AWARE, cr) != 0) { |
| /* Allow the mount as read-write */ |
| } else if (bldominates( |
| label2bslabel(to_zptr->zone_slabel), |
| label2bslabel(from_zptr->zone_slabel))) { |
| /* make the mount read-only */ |
| vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); |
| } else { |
| zone_rele(to_zptr); |
| zone_rele(from_zptr); |
| return (EACCES); |
| } |
| } |
| } |
| zone_rele(to_zptr); |
| zone_rele(from_zptr); |
| } |
| |
| /* |
| * realrootvp may be an AUTOFS node, in which case we |
| * perform a VOP_ACCESS() to trigger the mount of the |
| * intended filesystem, so we loopback mount the intended |
| * filesystem instead of the AUTOFS filesystem. |
| */ |
| (void) VOP_ACCESS(realrootvp, 0, 0, cr); |
| |
| /* |
| * We're interested in the top most filesystem. |
| * This is specially important when uap->spec is a trigger |
| * AUTOFS node, since we're really interested in mounting the |
| * filesystem AUTOFS mounted as result of the VOP_ACCESS() |
| * call not the AUTOFS node itself. |
| */ |
| if (vn_mountedvfs(realrootvp) != NULL) { |
| if (error = traverse(&realrootvp)) { |
| VN_RELE(realrootvp); |
| return (error); |
| } |
| } |
| |
| /* |
| * Allocate a vfs info struct and attach it |
| */ |
| li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP); |
| li->li_realvfs = realrootvp->v_vfsp; |
| li->li_mountvfs = vfsp; |
| |
| /* |
| * Set mount flags to be inherited by loopback vfs's |
| */ |
| if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { |
| li->li_mflag |= VFS_RDONLY; |
| } |
| if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { |
| li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES); |
| } |
| if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { |
| li->li_mflag |= VFS_NODEVICES; |
| } |
| if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { |
| li->li_mflag |= VFS_NOSETUID; |
| } |
| /* |
| * Permissive flags are added to the "deny" bitmap. |
| */ |
| if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { |
| li->li_dflag |= VFS_XATTR; |
| } |
| if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { |
| li->li_dflag |= VFS_NBMAND; |
| } |
| |
| /* |
| * Propagate inheritable mount flags from the real vfs. |
| */ |
| if ((li->li_realvfs->vfs_flag & VFS_RDONLY) && |
| !vfs_optionisset(vfsp, MNTOPT_RO, NULL)) |
| vfs_setmntopt(vfsp, MNTOPT_RO, NULL, |
| VFS_NODISPLAY); |
| if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) && |
| !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) |
| vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, |
| VFS_NODISPLAY); |
| if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) && |
| !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) |
| vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL, |
| VFS_NODISPLAY); |
| /* |
| * Permissive flags such as VFS_XATTR, as opposed to restrictive flags |
| * such as VFS_RDONLY, are handled differently. An explicit |
| * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR. |
| */ |
| if ((li->li_realvfs->vfs_flag & VFS_XATTR) && |
| !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) && |
| !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) |
| vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL, |
| VFS_NODISPLAY); |
| if ((li->li_realvfs->vfs_flag & VFS_NBMAND) && |
| !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) && |
| !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) |
| vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, |
| VFS_NODISPLAY); |
| |
| li->li_refct = 0; |
| vfsp->vfs_data = (caddr_t)li; |
| vfsp->vfs_bcount = 0; |
| vfsp->vfs_fstype = lofsfstype; |
| vfsp->vfs_bsize = li->li_realvfs->vfs_bsize; |
| |
| /* |
| * Test to see if we need to be in "zone /dev" mode. In zonedevfs |
| * mode, we pull a nasty trick; we make sure that the lofs dev_t does |
| * *not* reflect the underlying device, so that no renames or links |
| * can occur to or from the /dev hierarchy. |
| */ |
| if (is_zonedevfs) { |
| dev_t dev; |
| |
| mutex_enter(&lofs_minor_lock); |
| do { |
| lofs_minor = (lofs_minor + 1) & MAXMIN32; |
| dev = makedevice(lofs_major, lofs_minor); |
| } while (vfs_devismounted(dev)); |
| mutex_exit(&lofs_minor_lock); |
| |
| vfsp->vfs_dev = dev; |
| vfs_make_fsid(&vfsp->vfs_fsid, dev, lofsfstype); |
| |
| li->li_flag |= LO_ZONEDEVFS; |
| } else { |
| vfsp->vfs_dev = li->li_realvfs->vfs_dev; |
| vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0]; |
| vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1]; |
| } |
| |
| if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) { |
| li->li_flag |= LO_NOSUB; |
| } |
| |
| /* |
| * Setup the hashtable. If the root of this mount isn't a directory, |
| * there's no point in allocating a large hashtable. A table with one |
| * bucket is sufficient. |
| */ |
| if (realrootvp->v_type != VDIR) |
| lsetup(li, 1); |
| else |
| lsetup(li, 0); |
| |
| /* |
| * Make the root vnode |
| */ |
| srootvp = makelonode(realrootvp, li, 0); |
| srootvp->v_flag |= VROOT; |
| li->li_rootvp = srootvp; |
| |
| #ifdef LODEBUG |
| lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n", |
| vfsp, li->li_realvfs, srootvp, realrootvp, li); |
| #endif |
| return (0); |
| } |
| |
| /* |
| * Undo loopback mount |
| */ |
| static int |
| lo_unmount(struct vfs *vfsp, int flag, struct cred *cr) |
| { |
| struct loinfo *li; |
| |
| if (secpolicy_fs_unmount(cr, vfsp) != 0) |
| return (EPERM); |
| |
| /* |
| * Forced unmount is not supported by this file system |
| * and thus, ENOTSUP, is being returned. |
| */ |
| if (flag & MS_FORCE) |
| return (ENOTSUP); |
| |
| li = vtoli(vfsp); |
| #ifdef LODEBUG |
| lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li); |
| #endif |
| if (li->li_refct != 1 || li->li_rootvp->v_count != 1) { |
| #ifdef LODEBUG |
| lo_dprint(4, "refct %d v_ct %d\n", li->li_refct, |
| li->li_rootvp->v_count); |
| #endif |
| return (EBUSY); |
| } |
| VN_RELE(li->li_rootvp); |
| return (0); |
| } |
| |
| /* |
| * Find root of lofs mount. |
| */ |
| static int |
| lo_root(struct vfs *vfsp, struct vnode **vpp) |
| { |
| *vpp = vtoli(vfsp)->li_rootvp; |
| #ifdef LODEBUG |
| lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp); |
| #endif |
| /* |
| * If the root of the filesystem is a special file, return the specvp |
| * version of the vnode. We don't save the specvp vnode in our |
| * hashtable since that's exclusively for lnodes. |
| */ |
| if (IS_DEVVP(*vpp)) { |
| struct vnode *svp; |
| |
| svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred); |
| if (svp == NULL) |
| return (ENOSYS); |
| *vpp = svp; |
| } else { |
| VN_HOLD(*vpp); |
| } |
| |
| return (0); |
| } |
| |
| /* |
| * Get file system statistics. |
| */ |
| static int |
| lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp) |
| { |
| vnode_t *realrootvp; |
| |
| #ifdef LODEBUG |
| lo_dprint(4, "lostatvfs %p\n", vfsp); |
| #endif |
| /* |
| * Using realrootvp->v_vfsp (instead of the realvfsp that was |
| * cached) is necessary to make lofs work woth forced UFS unmounts. |
| * In the case of a forced unmount, UFS stores a set of dummy vfsops |
| * in all the (i)vnodes in the filesystem. The dummy ops simply |
| * returns back EIO. |
| */ |
| (void) lo_realvfs(vfsp, &realrootvp); |
| if (realrootvp != NULL) |
| return (VFS_STATVFS(realrootvp->v_vfsp, sbp)); |
| else |
| return (EIO); |
| } |
| |
/*
 * Sync vfsop.
 *
 * LOFS doesn't have any data or metadata to flush; pending I/O on the
 * underlying filesystem will be flushed when that filesystem is synched.
 * Always returns 0.
 */
/* ARGSUSED */
static int
lo_sync(struct vfs *vfsp, short flag, struct cred *cr)
{
#ifdef LODEBUG
	lo_dprint(4, "lo_sync: %p\n", vfsp);
#endif
	return (0);
}
| |
| /* |
| * Obtain the vnode from the underlying filesystem. |
| */ |
| static int |
| lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) |
| { |
| vnode_t *realrootvp; |
| |
| #ifdef LODEBUG |
| lo_dprint(4, "lo_vget: %p\n", vfsp); |
| #endif |
| (void) lo_realvfs(vfsp, &realrootvp); |
| if (realrootvp != NULL) |
| return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp)); |
| else |
| return (EIO); |
| } |
| |
| /* |
| * Free mount-specific data. |
| */ |
| static void |
| lo_freevfs(struct vfs *vfsp) |
| { |
| struct loinfo *li = vtoli(vfsp); |
| |
| ldestroy(li); |
| kmem_free(li, sizeof (struct loinfo)); |
| } |
| |
| static int |
| lofsinit(int fstyp, char *name) |
| { |
| static const fs_operation_def_t lo_vfsops_template[] = { |
| VFSNAME_MOUNT, lo_mount, |
| VFSNAME_UNMOUNT, lo_unmount, |
| VFSNAME_ROOT, lo_root, |
| VFSNAME_STATVFS, lo_statvfs, |
| VFSNAME_SYNC, (fs_generic_func_p) lo_sync, |
| VFSNAME_VGET, lo_vget, |
| VFSNAME_FREEVFS, (fs_generic_func_p) lo_freevfs, |
| NULL, NULL |
| }; |
| int error; |
| |
| error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops); |
| if (error != 0) { |
| cmn_err(CE_WARN, "lofsinit: bad vfs ops template"); |
| return (error); |
| } |
| |
| error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops); |
| if (error != 0) { |
| (void) vfs_freevfsops_by_type(fstyp); |
| cmn_err(CE_WARN, "lofsinit: bad vnode ops template"); |
| return (error); |
| } |
| |
| lofsfstype = fstyp; |
| |
| if ((lofs_major = getudev()) == (major_t)-1) { |
| (void) vfs_freevfsops_by_type(fstyp); |
| cmn_err(CE_WARN, "lofsinit: Can't get unique device number."); |
| return (ENXIO); |
| } |
| |
| lofs_minor = 0; |
| mutex_init(&lofs_minor_lock, NULL, MUTEX_DEFAULT, NULL); |
| |
| return (0); |
| } |