| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. |
| * Copyright 2014, Joyent, Inc. All rights reserved. |
| */ |
| |
| /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ |
| /* All Rights Reserved */ |
| |
| /* |
| * University Copyright- Copyright (c) 1982, 1986, 1988 |
| * The Regents of the University of California |
| * All Rights Reserved |
| * |
| * University Acknowledgment- Portions of this document are derived from |
| * software developed by the University of California, Berkeley, and its |
| * contributors. |
| */ |
| |
| /* |
| * VM - shared or copy-on-write from a vnode/anonymous memory. |
| */ |
| |
| #include <sys/types.h> |
| #include <sys/param.h> |
| #include <sys/t_lock.h> |
| #include <sys/errno.h> |
| #include <sys/systm.h> |
| #include <sys/mman.h> |
| #include <sys/debug.h> |
| #include <sys/cred.h> |
| #include <sys/vmsystm.h> |
| #include <sys/tuneable.h> |
| #include <sys/bitmap.h> |
| #include <sys/swap.h> |
| #include <sys/kmem.h> |
| #include <sys/sysmacros.h> |
| #include <sys/vtrace.h> |
| #include <sys/cmn_err.h> |
| #include <sys/callb.h> |
| #include <sys/vm.h> |
| #include <sys/dumphdr.h> |
| #include <sys/lgrp.h> |
| |
| #include <vm/hat.h> |
| #include <vm/as.h> |
| #include <vm/seg.h> |
| #include <vm/seg_vn.h> |
| #include <vm/pvn.h> |
| #include <vm/anon.h> |
| #include <vm/page.h> |
| #include <vm/vpage.h> |
| #include <sys/proc.h> |
| #include <sys/task.h> |
| #include <sys/project.h> |
| #include <sys/zone.h> |
| #include <sys/shm_impl.h> |
| /* |
| * Private seg op routines. |
| */ |
| static int segvn_dup(struct seg *seg, struct seg *newseg); |
| static int segvn_unmap(struct seg *seg, caddr_t addr, size_t len); |
| static void segvn_free(struct seg *seg); |
| static faultcode_t segvn_fault(struct hat *hat, struct seg *seg, |
| caddr_t addr, size_t len, enum fault_type type, |
| enum seg_rw rw); |
| static faultcode_t segvn_faulta(struct seg *seg, caddr_t addr); |
| static int segvn_setprot(struct seg *seg, caddr_t addr, |
| size_t len, uint_t prot); |
| static int segvn_checkprot(struct seg *seg, caddr_t addr, |
| size_t len, uint_t prot); |
| static int segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta); |
| static size_t segvn_swapout(struct seg *seg); |
| static int segvn_sync(struct seg *seg, caddr_t addr, size_t len, |
| int attr, uint_t flags); |
| static size_t segvn_incore(struct seg *seg, caddr_t addr, size_t len, |
| char *vec); |
| static int segvn_lockop(struct seg *seg, caddr_t addr, size_t len, |
| int attr, int op, ulong_t *lockmap, size_t pos); |
| static int segvn_getprot(struct seg *seg, caddr_t addr, size_t len, |
| uint_t *protv); |
| static u_offset_t segvn_getoffset(struct seg *seg, caddr_t addr); |
| static int segvn_gettype(struct seg *seg, caddr_t addr); |
| static int segvn_getvp(struct seg *seg, caddr_t addr, |
| struct vnode **vpp); |
| static int segvn_advise(struct seg *seg, caddr_t addr, size_t len, |
| uint_t behav); |
| static void segvn_dump(struct seg *seg); |
| static int segvn_pagelock(struct seg *seg, caddr_t addr, size_t len, |
| struct page ***ppp, enum lock_type type, enum seg_rw rw); |
| static int segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, |
| uint_t szc); |
| static int segvn_getmemid(struct seg *seg, caddr_t addr, |
| memid_t *memidp); |
| static lgrp_mem_policy_info_t *segvn_getpolicy(struct seg *, caddr_t); |
| static int segvn_capable(struct seg *seg, segcapability_t capable); |
| |
| struct seg_ops segvn_ops = { |
| segvn_dup, |
| segvn_unmap, |
| segvn_free, |
| segvn_fault, |
| segvn_faulta, |
| segvn_setprot, |
| segvn_checkprot, |
| segvn_kluster, |
| segvn_swapout, |
| segvn_sync, |
| segvn_incore, |
| segvn_lockop, |
| segvn_getprot, |
| segvn_getoffset, |
| segvn_gettype, |
| segvn_getvp, |
| segvn_advise, |
| segvn_dump, |
| segvn_pagelock, |
| segvn_setpagesize, |
| segvn_getmemid, |
| segvn_getpolicy, |
| segvn_capable, |
| }; |
| |
| /* |
| * Common zfod structures, provided as a shorthand for others to use. |
| */ |
| static segvn_crargs_t zfod_segvn_crargs = |
| SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL); |
| static segvn_crargs_t kzfod_segvn_crargs = |
| SEGVN_ZFOD_ARGS(PROT_ZFOD & ~PROT_USER, |
| PROT_ALL & ~PROT_USER); |
| static segvn_crargs_t stack_noexec_crargs = |
| SEGVN_ZFOD_ARGS(PROT_ZFOD & ~PROT_EXEC, PROT_ALL); |
| |
| caddr_t zfod_argsp = (caddr_t)&zfod_segvn_crargs; /* user zfod argsp */ |
| caddr_t kzfod_argsp = (caddr_t)&kzfod_segvn_crargs; /* kernel zfod argsp */ |
| caddr_t stack_exec_argsp = (caddr_t)&zfod_segvn_crargs; /* executable stack */ |
| caddr_t stack_noexec_argsp = (caddr_t)&stack_noexec_crargs; /* noexec stack */ |
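| /* |
| * Illustrative use: callers create a zero-fill-on-demand mapping by |
| * handing one of the argsp pointers above to as_map(), e.g. |
| *     error = as_map(as, addr, len, segvn_create, zfod_argsp); |
| */ |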
| |
| #define vpgtob(n) ((n) * sizeof (struct vpage)) /* For brevity */ |
| |
| size_t segvn_comb_thrshld = UINT_MAX; /* patchable -- see 1196681 */ |
| |
| size_t segvn_pglock_comb_thrshld = (1UL << 16); /* 64K */ |
| size_t segvn_pglock_comb_balign = (1UL << 16); /* 64K */ |
| uint_t segvn_pglock_comb_bshift; |
| size_t segvn_pglock_comb_palign; |
| |
| static int segvn_concat(struct seg *, struct seg *, int); |
| static int segvn_extend_prev(struct seg *, struct seg *, |
| struct segvn_crargs *, size_t); |
| static int segvn_extend_next(struct seg *, struct seg *, |
| struct segvn_crargs *, size_t); |
| static void segvn_softunlock(struct seg *, caddr_t, size_t, enum seg_rw); |
| static void segvn_pagelist_rele(page_t **); |
| static void segvn_setvnode_mpss(vnode_t *); |
| static void segvn_relocate_pages(page_t **, page_t *); |
| static int segvn_full_szcpages(page_t **, uint_t, int *, uint_t *); |
| static int segvn_fill_vp_pages(struct segvn_data *, vnode_t *, u_offset_t, |
| uint_t, page_t **, page_t **, uint_t *, int *); |
| static faultcode_t segvn_fault_vnodepages(struct hat *, struct seg *, caddr_t, |
| caddr_t, enum fault_type, enum seg_rw, caddr_t, caddr_t, int); |
| static faultcode_t segvn_fault_anonpages(struct hat *, struct seg *, caddr_t, |
| caddr_t, enum fault_type, enum seg_rw, caddr_t, caddr_t, int); |
| static faultcode_t segvn_faultpage(struct hat *, struct seg *, caddr_t, |
| u_offset_t, struct vpage *, page_t **, uint_t, |
| enum fault_type, enum seg_rw, int); |
| static void segvn_vpage(struct seg *); |
| static size_t segvn_count_swap_by_vpages(struct seg *); |
| |
| static void segvn_purge(struct seg *seg); |
| static int segvn_reclaim(void *, caddr_t, size_t, struct page **, |
| enum seg_rw, int); |
| static int shamp_reclaim(void *, caddr_t, size_t, struct page **, |
| enum seg_rw, int); |
| |
| static int sameprot(struct seg *, caddr_t, size_t); |
| |
| static int segvn_demote_range(struct seg *, caddr_t, size_t, int, uint_t); |
| static int segvn_clrszc(struct seg *); |
| static struct seg *segvn_split_seg(struct seg *, caddr_t); |
| static int segvn_claim_pages(struct seg *, struct vpage *, u_offset_t, |
| ulong_t, uint_t); |
| |
| static void segvn_hat_rgn_unload_callback(caddr_t, caddr_t, caddr_t, |
| size_t, void *, u_offset_t); |
| |
| static struct kmem_cache *segvn_cache; |
| static struct kmem_cache **segvn_szc_cache; |
| |
| #ifdef VM_STATS |
| static struct segvnvmstats_str { |
| ulong_t fill_vp_pages[31]; |
| ulong_t fltvnpages[49]; |
| ulong_t fullszcpages[10]; |
| ulong_t relocatepages[3]; |
| ulong_t fltanpages[17]; |
| ulong_t pagelock[2]; |
| ulong_t demoterange[3]; |
| } segvnvmstats; |
| #endif /* VM_STATS */ |
| |
| #define SDR_RANGE 1 /* demote entire range */ |
| #define SDR_END 2 /* demote non aligned ends only */ |
| |
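| /* |
| * Compute the large page region [lpgaddr, lpgeaddr) that fully covers |
| * [addr, addr + len): round addr down and addr + len up to the given |
| * page size. The result is asserted to stay within the segment. |
| */ |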
| #define CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr) { \ |
| if ((len) != 0) { \ |
| lpgaddr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz); \ |
| ASSERT(lpgaddr >= (seg)->s_base); \ |
| lpgeaddr = (caddr_t)P2ROUNDUP((uintptr_t)((addr) + \ |
| (len)), pgsz); \ |
| ASSERT(lpgeaddr > lpgaddr); \ |
| ASSERT(lpgeaddr <= (seg)->s_base + (seg)->s_size); \ |
| } else { \ |
| lpgeaddr = lpgaddr = (addr); \ |
| } \ |
| } |
| |
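| /* |
| * kmem cache constructor/destructor for segvn_data: set up the embedded |
| * rwlock, mutex and text replication list pointers when an object is |
| * first constructed, and tear the locks down when it is destroyed. |
| */ |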
| /*ARGSUSED*/ |
| static int |
| segvn_cache_constructor(void *buf, void *cdrarg, int kmflags) |
| { |
| struct segvn_data *svd = buf; |
| |
| rw_init(&svd->lock, NULL, RW_DEFAULT, NULL); |
| mutex_init(&svd->segfree_syncmtx, NULL, MUTEX_DEFAULT, NULL); |
| svd->svn_trnext = svd->svn_trprev = NULL; |
| return (0); |
| } |
| |
| /*ARGSUSED1*/ |
| static void |
| segvn_cache_destructor(void *buf, void *cdrarg) |
| { |
| struct segvn_data *svd = buf; |
| |
| rw_destroy(&svd->lock); |
| mutex_destroy(&svd->segfree_syncmtx); |
| } |
| |
| /*ARGSUSED*/ |
| static int |
| svntr_cache_constructor(void *buf, void *cdrarg, int kmflags) |
| { |
| bzero(buf, sizeof (svntr_t)); |
| return (0); |
| } |
| |
| /* |
| * Patching this variable to non-zero allows the system to run with |
| * stacks marked as "not executable". It's a bit of a kludge, but is |
| * provided as a tweakable for platforms that export those ABIs |
| * (e.g. sparc V8) that have executable stacks enabled by default. |
| * There are also some restrictions for platforms that don't actually |
| * implement 'noexec' protections. |
| * |
| * Once enabled, the system is (therefore) unable to provide a fully |
| * ABI-compliant execution environment, though practically speaking, |
| * most everything works. The exceptions are generally some interpreters |
| * and debuggers that create executable code on the stack and jump |
| * into it (without explicitly mprotecting the address range to include |
| * PROT_EXEC). |
| * |
| * One important class of applications disabled this way is those that |
| * have been transformed into malicious agents using one of the numerous |
| * "buffer overflow" attacks. See 4007890. |
| */ |
| int noexec_user_stack = 0; |
| int noexec_user_stack_log = 1; |
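| /* |
| * Illustrative: both tunables can be set at boot time via /etc/system, e.g. |
| *     set noexec_user_stack = 1 |
| *     set noexec_user_stack_log = 1 |
| */ |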
| |
| int segvn_lpg_disable = 0; |
| uint_t segvn_maxpgszc = 0; |
| |
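| /* |
| * Counters for various large page (MPSS) fallback and error paths; they |
| * are maintained purely as observability/debugging aids. |
| */ |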
| ulong_t segvn_vmpss_clrszc_cnt; |
| ulong_t segvn_vmpss_clrszc_err; |
| ulong_t segvn_fltvnpages_clrszc_cnt; |
| ulong_t segvn_fltvnpages_clrszc_err; |
| ulong_t segvn_setpgsz_align_err; |
| ulong_t segvn_setpgsz_anon_align_err; |
| ulong_t segvn_setpgsz_getattr_err; |
| ulong_t segvn_setpgsz_eof_err; |
| ulong_t segvn_faultvnmpss_align_err1; |
| ulong_t segvn_faultvnmpss_align_err2; |
| ulong_t segvn_faultvnmpss_align_err3; |
| ulong_t segvn_faultvnmpss_align_err4; |
| ulong_t segvn_faultvnmpss_align_err5; |
| ulong_t segvn_vmpss_pageio_deadlk_err; |
| |
| int segvn_use_regions = 1; |
| |
| /* |
| * Segvn supports a text replication optimization for NUMA platforms. Text |
| * replicas are represented by anon maps (amps). There is one amp per text |
| * file region per lgroup. A process chooses the amp for each of its text |
| * mappings based on the lgroup assignment of its main thread (t_tid = 1). All |
| * processes that want a replica on a particular lgroup for the same text file |
| * mapping share the same amp. amps are looked up in the svntr_hashtab hash |
| * table with vp, off, size and szc used as the key. Text replication segments |
| * are read-only MAP_PRIVATE|MAP_TEXT segments that map a vnode. Replication is |
| * achieved by forcing COW faults from the vnode to the amp and mapping amp |
| * pages instead of vnode pages. A replication amp is assigned to a segment |
| * when it gets its first pagefault. To handle main thread lgroup rehoming, |
| * segvn_trasync_thread periodically rechecks whether the process still maps an |
| * amp local to the main thread. If not, the async thread forces the process to |
| * remap to an amp in the new home lgroup of the main thread. The current text |
| * replication implementation only benefits workloads that do most of their |
| * work in the main thread of a process, or whose threads all run in the same |
| * lgroup. Extending the text replication benefit to other types of |
| * multithreaded workloads would require further work in the hat layer to allow |
| * the same virtual address in the same hat to simultaneously map different |
| * physical addresses (i.e. page table replication would be needed for x86). |
| * |
| * amp pages are used instead of vnode pages as long as the segment has a very |
| * simple life cycle: it is created via segvn_create(), handles S_EXEC |
| * (S_READ) pagefaults and is fully unmapped. If anything more complicated |
| * happens, such as a protection change, a real COW fault, a pagesize change, |
| * an MC_LOCK request or a partial unmap, we turn off text replication by |
| * converting the segment back to a vnode-only segment (unmap the segment's |
| * address range and set svd->amp to NULL). |
| * |
| * The original file can be changed after an amp is inserted into |
| * svntr_hashtab. Processes that are launched after the file has already |
| * changed can't use the replicas created prior to the file change. To |
| * implement this, hash entries are timestamped. A replica can only be used if |
| * the current file modification time is the same as the timestamp saved when |
| * the hash entry was created. However, timestamps alone are not sufficient to |
| * detect file modification via mmap(MAP_SHARED) mappings, so we deal with file |
| * changes via MAP_SHARED mappings differently. When writable MAP_SHARED |
| * mappings are created to vnodes marked as executable, we mark all existing |
| * replicas for this vnode as not usable for future text mappings. And we don't |
| * create new replicas for files that currently have potentially writable |
| * MAP_SHARED mappings (i.e. vn_is_mapped(V_WRITE) is true). |
| */ |
| |
| #define SEGVN_TEXTREPL_MAXBYTES_FACTOR (20) |
| size_t segvn_textrepl_max_bytes_factor = SEGVN_TEXTREPL_MAXBYTES_FACTOR; |
| |
| static ulong_t svntr_hashtab_sz = 512; |
| static svntr_bucket_t *svntr_hashtab = NULL; |
| static struct kmem_cache *svntr_cache; |
| static svntr_stats_t *segvn_textrepl_stats; |
| static ksema_t segvn_trasync_sem; |
| |
| int segvn_disable_textrepl = 1; |
| size_t textrepl_size_thresh = (size_t)-1; |
| size_t segvn_textrepl_bytes = 0; |
| size_t segvn_textrepl_max_bytes = 0; |
| clock_t segvn_update_textrepl_interval = 0; |
| int segvn_update_tr_time = 10; |
| int segvn_disable_textrepl_update = 0; |
| |
| static void segvn_textrepl(struct seg *); |
| static void segvn_textunrepl(struct seg *, int); |
| static void segvn_inval_trcache(vnode_t *); |
| static void segvn_trasync_thread(void); |
| static void segvn_trupdate_wakeup(void *); |
| static void segvn_trupdate(void); |
| static void segvn_trupdate_seg(struct seg *, segvn_data_t *, svntr_t *, |
| ulong_t); |
| |
| /* |
| * Initialize segvn data structures |
| */ |
| void |
| segvn_init(void) |
| { |
| uint_t maxszc; |
| uint_t szc; |
| size_t pgsz; |
| |
| segvn_cache = kmem_cache_create("segvn_cache", |
| sizeof (struct segvn_data), 0, |
| segvn_cache_constructor, segvn_cache_destructor, NULL, |
| NULL, NULL, 0); |
| |
| if (segvn_lpg_disable == 0) { |
| szc = maxszc = page_num_pagesizes() - 1; |
| if (szc == 0) { |
| segvn_lpg_disable = 1; |
| } |
| if (page_get_pagesize(0) != PAGESIZE) { |
| panic("segvn_init: bad szc 0"); |
| /*NOTREACHED*/ |
| } |
| while (szc != 0) { |
| pgsz = page_get_pagesize(szc); |
| if (pgsz <= PAGESIZE || !IS_P2ALIGNED(pgsz, pgsz)) { |
| panic("segvn_init: bad szc %d", szc); |
| /*NOTREACHED*/ |
| } |
| szc--; |
| } |
| if (segvn_maxpgszc == 0 || segvn_maxpgszc > maxszc) |
| segvn_maxpgszc = maxszc; |
| } |
| |
| if (segvn_maxpgszc) { |
| segvn_szc_cache = (struct kmem_cache **)kmem_alloc( |
| (segvn_maxpgszc + 1) * sizeof (struct kmem_cache *), |
| KM_SLEEP); |
| } |
| |
| for (szc = 1; szc <= segvn_maxpgszc; szc++) { |
| char str[32]; |
| |
| (void) sprintf(str, "segvn_szc_cache%d", szc); |
| segvn_szc_cache[szc] = kmem_cache_create(str, |
| page_get_pagecnt(szc) * sizeof (page_t *), 0, |
| NULL, NULL, NULL, NULL, NULL, KMC_NODEBUG); |
| } |
| |
| |
| if (segvn_use_regions && !hat_supported(HAT_SHARED_REGIONS, NULL)) |
| segvn_use_regions = 0; |
| |
| /* |
| * For now, shared regions and text replication segvn support |
| * are mutually exclusive. This is acceptable because a |
| * significant benefit from text replication has so far been |
| * observed only on AMD64 NUMA platforms (due to their relatively |
| * small L2$ size), and we currently don't support shared |
| * regions on x86. |
| */ |
| if (segvn_use_regions && !segvn_disable_textrepl) { |
| segvn_disable_textrepl = 1; |
| } |
| |
| #if defined(_LP64) |
| if (lgrp_optimizations() && textrepl_size_thresh != (size_t)-1 && |
| !segvn_disable_textrepl) { |
| ulong_t i; |
| size_t hsz = svntr_hashtab_sz * sizeof (svntr_bucket_t); |
| |
| svntr_cache = kmem_cache_create("svntr_cache", |
| sizeof (svntr_t), 0, svntr_cache_constructor, NULL, |
| NULL, NULL, NULL, 0); |
| svntr_hashtab = kmem_zalloc(hsz, KM_SLEEP); |
| for (i = 0; i < svntr_hashtab_sz; i++) { |
| mutex_init(&svntr_hashtab[i].tr_lock, NULL, |
| MUTEX_DEFAULT, NULL); |
| } |
| segvn_textrepl_max_bytes = ptob(physmem) / |
| segvn_textrepl_max_bytes_factor; |
| segvn_textrepl_stats = kmem_zalloc(NCPU * |
| sizeof (svntr_stats_t), KM_SLEEP); |
| sema_init(&segvn_trasync_sem, 0, NULL, SEMA_DEFAULT, NULL); |
| (void) thread_create(NULL, 0, segvn_trasync_thread, |
| NULL, 0, &p0, TS_RUN, minclsyspri); |
| } |
| #endif |
| |
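| /* |
| * Sanitize the pagelock-combining base alignment (it must be a power of |
| * two no smaller than PAGESIZE) and derive its shift and page count. |
| */ |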
| if (!ISP2(segvn_pglock_comb_balign) || |
| segvn_pglock_comb_balign < PAGESIZE) { |
| segvn_pglock_comb_balign = 1UL << 16; /* 64K */ |
| } |
| segvn_pglock_comb_bshift = highbit(segvn_pglock_comb_balign) - 1; |
| segvn_pglock_comb_palign = btop(segvn_pglock_comb_balign); |
| } |
| |
| #define SEGVN_PAGEIO ((void *)0x1) |
| #define SEGVN_NOPAGEIO ((void *)0x2) |
| |
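| /* |
| * Determine, once per vnode, whether the filesystem supports VOP_PAGEIO() |
| * for this vnode by issuing a zero-length probe: EINVAL from the probe |
| * means pageio can be used for large page support, anything else (e.g. |
| * ENOSYS) means it cannot. The result is cached in v_mpssdata. |
| */ |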
| static void |
| segvn_setvnode_mpss(vnode_t *vp) |
| { |
| int err; |
| |
| ASSERT(vp->v_mpssdata == NULL || |
| vp->v_mpssdata == SEGVN_PAGEIO || |
| vp->v_mpssdata == SEGVN_NOPAGEIO); |
| |
| if (vp->v_mpssdata == NULL) { |
| if (vn_vmpss_usepageio(vp)) { |
| err = VOP_PAGEIO(vp, (page_t *)NULL, |
| (u_offset_t)0, 0, 0, CRED(), NULL); |
| } else { |
| err = ENOSYS; |
| } |
| /* |
| * set v_mpssdata just once per vnode life |
| * so that it never changes. |
| */ |
| mutex_enter(&vp->v_lock); |
| if (vp->v_mpssdata == NULL) { |
| if (err == EINVAL) { |
| vp->v_mpssdata = SEGVN_PAGEIO; |
| } else { |
| vp->v_mpssdata = SEGVN_NOPAGEIO; |
| } |
| } |
| mutex_exit(&vp->v_lock); |
| } |
| } |
| |
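| /* |
| * Create a new segment as described by the segvn_crargs pointed to by |
| * argsp: a shared or copy-on-write mapping of a vnode and/or anonymous |
| * memory. The caller must hold the address space write-locked. |
| */ |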
| int |
| segvn_create(struct seg *seg, void *argsp) |
| { |
| struct segvn_crargs *a = (struct segvn_crargs *)argsp; |
| struct segvn_data *svd; |
| size_t swresv = 0; |
| struct cred *cred; |
| struct anon_map *amp; |
| int error = 0; |
| size_t pgsz; |
| lgrp_mem_policy_t mpolicy = LGRP_MEM_POLICY_DEFAULT; |
| int use_rgn = 0; |
| int trok = 0; |
| |
| ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); |
| |
| if (a->type != MAP_PRIVATE && a->type != MAP_SHARED) { |
| panic("segvn_create type"); |
| /*NOTREACHED*/ |
| } |
| |
| /* |
| * Check arguments. If a shared anon structure is given then |
| * it is illegal to also specify a vp. |
| */ |
| if (a->amp != NULL && a->vp != NULL) { |
| panic("segvn_create anon_map"); |
| /*NOTREACHED*/ |
| } |
| |
| if (a->type == MAP_PRIVATE && (a->flags & MAP_TEXT) && |
| a->vp != NULL && a->prot == (PROT_USER | PROT_READ | PROT_EXEC) && |
| segvn_use_regions) { |
| use_rgn = 1; |
| } |
| |
| /* MAP_NORESERVE on a MAP_SHARED segment is meaningless. */ |
| if (a->type == MAP_SHARED) |
| a->flags &= ~MAP_NORESERVE; |
| |
| if (a->szc != 0) { |
| if (segvn_lpg_disable != 0 || (a->szc == AS_MAP_NO_LPOOB) || |
| (a->amp != NULL && a->type == MAP_PRIVATE) || |
| (a->flags & MAP_NORESERVE) || seg->s_as == &kas) { |
| a->szc = 0; |
| } else { |
| if (a->szc > segvn_maxpgszc) |
| a->szc = segvn_maxpgszc; |
| pgsz = page_get_pagesize(a->szc); |
| if (!IS_P2ALIGNED(seg->s_base, pgsz) || |
| !IS_P2ALIGNED(seg->s_size, pgsz)) { |
| a->szc = 0; |
| } else if (a->vp != NULL) { |
| if (IS_SWAPFSVP(a->vp) || VN_ISKAS(a->vp)) { |
| /* |
| * paranoid check. |
| * hat_page_demote() is not supported |
| * on swapfs pages. |
| */ |
| a->szc = 0; |
| } else if (map_addr_vacalign_check(seg->s_base, |
| a->offset & PAGEMASK)) { |
| a->szc = 0; |
| } |
| } else if (a->amp != NULL) { |
| pgcnt_t anum = btopr(a->offset); |
| pgcnt_t pgcnt = page_get_pagecnt(a->szc); |
| if (!IS_P2ALIGNED(anum, pgcnt)) { |
| a->szc = 0; |
| } |
| } |
| } |
| } |
| |
| /* |
| * If segment may need private pages, reserve them now. |
| */ |
| if (!(a->flags & MAP_NORESERVE) && ((a->vp == NULL && a->amp == NULL) || |
| (a->type == MAP_PRIVATE && (a->prot & PROT_WRITE)))) { |
| if (anon_resv_zone(seg->s_size, |
| seg->s_as->a_proc->p_zone) == 0) |
| return (EAGAIN); |
| swresv = seg->s_size; |
| TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u", |
| seg, swresv, 1); |
| } |
| |
| /* |
| * Reserve any mapping structures that may be required. |
| * |
| * Don't do it for segments that may use regions. It's currently a |
| * noop in the hat implementations anyway. |
| */ |
| if (!use_rgn) { |
| hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP); |
| } |
| |
| if (a->cred) { |
| cred = a->cred; |
| crhold(cred); |
| } else { |
| crhold(cred = CRED()); |
| } |
| |
| /* Inform the vnode of the new mapping */ |
| if (a->vp != NULL) { |
| error = VOP_ADDMAP(a->vp, a->offset & PAGEMASK, |
| seg->s_as, seg->s_base, seg->s_size, a->prot, |
| a->maxprot, a->type, cred, NULL); |
| if (error) { |
| if (swresv != 0) { |
| anon_unresv_zone(swresv, |
| seg->s_as->a_proc->p_zone); |
| TRACE_3(TR_FAC_VM, TR_ANON_PROC, |
| "anon proc:%p %lu %u", seg, swresv, 0); |
| } |
| crfree(cred); |
| if (!use_rgn) { |
| hat_unload(seg->s_as->a_hat, seg->s_base, |
| seg->s_size, HAT_UNLOAD_UNMAP); |
| } |
| return (error); |
| } |
| /* |
| * svntr_hashtab will be NULL if we support shared regions. |
| */ |
| trok = ((a->flags & MAP_TEXT) && |
| (seg->s_size > textrepl_size_thresh || |
| (a->flags & _MAP_TEXTREPL)) && |
| lgrp_optimizations() && svntr_hashtab != NULL && |
| a->type == MAP_PRIVATE && swresv == 0 && |
| !(a->flags & MAP_NORESERVE) && |
| seg->s_as != &kas && a->vp->v_type == VREG); |
| |
| ASSERT(!trok || !use_rgn); |
| } |
| |
| /* |
| * MAP_NORESERVE mappings don't count towards the VSZ of a process |
| * until we fault the pages in. |
| */ |
| if ((a->vp == NULL || a->vp->v_type != VREG) && |
| a->flags & MAP_NORESERVE) { |
| seg->s_as->a_resvsize -= seg->s_size; |
| } |
| |
| /* |
| * If there is more than one segment in the address space, and they are |
| * virtually adjacent, try to concatenate them. Don't concatenate if an |
| * explicit anon_map structure was supplied (e.g., SystemV shared |
| * memory) or if we'll use text replication for this segment. |
| */ |
| if (a->amp == NULL && !use_rgn && !trok) { |
| struct seg *pseg, *nseg; |
| struct segvn_data *psvd, *nsvd; |
| lgrp_mem_policy_t ppolicy, npolicy; |
| uint_t lgrp_mem_policy_flags = 0; |
| extern lgrp_mem_policy_t lgrp_mem_default_policy; |
| |
| /* |
| * Memory policy flags (lgrp_mem_policy_flags) are valid when |
| * extending stack/heap segments. |
| */ |
| if ((a->vp == NULL) && (a->type == MAP_PRIVATE) && |
| !(a->flags & MAP_NORESERVE) && (seg->s_as != &kas)) { |
| lgrp_mem_policy_flags = a->lgrp_mem_policy_flags; |
| } else { |
| /* |
| * Get policy when not extending it from another segment |
| */ |
| mpolicy = lgrp_mem_policy_default(seg->s_size, a->type); |
| } |
| |
| /* |
| * First, try to concatenate the previous and new segments |
| */ |
| pseg = AS_SEGPREV(seg->s_as, seg); |
| if (pseg != NULL && |
| pseg->s_base + pseg->s_size == seg->s_base && |
| pseg->s_ops == &segvn_ops) { |
| /* |
| * Get memory allocation policy from previous segment. |
| * When extension is specified (e.g. for heap) apply |
| * this policy to the new segment regardless of the |
| * outcome of segment concatenation. The previous segment's |
| * policy is extended only if it is non-default; otherwise the |
| * default policy, based on the extended segment size, is used. |
| */ |
| psvd = (struct segvn_data *)pseg->s_data; |
| ppolicy = psvd->policy_info.mem_policy; |
| if (lgrp_mem_policy_flags == |
| LGRP_MP_FLAG_EXTEND_UP) { |
| if (ppolicy != lgrp_mem_default_policy) { |
| mpolicy = ppolicy; |
| } else { |
| mpolicy = lgrp_mem_policy_default( |
| pseg->s_size + seg->s_size, |
| a->type); |
| } |
| } |
| |
| if (mpolicy == ppolicy && |
| (pseg->s_size + seg->s_size <= |
| segvn_comb_thrshld || psvd->amp == NULL) && |
| segvn_extend_prev(pseg, seg, a, swresv) == 0) { |
| /* |
| * success! now try to concatenate |
| * with following seg |
| */ |
| crfree(cred); |
| nseg = AS_SEGNEXT(pseg->s_as, pseg); |
| if (nseg != NULL && |
| nseg != pseg && |
| nseg->s_ops == &segvn_ops && |
| pseg->s_base + pseg->s_size == |
| nseg->s_base) |
| (void) segvn_concat(pseg, nseg, 0); |
| ASSERT(pseg->s_szc == 0 || |
| (a->szc == pseg->s_szc && |
| IS_P2ALIGNED(pseg->s_base, pgsz) && |
| IS_P2ALIGNED(pseg->s_size, pgsz))); |
| return (0); |
| } |
| } |
| |
| /* |
| * Failed, so try to concatenate with following seg |
| */ |
| nseg = AS_SEGNEXT(seg->s_as, seg); |
| if (nseg != NULL && |
| seg->s_base + seg->s_size == nseg->s_base && |
| nseg->s_ops == &segvn_ops) { |
| /* |
| * Get memory allocation policy from next segment. |
| * When extension is specified (e.g. for stack) apply |
| * this policy to the new segment regardless of the |
| * outcome of segment concatenation. The next segment's |
| * policy is extended only if it is non-default; otherwise the |
| * default policy, based on the extended segment size, is used. |
| */ |
| nsvd = (struct segvn_data *)nseg->s_data; |
| npolicy = nsvd->policy_info.mem_policy; |
| if (lgrp_mem_policy_flags == |
| LGRP_MP_FLAG_EXTEND_DOWN) { |
| if (npolicy != lgrp_mem_default_policy) { |
| mpolicy = npolicy; |
| } else { |
| mpolicy = lgrp_mem_policy_default( |
| nseg->s_size + seg->s_size, |
| a->type); |
| } |
| } |
| |
| if (mpolicy == npolicy && |
| segvn_extend_next(seg, nseg, a, swresv) == 0) { |
| crfree(cred); |
| ASSERT(nseg->s_szc == 0 || |
| (a->szc == nseg->s_szc && |
| IS_P2ALIGNED(nseg->s_base, pgsz) && |
| IS_P2ALIGNED(nseg->s_size, pgsz))); |
| return (0); |
| } |
| } |
| } |
| |
| if (a->vp != NULL) { |
| VN_HOLD(a->vp); |
| if (a->type == MAP_SHARED) |
| lgrp_shm_policy_init(NULL, a->vp); |
| } |
| svd = kmem_cache_alloc(segvn_cache, KM_SLEEP); |
| |
| seg->s_ops = &segvn_ops; |
| seg->s_data = (void *)svd; |
| seg->s_szc = a->szc; |
| |
| svd->seg = seg; |
| svd->vp = a->vp; |
| /* |
| * Anonymous mappings have no backing file so the offset is meaningless. |
| */ |
| svd->offset = a->vp ? (a->offset & PAGEMASK) : 0; |
| svd->prot = a->prot; |
| svd->maxprot = a->maxprot; |
| svd->pageprot = 0; |
| svd->type = a->type; |
| svd->vpage = NULL; |
| svd->cred = cred; |
| svd->advice = MADV_NORMAL; |
| svd->pageadvice = 0; |
| svd->flags = (ushort_t)a->flags; |
| svd->softlockcnt = 0; |
| svd->softlockcnt_sbase = 0; |
| svd->softlockcnt_send = 0; |
| svd->rcookie = HAT_INVALID_REGION_COOKIE; |
| svd->pageswap = 0; |
| |
| if (a->szc != 0 && a->vp != NULL) { |
| segvn_setvnode_mpss(a->vp); |
| } |
| if (svd->type == MAP_SHARED && svd->vp != NULL && |
| (svd->vp->v_flag & VVMEXEC) && (svd->prot & PROT_WRITE)) { |
| ASSERT(vn_is_mapped(svd->vp, V_WRITE)); |
| segvn_inval_trcache(svd->vp); |
| } |
| |
| amp = a->amp; |
| if ((svd->amp = amp) == NULL) { |
| svd->anon_index = 0; |
| if (svd->type == MAP_SHARED) { |
| svd->swresv = 0; |
| /* |
| * Shared mappings to a vp need no other setup. |
| * If we have a shared mapping to an anon_map object |
| * which hasn't been allocated yet, allocate the |
| * struct now so that it will be properly shared |
| * by remembering the swap reservation there. |
| */ |
| if (a->vp == NULL) { |
| svd->amp = anonmap_alloc(seg->s_size, swresv, |
| ANON_SLEEP); |
| svd->amp->a_szc = seg->s_szc; |
| } |
| } else { |
| /* |
| * Private mapping (with or without a vp). |
| * Allocate anon_map when needed. |
| */ |
| svd->swresv = swresv; |
| } |
| } else { |
| pgcnt_t anon_num; |
| |
| /* |
| * Mapping to an existing anon_map structure without a vp. |
| * For now we will ensure that the segment size isn't larger |
| * than the size - offset gives us. Later on we may wish to |
| * have the anon array dynamically allocated itself so that |
| * we don't always have to allocate all the anon pointer slots. |
| * This of course involves adding extra code to check that we |
| * aren't trying to use an anon pointer slot beyond the end |
| * of the currently allocated anon array. |
| */ |
| if ((amp->size - a->offset) < seg->s_size) { |
| panic("segvn_create anon_map size"); |
| /*NOTREACHED*/ |
| } |
| |
| anon_num = btopr(a->offset); |
| |
| if (a->type == MAP_SHARED) { |
| /* |
| * SHARED mapping to a given anon_map. |
| */ |
| ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); |
| amp->refcnt++; |
| if (a->szc > amp->a_szc) { |
| amp->a_szc = a->szc; |
| } |
| ANON_LOCK_EXIT(&amp->a_rwlock); |
| svd->anon_index = anon_num; |
| svd->swresv = 0; |
| } else { |
| /* |
| * PRIVATE mapping to a given anon_map. |
| * Make sure that all the needed anon |
| * structures are created (so that we will |
| * share the underlying pages if nothing |
| * is written by this mapping) and then |
| * duplicate the anon array as is done |
| * when a privately mapped segment is dup'ed. |
| */ |
| struct anon *ap; |
| caddr_t addr; |
| caddr_t eaddr; |
| ulong_t anon_idx; |
| int hat_flag = HAT_LOAD; |
| |
| if (svd->flags & MAP_TEXT) { |
| hat_flag |= HAT_LOAD_TEXT; |
| } |
| |
| svd->amp = anonmap_alloc(seg->s_size, 0, ANON_SLEEP); |
| svd->amp->a_szc = seg->s_szc; |
| svd->anon_index = 0; |
| svd->swresv = swresv; |
| |
| /* |
| * Prevent 2 threads from allocating anon |
| * slots simultaneously. |
| */ |
| ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); |
| eaddr = seg->s_base + seg->s_size; |
| |
| for (anon_idx = anon_num, addr = seg->s_base; |
| addr < eaddr; addr += PAGESIZE, anon_idx++) { |
| page_t *pp; |
| |
| if ((ap = anon_get_ptr(amp->ahp, |
| anon_idx)) != NULL) |
| continue; |
| |
| /* |
| * Allocate the anon struct now. |
| * Might as well load up translation |
| * to the page while we're at it... |
| */ |
| pp = anon_zero(seg, addr, &ap, cred); |
| if (ap == NULL || pp == NULL) { |
| panic("segvn_create anon_zero"); |
| /*NOTREACHED*/ |
| } |
| |
| /* |
| * Re-acquire the anon_map lock and |
| * initialize the anon array entry. |
| */ |
| ASSERT(anon_get_ptr(amp->ahp, |
| anon_idx) == NULL); |
| (void) anon_set_ptr(amp->ahp, anon_idx, ap, |
| ANON_SLEEP); |
| |
| ASSERT(seg->s_szc == 0); |
| ASSERT(!IS_VMODSORT(pp->p_vnode)); |
| |
| ASSERT(use_rgn == 0); |
| hat_memload(seg->s_as->a_hat, addr, pp, |
| svd->prot & ~PROT_WRITE, hat_flag); |
| |
| page_unlock(pp); |
| } |
| ASSERT(seg->s_szc == 0); |
| anon_dup(amp->ahp, anon_num, svd->amp->ahp, |
| 0, seg->s_size); |
| ANON_LOCK_EXIT(&amp->a_rwlock); |
| } |
| } |
| |
| /* |
| * Set default memory allocation policy for segment |
| * |
| * Always set policy for private memory at least for initialization |
| * even if this is a shared memory segment |
| */ |
| (void) lgrp_privm_policy_set(mpolicy, &svd->policy_info, seg->s_size); |
| |
| if (svd->type == MAP_SHARED) |
| (void) lgrp_shm_policy_set(mpolicy, svd->amp, svd->anon_index, |
| svd->vp, svd->offset, seg->s_size); |
| |
| if (use_rgn) { |
| ASSERT(!trok); |
| ASSERT(svd->amp == NULL); |
| svd->rcookie = hat_join_region(seg->s_as->a_hat, seg->s_base, |
| seg->s_size, (void *)svd->vp, svd->offset, svd->prot, |
| (uchar_t)seg->s_szc, segvn_hat_rgn_unload_callback, |
| HAT_REGION_TEXT); |
| } |
| |
| ASSERT(!trok || !(svd->prot & PROT_WRITE)); |
| svd->tr_state = trok ? SEGVN_TR_INIT : SEGVN_TR_OFF; |
| |
| return (0); |
| } |
| |
| /* |
| * Concatenate two existing segments, if possible. |
| * Return 0 on success, -1 if two segments are not compatible |
| * or -2 on memory allocation failure. |
| * If amp_cat == 1 then try and concat segments with anon maps |
| */ |
| static int |
| segvn_concat(struct seg *seg1, struct seg *seg2, int amp_cat) |
| { |
| struct segvn_data *svd1 = seg1->s_data; |
| struct segvn_data *svd2 = seg2->s_data; |
| struct anon_map *amp1 = svd1->amp; |
| struct anon_map *amp2 = svd2->amp; |
| struct vpage *vpage1 = svd1->vpage; |
| struct vpage *vpage2 = svd2->vpage, *nvpage = NULL; |
| size_t size, nvpsize; |
| pgcnt_t npages1, npages2; |
| |
| ASSERT(seg1->s_as && seg2->s_as && seg1->s_as == seg2->s_as); |
| ASSERT(AS_WRITE_HELD(seg1->s_as, &seg1->s_as->a_lock)); |
| ASSERT(seg1->s_ops == seg2->s_ops); |
| |
| if (HAT_IS_REGION_COOKIE_VALID(svd1->rcookie) || |
| HAT_IS_REGION_COOKIE_VALID(svd2->rcookie)) { |
| return (-1); |
| } |
| |
| /* both segments exist, try to merge them */ |
| #define incompat(x) (svd1->x != svd2->x) |
| if (incompat(vp) || incompat(maxprot) || |
| (!svd1->pageadvice && !svd2->pageadvice && incompat(advice)) || |
| (!svd1->pageprot && !svd2->pageprot && incompat(prot)) || |
| incompat(type) || incompat(cred) || incompat(flags) || |
| seg1->s_szc != seg2->s_szc || incompat(policy_info.mem_policy) || |
| (svd2->softlockcnt > 0) || svd1->softlockcnt_send > 0) |
| return (-1); |
| #undef incompat |
| |
| /* |
| * vp == NULL implies zfod, offset doesn't matter |
| */ |
| if (svd1->vp != NULL && |
| svd1->offset + seg1->s_size != svd2->offset) { |
| return (-1); |
| } |
| |
| /* |
| * Don't concatenate if either segment uses text replication. |
| */ |
| if (svd1->tr_state != SEGVN_TR_OFF || svd2->tr_state != SEGVN_TR_OFF) { |
| return (-1); |
| } |
| |
| /* |
| * Fail early if we're not supposed to concatenate |
| * segments with a non-NULL amp. |
| */ |
| if (amp_cat == 0 && (amp1 != NULL || amp2 != NULL)) { |
| return (-1); |
| } |
| |
| if (svd1->vp == NULL && svd1->type == MAP_SHARED) { |
| if (amp1 != amp2) { |
| return (-1); |
| } |
| if (amp1 != NULL && svd1->anon_index + btop(seg1->s_size) != |
| svd2->anon_index) { |
| return (-1); |
| } |
| ASSERT(amp1 == NULL || amp1->refcnt >= 2); |
| } |
| |
| /* |
| * If either seg has vpages, create a new merged vpage array. |
| */ |
| if (vpage1 != NULL || vpage2 != NULL) { |
| struct vpage *vp, *evp; |
| |
| npages1 = seg_pages(seg1); |
| npages2 = seg_pages(seg2); |
| nvpsize = vpgtob(npages1 + npages2); |
| |
| if ((nvpage = kmem_zalloc(nvpsize, KM_NOSLEEP)) == NULL) { |
| return (-2); |
| } |
| |
| if (vpage1 != NULL) { |
| bcopy(vpage1, nvpage, vpgtob(npages1)); |
| } else { |
| evp = nvpage + npages1; |
| for (vp = nvpage; vp < evp; vp++) { |
| VPP_SETPROT(vp, svd1->prot); |
| VPP_SETADVICE(vp, svd1->advice); |
| } |
| } |
| |
| if (vpage2 != NULL) { |
| bcopy(vpage2, nvpage + npages1, vpgtob(npages2)); |
| } else { |
| evp = nvpage + npages1 + npages2; |
| for (vp = nvpage + npages1; vp < evp; vp++) { |
| VPP_SETPROT(vp, svd2->prot); |
| VPP_SETADVICE(vp, svd2->advice); |
| } |
| } |
| |
| if (svd2->pageswap && (!svd1->pageswap && svd1->swresv)) { |
| ASSERT(svd1->swresv == seg1->s_size); |
| ASSERT(!(svd1->flags & MAP_NORESERVE)); |
| ASSERT(!(svd2->flags & MAP_NORESERVE)); |
| evp = nvpage + npages1; |
| for (vp = nvpage; vp < evp; vp++) { |
| VPP_SETSWAPRES(vp); |
| } |
| } |
| |
| if (svd1->pageswap && (!svd2->pageswap && svd2->swresv)) { |
| ASSERT(svd2->swresv == seg2->s_size); |
| ASSERT(!(svd1->flags & MAP_NORESERVE)); |
| ASSERT(!(svd2->flags & MAP_NORESERVE)); |
| vp = nvpage + npages1; |
| evp = vp + npages2; |
| for (; vp < evp; vp++) { |
| VPP_SETSWAPRES(vp); |
| } |
| } |
| } |
| ASSERT((vpage1 != NULL || vpage2 != NULL) || |
| (svd1->pageswap == 0 && svd2->pageswap == 0)); |
| |
| /* |
| * If either segment has private pages, create a new merged anon |
| * array. If merging shared anon segments, just decrement the anon map's |
| * refcnt. |
| */ |
| if (amp1 != NULL && svd1->type == MAP_SHARED) { |
| ASSERT(amp1 == amp2 && svd1->vp == NULL); |
| ANON_LOCK_ENTER(&amp1->a_rwlock, RW_WRITER); |
| ASSERT(amp1->refcnt >= 2); |
| amp1->refcnt--; |
| ANON_LOCK_EXIT(&amp1->a_rwlock); |
| svd2->amp = NULL; |
| } else if (amp1 != NULL || amp2 != NULL) { |
| struct anon_hdr *nahp; |
| struct anon_map *namp = NULL; |
| size_t asize; |
| |
| ASSERT(svd1->type == MAP_PRIVATE); |
| |
| asize = seg1->s_size + seg2->s_size; |
| if ((nahp = anon_create(btop(asize), ANON_NOSLEEP)) == NULL) { |
| if (nvpage != NULL) { |
| kmem_free(nvpage, nvpsize); |
| } |
| return (-2); |
| } |
| if (amp1 != NULL) { |
| /* |
| * XXX anon rwlock is not really needed because |
| * this is a private segment and we are writers. |
| */ |
| ANON_LOCK_ENTER(&amp1->a_rwlock, RW_WRITER); |
| ASSERT(amp1->refcnt == 1); |
| if (anon_copy_ptr(amp1->ahp, svd1->anon_index, |
| nahp, 0, btop(seg1->s_size), ANON_NOSLEEP)) { |
| anon_release(nahp, btop(asize)); |
| ANON_LOCK_EXIT(&amp1->a_rwlock); |
| if (nvpage != NULL) { |
| kmem_free(nvpage, nvpsize); |
| } |
| return (-2); |
| } |
| } |
| if (amp2 != NULL) { |
| ANON_LOCK_ENTER(&amp2->a_rwlock, RW_WRITER); |
| ASSERT(amp2->refcnt == 1); |
| if (anon_copy_ptr(amp2->ahp, svd2->anon_index, |
| nahp, btop(seg1->s_size), btop(seg2->s_size), |
| ANON_NOSLEEP)) { |
| anon_release(nahp, btop(asize)); |
| ANON_LOCK_EXIT(&amp2->a_rwlock); |
| if (amp1 != NULL) { |
| ANON_LOCK_EXIT(&amp1->a_rwlock); |
| } |
| if (nvpage != NULL) { |
| kmem_free(nvpage, nvpsize); |
| } |
| return (-2); |
| } |
| } |
| if (amp1 != NULL) { |
| namp = amp1; |
| anon_release(amp1->ahp, btop(amp1->size)); |
| } |
| if (amp2 != NULL) { |
| if (namp == NULL) { |
| ASSERT(amp1 == NULL); |
| namp = amp2; |
| anon_release(amp2->ahp, btop(amp2->size)); |
| } else { |
| amp2->refcnt--; |
| ANON_LOCK_EXIT(&amp2->a_rwlock); |
| anonmap_free(amp2); |
| } |
| svd2->amp = NULL; /* needed for seg_free */ |
| } |
| namp->ahp = nahp; |
| namp->size = asize; |
| svd1->amp = namp; |
| svd1->anon_index = 0; |
| ANON_LOCK_EXIT(&namp->a_rwlock); |
| } |
| /* |
| * Now free the old vpage structures. |
| */ |
| if (nvpage != NULL) { |
| if (vpage1 != NULL) { |
| kmem_free(vpage1, vpgtob(npages1)); |
| } |
| if (vpage2 != NULL) { |
| svd2->vpage = NULL; |
| kmem_free(vpage2, vpgtob(npages2)); |
| } |
| if (svd2->pageprot) { |
| svd1->pageprot = 1; |
| } |
| if (svd2->pageadvice) { |
| svd1->pageadvice = 1; |
| } |
| if (svd2->pageswap) { |
| svd1->pageswap = 1; |
| } |
| svd1->vpage = nvpage; |
| } |
| |
| /* all looks ok, merge segments */ |
| svd1->swresv += svd2->swresv; |
| svd2->swresv = 0; /* so seg_free doesn't release swap space */ |
| size = seg2->s_size; |
| seg_free(seg2); |
| seg1->s_size += size; |
| return (0); |
| } |
| |
| /* |
| * Extend the previous segment (seg1) to include the |
| * new segment (seg2 + a), if possible. |
| * Return 0 on success. |
| */ |
| static int |
| segvn_extend_prev(struct seg *seg1, struct seg *seg2, |
| struct segvn_crargs *a, size_t swresv) |
| { |
| struct segvn_data *svd1 = (struct segvn_data *)seg1->s_data; |
| size_t size; |
| struct anon_map *amp1; |
| struct vpage *new_vpage; |
| |
| /* |
| * We don't need any segment level locks for "segvn" data |
| * since the address space is "write" locked. |
| */ |
| ASSERT(seg1->s_as && AS_WRITE_HELD(seg1->s_as, &seg1->s_as->a_lock)); |
| |
| if (HAT_IS_REGION_COOKIE_VALID(svd1->rcookie)) { |
| return (-1); |
| } |
| |
| /* second segment is new, try to extend first */ |
| /* XXX - should also check cred */ |
| if (svd1->vp != a->vp || svd1->maxprot != a->maxprot || |
| (!svd1->pageprot && (svd1->prot != a->prot)) || |
| svd1->type != a->type || svd1->flags != a->flags || |
| seg1->s_szc != a->szc || svd1->softlockcnt_send > 0) |
| return (-1); |
| |
| /* vp == NULL implies zfod, offset doesn't matter */ |
| if (svd1->vp != NULL && |
| svd1->offset + seg1->s_size != (a->offset & PAGEMASK)) |
| return (-1); |
| |
| if (svd1->tr_state != SEGVN_TR_OFF) { |
| return (-1); |
| } |
| |
| amp1 = svd1->amp; |
| if (amp1) { |
| pgcnt_t newpgs; |
| |
| /* |
| * Segment has private pages, can data structures |
| * be expanded? |
| * |
| * Acquire the anon_map lock to prevent it from changing, |
| * if it is shared. This ensures that the anon_map |
| * will not change while a thread which has a read/write |
| * lock on an address space references it. |
| * XXX - Don't need the anon_map lock at all if "refcnt" |
| * is 1. |
| * |
| * Can't grow a MAP_SHARED segment with an anonmap because |
| * there may be existing anon slots where we want to extend |
| * the segment and we wouldn't know what to do with them |
| * (e.g., for tmpfs right thing is to just leave them there, |
| * for /dev/zero they should be cleared out). |
| */ |
| if (svd1->type == MAP_SHARED) |
| return (-1); |
| |
| ANON_LOCK_ENTER(&amp1->a_rwlock, RW_WRITER); |
| if (amp1->refcnt > 1) { |
| ANON_LOCK_EXIT(&amp1->a_rwlock); |
| return (-1); |
| } |
| newpgs = anon_grow(amp1->ahp, &svd1->anon_index, |
| btop(seg1->s_size), btop(seg2->s_size), ANON_NOSLEEP); |
| |
| if (newpgs == 0) { |
| ANON_LOCK_EXIT(&amp1->a_rwlock); |
| return (-1); |
| } |
| amp1->size = ptob(newpgs); |
| ANON_LOCK_EXIT(&amp1->a_rwlock); |
| } |
| if (svd1->vpage != NULL) { |
| struct vpage *vp, *evp; |
| new_vpage = |
| kmem_zalloc(vpgtob(seg_pages(seg1) + seg_pages(seg2)), |
| KM_NOSLEEP); |
| if (new_vpage == NULL) |
| return (-1); |
| bcopy(svd1->vpage, new_vpage, vpgtob(seg_pages(seg1))); |
| kmem_free(svd1->vpage, vpgtob(seg_pages(seg1))); |
| svd1->vpage = new_vpage; |
| |
| vp = new_vpage + seg_pages(seg1); |
| evp = vp + seg_pages(seg2); |
| for (; vp < evp; vp++) |
| VPP_SETPROT(vp, a->prot); |
| if (svd1->pageswap && swresv) { |
| ASSERT(!(svd1->flags & MAP_NORESERVE)); |
| ASSERT(swresv == seg2->s_size); |
| vp = new_vpage + seg_pages(seg1); |
| for (; vp < evp; vp++) { |
| VPP_SETSWAPRES(vp); |
| } |
| } |
| } |
| ASSERT(svd1->vpage != NULL || svd1->pageswap == 0); |
| size = seg2->s_size; |
| seg_free(seg2); |
| seg1->s_size += size; |
| svd1->swresv += swresv; |
| if (svd1->pageprot && (a->prot & PROT_WRITE) && |
| svd1->type == MAP_SHARED && svd1->vp != NULL && |
| (svd1->vp->v_flag & VVMEXEC)) { |
| ASSERT(vn_is_mapped(svd1->vp, V_WRITE)); |
| segvn_inval_trcache(svd1->vp); |
| } |
| return (0); |
| } |
| |
| /* |
| * Extend the next segment (seg2) to include the |
| * new segment (seg1 + a), if possible. |
| * Return 0 on success. |
| */ |
| static int |
| segvn_extend_next( |
| struct seg *seg1, |
| struct seg *seg2, |
| struct segvn_crargs *a, |
| size_t swresv) |
| { |
| struct segvn_data *svd2 = (struct segvn_data *)seg2->s_data; |
| size_t size; |
| struct anon_map *amp2; |
| struct vpage *new_vpage; |
| |
| /* |
| * We don't need any segment level locks for "segvn" data |
| * since the address space is "write" locked. |
| */ |
| ASSERT(seg2->s_as && AS_WRITE_HELD(seg2->s_as, &seg2->s_as->a_lock)); |
| |
| if (HAT_IS_REGION_COOKIE_VALID(svd2->rcookie)) { |
| return (-1); |
| } |
| |
| /* first segment is new, try to extend second */ |
| /* XXX - should also check cred */ |
| if (svd2->vp != a->vp || svd2->maxprot != a->maxprot || |
| (!svd2->pageprot && (svd2->prot != a->prot)) || |
| svd2->type != a->type || svd2->flags != a->flags || |
| seg2->s_szc != a->szc || svd2->softlockcnt_sbase > 0) |
| return (-1); |
| /* vp == NULL implies zfod, offset doesn't matter */ |
| if (svd2->vp != NULL && |
| (a->offset & PAGEMASK) + seg1->s_size != svd2->offset) |
| return (-1); |
| |
| if (svd2->tr_state != SEGVN_TR_OFF) { |
| return (-1); |
| } |
| |
| amp2 = svd2->amp; |
| if (amp2) { |
| pgcnt_t newpgs; |
| |
| /* |
| * Segment has private pages, can data structures |
| * be expanded? |
| * |
| * Acquire the anon_map lock to prevent it from changing, |
| * if it is shared. This ensures that the anon_map |
| * will not change while a thread which has a read/write |
| * lock on an address space references it. |
| * |
| * XXX - Don't need the anon_map lock at all if "refcnt" |
| * is 1. |
| */ |
| if (svd2->type == MAP_SHARED) |
| return (-1); |
| |
| ANON_LOCK_ENTER(&amp2->a_rwlock, RW_WRITER); |
| if (amp2->refcnt > 1) { |
| ANON_LOCK_EXIT(&amp2->a_rwlock); |
| return (-1); |
| } |
| newpgs = anon_grow(amp2->ahp, &svd2->anon_index, |
| btop(seg2->s_size), btop(seg1->s_size), |
| ANON_NOSLEEP | ANON_GROWDOWN); |
| |
| if (newpgs == 0) { |
| ANON_LOCK_EXIT(&amp2->a_rwlock); |
| return (-1); |
| } |
| amp2->size = ptob(newpgs); |
| ANON_LOCK_EXIT(&amp2->a_rwlock); |
| } |
| if (svd2->vpage != NULL) { |
| struct vpage *vp, *evp; |
| new_vpage = |
| kmem_zalloc(vpgtob(seg_pages(seg1) + seg_pages(seg2)), |
| KM_NOSLEEP); |
| if (new_vpage == NULL) { |
| /* Not merging segments so adjust anon_index back */ |
| if (amp2) |
| svd2->anon_index += seg_pages(seg1); |
| return (-1); |
| } |
| bcopy(svd2->vpage, new_vpage + seg_pages(seg1), |
| vpgtob(seg_pages(seg2))); |
| kmem_free(svd2->vpage, vpgtob(seg_pages(seg2))); |
| svd2->vpage = new_vpage; |
| |
| vp = new_vpage; |
| evp = vp + seg_pages(seg1); |
| for (; vp < evp; vp++) |
| VPP_SETPROT(vp, a->prot); |
| if (svd2->pageswap && swresv) { |
| ASSERT(!(svd2->flags & MAP_NORESERVE)); |
| ASSERT(swresv == seg1->s_size); |
| vp = new_vpage; |
| for (; vp < evp; vp++) { |
| VPP_SETSWAPRES(vp); |
| } |
| } |
| } |
| ASSERT(svd2->vpage != NULL || svd2->pageswap == 0); |
| size = seg1->s_size; |
| seg_free(seg1); |
| seg2->s_size += size; |
| seg2->s_base -= size; |
| svd2->offset -= size; |
| svd2->swresv += swresv; |
| if (svd2->pageprot && (a->prot & PROT_WRITE) && |
| svd2->type == MAP_SHARED && svd2->vp != NULL && |
| (svd2->vp->v_flag & VVMEXEC)) { |
| ASSERT(vn_is_mapped(svd2->vp, V_WRITE)); |
| segvn_inval_trcache(svd2->vp); |
| } |
| return (0); |
| } |
| |
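| /* |
| * Duplicate seg into newseg as part of address space duplication (fork). |
| * Private anon pages that may be softlocked in the parent are copied up |
| * front so they cannot become copy-on-write pages shared with the child. |
| */ |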
| static int |
| segvn_dup(struct seg *seg, struct seg *newseg) |
| { |
| struct segvn_data *svd = (struct segvn_data *)seg->s_data; |
| struct segvn_data *newsvd; |
| pgcnt_t npages = seg_pages(seg); |
| int error = 0; |
| uint_t prot; |
| size_t len; |
| struct anon_map *amp; |
| |
| ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); |
| ASSERT(newseg->s_as->a_proc->p_parent == curproc); |
| |
| /* |
| * If segment has anon reserved, reserve more for the new seg. |
| * For a MAP_NORESERVE segment swresv will be a count of all the |
| * allocated anon slots; thus we reserve for the child as many slots |
| * as the parent has allocated. This semantic prevents the child or |
| * parent from dying during a copy-on-write fault caused by trying |
| * to write a shared pre-existing anon page. |
| */ |
| if ((len = svd->swresv) != 0) { |
| if (anon_resv(svd->swresv) == 0) |
| return (ENOMEM); |
| |
| TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u", |
| seg, len, 0); |
| } |
| |
| newsvd = kmem_cache_alloc(segvn_cache, KM_SLEEP); |
| |
| newseg->s_ops = &segvn_ops; |
| newseg->s_data = (void *)newsvd; |
| newseg->s_szc = seg->s_szc; |
| |
| newsvd->seg = newseg; |
| if ((newsvd->vp = svd->vp) != NULL) { |
| VN_HOLD(svd->vp); |
| if (svd->type == MAP_SHARED) |
| lgrp_shm_policy_init(NULL, svd->vp); |
| } |
| newsvd->offset = svd->offset; |
| newsvd->prot = svd->prot; |
| newsvd->maxprot = svd->maxprot; |
| newsvd->pageprot = svd->pageprot; |
| newsvd->type = svd->type; |
| newsvd->cred = svd->cred; |
| crhold(newsvd->cred); |
| newsvd->advice = svd->advice; |
| newsvd->pageadvice = svd->pageadvice; |
| newsvd->swresv = svd->swresv; |
| newsvd->pageswap = svd->pageswap; |
| newsvd->flags = svd->flags; |
| newsvd->softlockcnt = 0; |
| newsvd->softlockcnt_sbase = 0; |
| newsvd->softlockcnt_send = 0; |
| newsvd->policy_info = svd->policy_info; |
| newsvd->rcookie = HAT_INVALID_REGION_COOKIE; |
| |
| if ((amp = svd->amp) == NULL || svd->tr_state == SEGVN_TR_ON) { |
| /* |
| * Not attaching to a shared anon object. |
| */ |
| ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie) || |
| svd->tr_state == SEGVN_TR_OFF); |
| if (svd->tr_state == SEGVN_TR_ON) { |
| ASSERT(newsvd->vp != NULL && amp != NULL); |
| newsvd->tr_state = SEGVN_TR_INIT; |
| } else { |
| newsvd->tr_state = svd->tr_state; |
| } |
| newsvd->amp = NULL; |
| newsvd->anon_index = 0; |
| } else { |
| /* regions for now are only used on pure vnode segments */ |
| ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); |
| ASSERT(svd->tr_state == SEGVN_TR_OFF); |
| newsvd->tr_state = SEGVN_TR_OFF; |
| if (svd->type == MAP_SHARED) { |
| newsvd->amp = amp; |
| ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); |
| amp->refcnt++; |
| ANON_LOCK_EXIT(&amp->a_rwlock); |
| newsvd->anon_index = svd->anon_index; |
| } else { |
| int reclaim = 1; |
| |
| /* |
| * Allocate and initialize new anon_map structure. |
| */ |
| newsvd->amp = anonmap_alloc(newseg->s_size, 0, |
| ANON_SLEEP); |
| newsvd->amp->a_szc = newseg->s_szc; |
| newsvd->anon_index = 0; |
| |
| /* |
| * We don't have to acquire the anon_map lock |
| * for the new segment (since it belongs to an |
| * address space that is still not associated |
| * with any process), or the segment in the old |
| * address space (since all threads in it |
| * are stopped while duplicating the address space). |
| */ |
| |
| /* |
| * The goal of the following code is to make sure that |
| * softlocked pages do not end up as copy on write |
| * pages. This would cause problems where one |
| * thread writes to a page that is COW and a different |
| * thread in the same process has softlocked it. The |
| * softlock lock would move away from this process |
| * because the write would cause this process to get |
| * a copy (without the softlock). |
| * |
| * The strategy here is to just break the |
| * sharing on pages that could possibly be |
| * softlocked. |
| */ |
| retry: |
| if (svd->softlockcnt) { |
| struct anon *ap, *newap; |
| size_t i; |
| uint_t vpprot; |
| page_t *anon_pl[1+1], *pp; |
| caddr_t addr; |
| ulong_t old_idx = svd->anon_index; |
| ulong_t new_idx = 0; |
| |
| /* |
| * The softlock count might be non-zero |
| * because some pages are still stuck in the |
| * cache for lazy reclaim. Flush the cache |
| * now. This should drop the count to zero. |
| * [or there is really I/O going on to these |
| * pages]. Note, we have the writers lock so |
| * nothing gets inserted during the flush. |
| */ |
| if (reclaim == 1) { |
| segvn_purge(seg); |
| reclaim = 0; |
| goto retry; |
| } |
| i = btopr(seg->s_size); |
| addr = seg->s_base; |
| /* |
| * XXX break cow sharing using PAGESIZE |
| * pages. They will be relocated into larger |
| * pages at fault time. |
| */ |
| while (i-- > 0) { |
| if (ap = anon_get_ptr(amp->ahp, |
| old_idx)) { |
| error = anon_getpage(&ap, |
| &vpprot, anon_pl, PAGESIZE, |
| seg, addr, S_READ, |
| svd->cred); |
| if (error) { |
| newsvd->vpage = NULL; |
| goto out; |
| } |
| /* |
| * prot need not be computed |
| * below 'cause anon_private is |
| * going to ignore it anyway |
| * as child doesn't inherit |
| * pagelock from parent. |
| */ |
| prot = svd->pageprot ? |
| VPP_PROT( |
| &svd->vpage[ |
| seg_page(seg, addr)]) |
| : svd->prot; |
| pp = anon_private(&newap, |
| newseg, addr, prot, |
| anon_pl[0], 0, |
| newsvd->cred); |
| if (pp == NULL) { |
| /* no mem abort */ |
| newsvd->vpage = NULL; |
| error = ENOMEM; |
| goto out; |
| } |
| (void) anon_set_ptr( |
| newsvd->amp->ahp, new_idx, |
| newap, ANON_SLEEP); |
| page_unlock(pp); |
| } |
| addr += PAGESIZE; |
| old_idx++; |
| new_idx++; |
| } |
| } else { /* common case */ |
| if (seg->s_szc != 0) { |
| /* |
| * If at least one of anon slots of a |
| * large page exists then make sure |
| * all anon slots of a large page |
| * exist to avoid partial cow sharing |
| * of a large page in the future. |
| */ |
| anon_dup_fill_holes(amp->ahp, |
| svd->anon_index, newsvd->amp->ahp, |
| 0, seg->s_size, seg->s_szc, |
| svd->vp != NULL); |
| } else { |
| anon_dup(amp->ahp, svd->anon_index, |
| newsvd->amp->ahp, 0, seg->s_size); |
| } |
| |
| hat_clrattr(seg->s_as->a_hat, seg->s_base, |
| seg->s_size, PROT_WRITE); |
| } |
| } |
| } |
| /* |
| * If necessary, create a vpage structure for the new segment. |
| * Do not copy any page lock indications. |
| */ |
| if (svd->vpage != NULL) { |
| uint_t i; |
| struct vpage *ovp = svd->vpage; |
| struct vpage *nvp; |
| |
| nvp = newsvd->vpage = |
| kmem_alloc(vpgtob(npages), KM_SLEEP); |
| for (i = 0; i < npages; i++) { |
| *nvp = *ovp++; |
| VPP_CLRPPLOCK(nvp++); |
| } |
| } else |
| newsvd->vpage = NULL; |
| |
| /* Inform the vnode of the new mapping */ |
| if (newsvd->vp != NULL) { |
| error = VOP_ADDMAP(newsvd->vp, (offset_t)newsvd->offset, |
| newseg->s_as, newseg->s_base, newseg->s_size, newsvd->prot, |
| newsvd->maxprot, newsvd->type, newsvd->cred, NULL); |
| } |
| out: |
| if (error == 0 && HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { |
| ASSERT(newsvd->amp == NULL); |
| ASSERT(newsvd->tr_state == SEGVN_TR_OFF); |
| newsvd->rcookie = svd->rcookie; |
| hat_dup_region(newseg->s_as->a_hat, newsvd->rcookie); |
| } |
| return (error); |
| } |
| |
| |
| /* |
| * callback function to invoke free_vp_pages() for only those pages actually |
| * processed by the HAT when a shared region is destroyed. |
| */ |
| extern int free_pages; |
| |
| static void |
| segvn_hat_rgn_unload_callback(caddr_t saddr, caddr_t eaddr, caddr_t r_saddr, |
| size_t r_size, void *r_obj, u_offset_t r_objoff) |
| { |
| u_offset_t off; |
| size_t len; |
| vnode_t *vp = (vnode_t *)r_obj; |
| |
| ASSERT(eaddr > saddr); |
| ASSERT(saddr >= r_saddr); |
| ASSERT(saddr < r_saddr + r_size); |
| ASSERT(eaddr > r_saddr); |
| ASSERT(eaddr <= r_saddr + r_size); |
| ASSERT(vp != NULL); |
| |
| if (!free_pages) { |
| return; |
| } |
| |
| len = eaddr - saddr; |
| off = (saddr - r_saddr) + r_objoff; |
| free_vp_pages(vp, off, len); |
| } |
| |
| /* |
| * callback function used by segvn_unmap to invoke free_vp_pages() for only |
| * those pages actually processed by the HAT |
| */ |
| static void |
| segvn_hat_unload_callback(hat_callback_t *cb) |
| { |
| struct seg *seg = cb->hcb_data; |
| struct segvn_data *svd = (struct segvn_data *)seg->s_data; |
| size_t len; |
| u_offset_t off; |
| |
| ASSERT(svd->vp != NULL); |
| ASSERT(cb->hcb_end_addr > cb->hcb_start_addr); |
| ASSERT(cb->hcb_start_addr >= seg->s_base); |
| |
| len = cb->hcb_end_addr - cb->hcb_start_addr; |
| off = cb->hcb_start_addr - seg->s_base; |
| free_vp_pages(svd->vp, svd->offset + off, len); |
| } |
| |
| /* |
| * This function determines the number of bytes of swap reserved by |
| * a segment for which per-page accounting is present. It is used to |
| * calculate the correct value of a segvn_data's swresv. |
| */ |
| static size_t |
| segvn_count_swap_by_vpages(struct seg *seg) |
| { |
| struct segvn_data *svd = (struct segvn_data *)seg->s_data; |
| struct vpage *vp, *evp; |
| size_t nswappages = 0; |
| |
| ASSERT(svd->pageswap); |
| ASSERT(svd->vpage != NULL); |
| |
| evp = &svd->vpage[seg_page(seg, seg->s_base + seg->s_size)]; |
| |
| for (vp = svd->vpage; vp < evp; vp++) { |
| if (VPP_ISSWAPRES(vp)) |
| nswappages++; |
| } |
| |
| return (nswappages << PAGESHIFT); |
| } |
| |
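| /* |
| * Unmap [addr, addr + len) from seg. Depending on the range this frees |
| * the whole segment, shrinks it at either end, or splits it in two. |
| * Returns EAGAIN if SOFTLOCKed pages prevent the unmap, and IE_RETRY if |
| * a large page segment had to be demoted first. |
| */ |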
| static int |
| segvn_unmap(struct seg *seg, caddr_t addr, size_t len) |
| { |
| struct segvn_data *svd = (struct segvn_data *)seg->s_data; |
| struct segvn_data *nsvd; |
| struct seg *nseg; |
| struct anon_map *amp; |
| pgcnt_t opages; /* old segment size in pages */ |
| pgcnt_t npages; /* new segment size in pages */ |
| pgcnt_t dpages; /* pages being deleted (unmapped) */ |
| hat_callback_t callback; /* used for free_vp_pages() */ |
| hat_callback_t *cbp = NULL; |
| caddr_t nbase; |
| size_t nsize; |
| size_t oswresv; |
| int reclaim = 1; |
| |
| /* |
| * We don't need any segment level locks for "segvn" data |
| * since the address space is "write" locked. |
| */ |
| ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); |
| |
| /* |
| * Fail the unmap if pages are SOFTLOCKed through this mapping. |
| * softlockcnt is protected from change by the as write lock. |
| */ |
| retry: |
| if (svd->softlockcnt > 0) { |
| ASSERT(svd->tr_state == SEGVN_TR_OFF); |
| |
| /* |
| * If this is shared segment non 0 softlockcnt |
| * means locked pages are still in use. |
| */ |
| if (svd->type == MAP_SHARED) { |
| return (EAGAIN); |
| } |
| |
| /* |
| * since we do have the writers lock nobody can fill |
| * the cache during the purge. The flush either succeeds |
| * or we still have pending I/Os. |
| */ |
| if (reclaim == 1) { |
| segvn_purge(seg); |
| reclaim = 0; |
| goto retry; |
| } |
| return (EAGAIN); |
| } |
| |
| /* |
| * Check for bad sizes |
| */ |
| if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size || |
| (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET)) { |
| panic("segvn_unmap"); |
| /*NOTREACHED*/ |
| } |
| |
| if (seg->s_szc != 0) { |
| size_t pgsz = page_get_pagesize(seg->s_szc); |
| int err; |
| if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) { |
| ASSERT(seg->s_base != addr || seg->s_size != len); |
| if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { |
| ASSERT(svd->amp == NULL); |
| ASSERT(svd->tr_state == SEGVN_TR_OFF); |
| hat_leave_region(seg->s_as->a_hat, |
| svd->rcookie, HAT_REGION_TEXT); |
| svd->rcookie = HAT_INVALID_REGION_COOKIE; |
| 				/* |
| 				 * We could pass a flag to segvn_demote_range() |
| 				 * below telling it not to do any unloads, but |
| 				 * this case is rare enough not to bother for |
| 				 * now. |
| 				 */ |
| } else if (svd->tr_state == SEGVN_TR_INIT) { |
| svd->tr_state = SEGVN_TR_OFF; |
| } else if (svd->tr_state == SEGVN_TR_ON) { |
| ASSERT(svd->amp != NULL); |
| segvn_textunrepl(seg, 1); |
| ASSERT(svd->amp == NULL); |
| ASSERT(svd->tr_state == SEGVN_TR_OFF); |
| } |
| VM_STAT_ADD(segvnvmstats.demoterange[0]); |
| err = segvn_demote_range(seg, addr, len, SDR_END, 0); |
| if (err == 0) { |
| return (IE_RETRY); |
| } |
| return (err); |
| } |
| } |
| |
| /* Inform the vnode of the unmapping. */ |
| if (svd->vp) { |
| int error; |
| |
| error = VOP_DELMAP(svd->vp, |
| (offset_t)svd->offset + (uintptr_t)(addr - seg->s_base), |
| seg->s_as, addr, len, svd->prot, svd->maxprot, |
| svd->type, svd->cred, NULL); |
| |
| if (error == EAGAIN) |
| return (error); |
| } |
| |
| 	/* |
| 	 * Remove any page locks set through this mapping. |
| 	 * If text replication is not off, no page locks could have been |
| 	 * established via this mapping. |
| 	 */ |
| if (svd->tr_state == SEGVN_TR_OFF) { |
| (void) segvn_lockop(seg, addr, len, 0, MC_UNLOCK, NULL, 0); |
| } |
| |
| if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { |
| ASSERT(svd->amp == NULL); |
| ASSERT(svd->tr_state == SEGVN_TR_OFF); |
| ASSERT(svd->type == MAP_PRIVATE); |
| hat_leave_region(seg->s_as->a_hat, svd->rcookie, |
| HAT_REGION_TEXT); |
| svd->rcookie = HAT_INVALID_REGION_COOKIE; |
| } else if (svd->tr_state == SEGVN_TR_ON) { |
| ASSERT(svd->amp != NULL); |
| ASSERT(svd->pageprot == 0 && !(svd->prot & PROT_WRITE)); |
| segvn_textunrepl(seg, 1); |
| ASSERT(svd->amp == NULL && svd->tr_state == SEGVN_TR_OFF); |
| } else { |
| if (svd->tr_state != SEGVN_TR_OFF) { |
| ASSERT(svd->tr_state == SEGVN_TR_INIT); |
| svd->tr_state = SEGVN_TR_OFF; |
| } |
| 		/* |
| 		 * Unload any hardware translations in the range to be taken |
| 		 * out. Use a callback so that free_vp_pages() is invoked only |
| 		 * for the pages actually processed by the HAT. |
| 		 */ |
| if (svd->vp != NULL && free_pages != 0) { |
| callback.hcb_data = seg; |
| callback.hcb_function = segvn_hat_unload_callback; |
| cbp = &callback; |
| } |
| hat_unload_callback(seg->s_as->a_hat, addr, len, |
| HAT_UNLOAD_UNMAP, cbp); |
| |
| if (svd->type == MAP_SHARED && svd->vp != NULL && |
| (svd->vp->v_flag & VVMEXEC) && |
| ((svd->prot & PROT_WRITE) || svd->pageprot)) { |
| segvn_inval_trcache(svd->vp); |
| } |
| } |
| |
| /* |
| * Check for entire segment |
| */ |
| if (addr == seg->s_base && len == seg->s_size) { |
| seg_free(seg); |
| return (0); |
| } |
| |
| opages = seg_pages(seg); |
| dpages = btop(len); |
| npages = opages - dpages; |
| amp = svd->amp; |
| ASSERT(amp == NULL || amp->a_szc >= seg->s_szc); |
| |
| /* |
| * Check for beginning of segment |
| */ |
| if (addr == seg->s_base) { |
| if (svd->vpage != NULL) { |
| size_t nbytes; |
| struct vpage *ovpage; |
| |
| ovpage = svd->vpage; /* keep pointer to vpage */ |
| |
| nbytes = vpgtob(npages); |
| svd->vpage = kmem_alloc(nbytes, KM_SLEEP); |
| bcopy(&ovpage[dpages], svd->vpage, nbytes); |
| |
| /* free up old vpage */ |
| kmem_free(ovpage, vpgtob(opages)); |
| } |
| if (amp != NULL) { |
| 			ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); |
| if (amp->refcnt == 1 || svd->type == MAP_PRIVATE) { |
| /* |
| * Shared anon map is no longer in use. Before |
| * freeing its pages purge all entries from |
| * pcache that belong to this amp. |
| */ |
| if (svd->type == MAP_SHARED) { |
| ASSERT(amp->refcnt == 1); |
| ASSERT(svd->softlockcnt == 0); |
| anonmap_purge(amp); |
| } |
| /* |
| * Free up now unused parts of anon_map array. |
| */ |
| if (amp->a_szc == seg->s_szc) { |
| if (seg->s_szc != 0) { |
| anon_free_pages(amp->ahp, |
| svd->anon_index, len, |
| seg->s_szc); |
| } else { |
| anon_free(amp->ahp, |
| svd->anon_index, |
| len); |
| } |
| } else { |
| ASSERT(svd->type == MAP_SHARED); |
| ASSERT(amp->a_szc > seg->s_szc); |
| anon_shmap_free_pages(amp, |
| svd->anon_index, len); |
| } |
| |
| 				/* |
| 				 * Unreserve swap space for the unmapped |
| 				 * chunk of this segment if it is |
| 				 * MAP_SHARED. |
| 				 */ |
| if (svd->type == MAP_SHARED) { |
| anon_unresv_zone(len, |
| seg->s_as->a_proc->p_zone); |
| amp->swresv -= len; |
| } |
| } |
| 		ANON_LOCK_EXIT(&amp->a_rwlock); |
| svd->anon_index += dpages; |
| } |
| if (svd->vp != NULL) |
| svd->offset += len; |
| |
| seg->s_base += len; |
| seg->s_size -= len; |
| |
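| 		/* |
| 		 * Adjust the swap reservation for the remaining pages: for |
| 		 * MAP_NORESERVE only pages with anon slots stay reserved; with |
| 		 * per-page swap accounting the reservation is recounted from |
| 		 * the vpage array; otherwise exactly len bytes are released. |
| 		 */ |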
| if (svd->swresv) { |
| if (svd->flags & MAP_NORESERVE) { |
| ASSERT(amp); |
| oswresv = svd->swresv; |
| |
| svd->swresv = ptob(anon_pages(amp->ahp, |
| svd->anon_index, npages)); |
| anon_unresv_zone(oswresv - svd->swresv, |
| seg->s_as->a_proc->p_zone); |
| if (SEG_IS_PARTIAL_RESV(seg)) |
| seg->s_as->a_resvsize -= oswresv - |
| svd->swresv; |
| } else { |
| size_t unlen; |
| |
| if (svd->pageswap) { |
| oswresv = svd->swresv; |
| svd->swresv = |
| segvn_count_swap_by_vpages(seg); |
| ASSERT(oswresv >= svd->swresv); |
| unlen = oswresv - svd->swresv; |
| } else { |
| svd->swresv -= len; |
| ASSERT(svd->swresv == seg->s_size); |
| unlen = len; |
| } |
| anon_unresv_zone(unlen, |
| seg->s_as->a_proc->p_zone); |
| } |
| TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u", |
| seg, len, 0); |
| } |
| |
| return (0); |
| } |
| |
| /* |
| * Check for end of segment |
| */ |
| if (addr + len == seg->s_base + seg->s_size) { |
| if (svd->vpage != NULL) { |
| size_t nbytes; |
| struct vpage *ovpage; |
| |
| ovpage = svd->vpage; /* keep pointer to vpage */ |
| |
| nbytes = vpgtob(npages); |
| svd->vpage = kmem_alloc(nbytes, KM_SLEEP); |
| bcopy(ovpage, svd->vpage, nbytes); |
| |
| /* free up old vpage */ |
| kmem_free(ovpage, vpgtob(opages)); |
| |
| } |
| if (amp != NULL) { |
| 			ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); |
| if (amp->refcnt == 1 || svd->type == MAP_PRIVATE) { |
| /* |
| * Free up now unused parts of anon_map array. |
| */ |
| ulong_t an_idx = svd->anon_index + npages; |
| |
| /* |
| * Shared anon map is no longer in use. Before |
| * freeing its pages purge all entries from |
| * pcache that belong to this amp. |
| */ |
| if (svd->type == MAP_SHARED) { |
| ASSERT(amp->refcnt == 1); |
| ASSERT(svd->softlockcnt == 0); |
| anonmap_purge(amp); |
| } |
| |
| if (amp->a_szc == seg->s_szc) { |
| if (seg->s_szc != 0) { |
| anon_free_pages(amp->ahp, |
| an_idx, len, |
| seg->s_szc); |
| } else { |
| anon_free(amp->ahp, an_idx, |
| len); |
| } |
| } else { |
| ASSERT(svd->type == MAP_SHARED); |
| ASSERT(amp->a_szc > seg->s_szc); |
| anon_shmap_free_pages(amp, |
| an_idx, len); |
| } |
| |
| 				/* |
| 				 * Unreserve swap space for the unmapped |
| 				 * chunk of this segment if it is |
| 				 * MAP_SHARED. |
| 				 */ |
| if (svd->type == MAP_SHARED) { |
| anon_unresv_zone(len, |
| seg->s_as->a_proc->p_zone); |
| amp->swresv -= len; |
| } |
| } |
| 		ANON_LOCK_EXIT(&amp->a_rwlock); |
| } |
| |
| seg->s_size -= len; |
| |
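| 		/* |
| 		 * Adjust the swap reservation the same way as in the |
| 		 * beginning-of-segment case above. |
| 		 */ |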
| if (svd->swresv) { |
| if (svd->flags & MAP_NORESERVE) { |
| ASSERT(amp); |
| oswresv = svd->swresv; |
| svd->swresv = ptob(anon_pages(amp->ahp, |
| svd->anon_index, npages)); |
| anon_unresv_zone(oswresv - svd->swresv, |
| seg->s_as->a_proc->p_zone); |
| if (SEG_IS_PARTIAL_RESV(seg)) |
| seg->s_as->a_resvsize -= oswresv - |
| svd->swresv; |
| } else { |
| size_t unlen; |
| |
| if (svd->pageswap) { |
| oswresv = svd->swresv; |
| svd->swresv = |
| segvn_count_swap_by_vpages(seg); |
| ASSERT(oswresv >= svd->swresv); |
| unlen = oswresv - svd->swresv; |
| } else { |
| svd->swresv -= len; |
| ASSERT(svd->swresv == seg->s_size); |
| unlen = len; |
| } |
| anon_unresv_zone(unlen, |
| seg->s_as->a_proc->p_zone); |
| } |
| TRACE_3(TR_FAC_VM, TR_ANON_PROC, |
| "anon proc:%p %lu %u", seg, len, 0); |
| } |
| |
| return (0); |
| } |
| |
| 	/* |
| 	 * The section to go is in the middle of the segment, so we have to |
| 	 * split it into two segments. nseg is made for the high end while |
| 	 * seg is cut down at the low end. |
| 	 */ |
| nbase = addr + len; /* new seg base */ |
| nsize = (seg->s_base + seg->s_size) - nbase; /* new seg size */ |
| seg->s_size = addr - seg->s_base; /* shrink old seg */ |
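| 	/* |
| 	 * After this arithmetic, seg covers [s_base, addr) and nseg will |
| 	 * cover [addr + len, old end of segment). |
| 	 */ |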
| nseg = seg_alloc(seg->s_as, nbase, nsize); |
| if (nseg == NULL) { |
| panic("segvn_unmap seg_alloc"); |
| /*NOTREACHED*/ |
| } |
| nseg->s_ops = seg->s_ops; |
| nsvd = kmem_cache_alloc(segvn_cache, KM_SLEEP); |
| nseg->s_data = (void *)nsvd; |
| nseg->s_szc = seg->s_szc; |
| *nsvd = *svd; |
| nsvd->seg = nseg; |
| nsvd->offset = svd->offset + (uintptr_t)(nseg->s_base - seg->s_base); |
| nsvd->swresv = 0; |
| nsvd->softlockcnt = 0; |
| nsvd->softlockcnt_sbase = 0; |
| nsvd->softlockcnt_send = 0; |
| ASSERT(nsvd->rcookie == HAT_INVALID_REGION_COOKIE); |
| |
| if (svd->vp != NULL) { |
| VN_HOLD(nsvd->vp); |
| if (nsvd->type == MAP_SHARED) |
| lgrp_shm_policy_init(NULL, nsvd->vp); |
| } |
| crhold(svd->cred); |
| |
| if (svd->vpage == NULL) { |
| nsvd->vpage = NULL; |
| } else { |
| /* need to split vpage into two arrays */ |
| size_t nbytes; |
| struct vpage *ovpage; |
| |
| ovpage = svd->vpage; /* keep pointer to vpage */ |
| |
| npages = seg_pages(seg); /* seg has shrunk */ |
| nbytes = vpgtob(npages); |
| svd->vpage = kmem_alloc(nbytes, KM_SLEEP); |
| |
| bcopy(ovpage, svd->vpage, nbytes); |
| |
| npages = seg_pages(nseg); |
| nbytes = vpgtob(npages); |
| nsvd->vpage = kmem_alloc(nbytes, KM_SLEEP); |
| |
| bcopy(&ovpage[opages - npages], nsvd->vpage, nbytes); |
| |
| /* free up old vpage */ |
| kmem_free(ovpage, vpgtob(opages)); |
| } |
| |
| if (amp == NULL) { |
| nsvd->amp = NULL; |
| nsvd->anon_index = 0; |
| } else { |
| /* |
| * Need to create a new anon map for the new segment. |
| * We'll also allocate a new smaller array for the old |
| * smaller segment to save space. |
| */ |
| opages = btop((uintptr_t)(addr - seg->s_base)); |
| 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); |
| if (amp->refcnt == 1 || svd->type == MAP_PRIVATE) { |
| /* |
| * Free up now unused parts of anon_map array. |
| */ |
| ulong_t an_idx = svd->anon_index + opages; |
| |
| /* |
| * Shared anon map is no longer in use. Before |
| * freeing its pages purge all entries from |
| * pcache that belong to this amp. |
| */ |
| if (svd->type == MAP_SHARED) { |
| ASSERT(amp->refcnt == 1); |
| ASSERT(svd->softlockcnt == 0); |
| anonmap_purge(amp); |
| } |
| |
| if (amp->a_szc == seg->s_szc) { |
| if (seg->s_szc != 0) { |
| anon_free_pages(amp->ahp, an_idx, len, |
| seg->s_szc); |
| } else { |
| anon_free(amp->ahp, an_idx, |
| len); |
| } |
| } else { |
| ASSERT(svd->type == MAP_SHARED); |
| ASSERT(amp->a_szc > seg->s_szc); |
| anon_shmap_free_pages(amp, an_idx, len); |
| } |
| |
| 			/* |
| 			 * Unreserve swap space for the unmapped chunk of |
| 			 * this segment if it is MAP_SHARED. |
| 			 */ |
| if (svd->type == MAP_SHARED) { |
| anon_unresv_zone(len, |
| seg->s_as->a_proc->p_zone); |
| amp->swresv -= len; |
| } |
| } |
| nsvd->anon_index = svd->anon_index + |
| btop((uintptr_t)(nseg->s_base - seg->s_base)); |
| if (svd->type == MAP_SHARED) { |
| amp->refcnt++; |
| nsvd->amp = amp; |
| } else { |
| struct anon_map *namp; |
| struct anon_hdr *nahp; |
| |
| ASSERT(svd->type == MAP_PRIVATE); |
| nahp = anon_create(btop(seg->s_size), ANON_SLEEP); |
| namp = anonmap_alloc(nseg->s_size, 0, ANON_SLEEP); |
| namp->a_szc = seg->s_szc; |
| (void) anon_copy_ptr(amp->ahp, svd->anon_index, nahp, |
| 0, btop(seg->s_size), ANON_SLEEP); |
| (void) anon_copy_ptr(amp->ahp, nsvd->anon_index, |
| namp->ahp, 0, btop(nseg->s_size), ANON_SLEEP); |
| anon_release(amp->ahp, btop(amp->size)); |
| svd->anon_index = 0; |
| nsvd->anon_index = 0; |
| amp->ahp = nahp; |
| amp->size = seg->s_size; |
| nsvd->amp = namp; |
| } |
| 		ANON_LOCK_EXIT(&amp->a_rwlock); |
| } |
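| 	/* |
| 	 * Split the swap reservation between the two resulting segments and |
| 	 * return any excess reservation for the unmapped middle to the zone. |
| 	 */ |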
| if (svd->swresv) { |
| if (svd->flags & MAP_NORESERVE) { |
| ASSERT(amp); |
| oswresv = svd->swresv; |
| svd->swresv = ptob(anon_pages(amp->ahp, |
| svd->anon_index, btop(seg->s_size))); |
| nsvd->swresv = ptob(anon_pages(nsvd->amp->ahp, |
| nsvd->anon_index, btop(nseg->s_size))); |
| ASSERT(oswresv >= (svd->swresv + nsvd->swresv)); |
| anon_unresv_zone(oswresv - (svd->swresv + nsvd->swresv), |
| seg->s_as->a_proc->p_zone); |
| if (SEG_IS_PARTIAL_RESV(seg)) |
| seg->s_as->a_resvsize -= oswresv - |
| (svd->swresv + nsvd->swresv); |
| } else { |
| size_t unlen; |
| |
| if (svd->pageswap) { |
| oswresv = svd->swresv; |
| svd->swresv = segvn_count_swap_by_vpages(seg); |
| nsvd->swresv = segvn_count_swap_by_vpages(nseg); |
| ASSERT(oswresv >= (svd->swresv + nsvd->swresv)); |
| unlen = oswresv - (svd->swresv + nsvd->swresv); |
| } else { |
| if (seg->s_size + nseg->s_size + len != |
| svd->swresv) { |
| panic("segvn_unmap: cannot split " |
| "swap reservation"); |
| /*NOTREACHED*/ |
| } |
| svd->swresv = seg->s_size; |
| nsvd->swresv = nseg->s_size; |
| unlen = len; |
| } |
| anon_unresv_zone(unlen, |
| seg->s_as->a_proc->p_zone); |
| } |
| TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u", |
| seg, len, 0); |
| } |
| |
| return (0); /* I'm glad that's all over with! */ |
| } |
| |
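| /* |
|  * Free a segvn segment: release page locks, the vpage array, anon map |
|  * references, any remaining swap reservation, the vnode hold and the |
|  * credentials, then free the private data. Called with the address space |
|  * write locked. |
|  */ |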
| static void |
| segvn_free(struct seg *seg) |
| { |
| struct segvn_data *svd = (struct segvn_data *)seg->s_data; |
| pgcnt_t npages = seg_pages(seg); |
| struct anon_map *amp; |
| size_t len; |
| |
| /* |
| * We don't need any segment level locks for "segvn" data |
| * since the address space is "write" locked. |
| */ |
| ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); |
| ASSERT(svd->tr_state == SEGVN_TR_OFF); |
| |
| ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); |
| |
| 	/* |
| 	 * Be sure to unlock pages. XXX Why do things get freed instead |
| 	 * of unmapped? XXX |
| 	 */ |
| (void) segvn_lockop(seg, seg->s_base, seg->s_size, |
| 0, MC_UNLOCK, NULL, 0); |
| |
| /* |
| * Deallocate the vpage and anon pointers if necessary and possible. |
| */ |
| if (svd->vpage != NULL) { |
| kmem_free(svd->vpage, vpgtob(npages)); |
| svd->vpage = NULL; |
| } |
| if ((amp = svd->amp) != NULL) { |
| /* |
| * If there are no more references to this anon_map |
| * structure, then deallocate the structure after freeing |
| * up all the anon slot pointers that we can. |
| */ |
| 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); |
| ASSERT(amp->a_szc >= seg->s_szc); |
| if (--amp->refcnt == 0) { |
| if (svd->type == MAP_PRIVATE) { |
| /* |
| * Private - we only need to anon_free |
| * the part that this segment refers to. |
| */ |
| if (seg->s_szc != 0) { |
| anon_free_pages(amp->ahp, |
| svd->anon_index, seg->s_size, |
| seg->s_szc); |
| } else { |
| anon_free(amp->ahp, svd->anon_index, |
| seg->s_size); |
| } |
| } else { |
| |
| /* |
| * Shared anon map is no longer in use. Before |
| * freeing its pages purge all entries from |
| * pcache that belong to this amp. |
| */ |
| ASSERT(svd->softlockcnt == 0); |
| anonmap_purge(amp); |
| |
| /* |
| * Shared - anon_free the entire |
| * anon_map's worth of stuff and |
| * release any swap reservation. |
| */ |
| if (amp->a_szc != 0) { |
| anon_shmap_free_pages(amp, 0, |
| amp->size); |
| } else { |
| anon_free(amp->ahp, 0, amp->size); |
| } |
| if ((len = amp->swresv) != 0) { |
| anon_unresv_zone(len, |
| seg->s_as->a_proc->p_zone); |
| TRACE_3(TR_FAC_VM, TR_ANON_PROC, |
| "anon proc:%p %lu %u", seg, len, 0); |
| } |
| } |
| svd->amp = NULL; |
| 			ANON_LOCK_EXIT(&amp->a_rwlock); |
| anonmap_free(amp); |
| } else if (svd->type == MAP_PRIVATE) { |
| /* |
| * We had a private mapping which still has |
| * a held anon_map so just free up all the |
| * anon slot pointers that we were using. |
| */ |
| if (seg->s_szc != 0) { |
| anon_free_pages(amp->ahp, svd->anon_index, |
| seg->s_size, seg->s_szc); |
| } else { |
| anon_free(amp->ahp, svd->anon_index, |
| seg->s_size); |
| } |
| 			ANON_LOCK_EXIT(&amp->a_rwlock); |
| 		} else { |
| 			ANON_LOCK_EXIT(&amp->a_rwlock); |
| } |
| } |
| |
| /* |
| * Release swap reservation. |
| */ |
| if ((len = svd->swresv) != 0) { |
| anon_unresv_zone(svd->swresv, |
| seg->s_as->a_proc->p_zone); |
| TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u", |
| seg, len, 0); |
| if (SEG_IS_PARTIAL_RESV(seg)) |
| seg->s_as->a_resvsize -= svd->swresv; |
| svd->swresv = 0; |
| } |
| /* |
| * Release claim on vnode, credentials, and finally free the |
| * private data. |
| */ |
| if (svd->vp != NULL) { |
| if (svd->type == MAP_SHARED) |
| lgrp_shm_policy_fini(NULL, svd->vp); |
| VN_RELE(svd->vp); |
| svd->vp = NULL; |
| } |
| crfree(svd->cred); |
| svd->pageprot = 0; |
| svd->pageadvice = 0; |
| svd->pageswap = 0; |
| svd->cred = NULL; |
| |
| 	/* |
| 	 * Take the segfree_syncmtx lock to let segvn_reclaim() finish if it is |
| 	 * still working with this segment without holding the as lock (in case |
| 	 * it is called by the pcache async thread). |
| 	 */ |
| ASSERT(svd->softlockcnt == 0); |
| mutex_enter(&svd->segfree_syncmtx); |
| mutex_exit(&svd->segfree_syncmtx); |
| |
| seg->s_data = NULL; |
| kmem_cache_free(segvn_cache, svd); |
| } |
| |
| /* |
|  * Do an F_SOFTUNLOCK call over the range requested. The range must have |
|  * already been F_SOFTLOCK'ed. |
|  * The caller must always match the addr and len of a softunlock with a |
|  * previous softlock with exactly the same addr and len. |
|  */ |
| static void |
| segvn_softunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw) |
| { |
| struct segvn_data *svd = (struct segvn_data *)seg->s_data; |
| page_t *pp; |
| caddr_t adr; |
| struct vnode *vp; |
| u_offset_t offset; |
| ulong_t anon_index; |
| struct anon_map *amp; |
| struct anon *ap = NULL; |
| |
| ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); |
| ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock)); |
| |
| if ((amp = svd->amp) != NULL) |
| anon_index = svd->anon_index + seg_page(seg, addr); |
| |
| if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { |
| ASSERT(svd->tr_state == SEGVN_TR_OFF); |
| hat_unlock_region(seg->s_as->a_hat, addr, len, svd->rcookie); |
| } else { |
| hat_unlock(seg->s_as->a_hat, addr, len); |
| } |
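| 	/* |
| 	 * For each page in the range, resolve its backing object (the anon |
| 	 * layer's swap vnode if an anon slot exists, otherwise the segment's |
| 	 * vnode), update ref/mod state according to the access type, and |
| 	 * drop the page lock taken at softlock time. |
| 	 */ |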
| for (adr = addr; adr < addr + len; adr += PAGESIZE) { |
| if (amp != NULL) { |
| 			ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER); |
| if ((ap = anon_get_ptr(amp->ahp, anon_index++)) |
| != NULL) { |
| swap_xlate(ap, &vp, &offset); |
| } else { |
| vp = svd->vp; |
| offset = svd->offset + |
| (uintptr_t)(adr - seg->s_base); |
| } |
| 			ANON_LOCK_EXIT(&amp->a_rwlock); |
| } else { |
| vp = svd->vp; |
| offset = svd->offset + |
| (uintptr_t)(adr - seg->s_base); |
| } |
| |
| /* |
| * Use page_find() instead of page_lookup() to |
| * find the page since we know that it is locked. |
| */ |
| pp = page_find(vp, offset); |
| if (pp == NULL) { |
| panic( |
| "segvn_softunlock: addr %p, ap %p, vp %p, off %llx", |
| (void *)adr, (void *)ap, (void *)vp, offset); |
| /*NOTREACHED*/ |
| } |
| |
| if (rw == S_WRITE) { |
| hat_setrefmod(pp); |
| if (seg->s_as->a_vbits) |
| hat_setstat(seg->s_as, adr, PAGESIZE, |
| P_REF | P_MOD); |
| } else if (rw != S_OTHER) { |
| hat_setref(pp); |
| if (seg->s_as->a_vbits) |
| hat_setstat(seg->s_as, adr, PAGESIZE, P_REF); |
| } |
| TRACE_3(TR_FAC_VM, TR_SEGVN_FAULT, |
| "segvn_fault:pp %p vp %p offset %llx", pp, vp, offset); |
| page_unlock(pp); |
| } |
| ASSERT(svd->softlockcnt >= btop(len)); |
| if (!atomic_add_long_nv((ulong_t *)&svd->softlockcnt, -btop(len))) { |
| 		/* |
| 		 * All SOFTLOCKs are gone. Wake up any waiting |
| 		 * unmappers so they can try again to unmap. |
| 		 * Check for waiters first without the mutex |
| 		 * held so we don't always grab the mutex on |
| 		 * softunlocks. |
| 		 */ |
| if (AS_ISUNMAPWAIT(seg->s_as)) { |
| mutex_enter(&seg->s_as->a_contents); |
| if (AS_ISUNMAPWAIT(seg->s_as)) { |
| AS_CLRUNMAPWAIT(seg->s_as); |
| cv_broadcast(&seg->s_as->a_cv); |
| } |
| mutex_exit(&seg->s_as->a_contents); |
| } |
| } |
| } |
| |
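| /* |
|  * Sentinel stored in a page list to mark entries that have already been |
|  * processed and must not be unlocked again. |
|  */ |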
| #define PAGE_HANDLED ((page_t *)-1) |
| |
| /* |
|  * Release all the pages in the NULL-terminated ppp list |
|  * which haven't already been converted to PAGE_HANDLED. |
|  */ |
| static void |
| segvn_pagelist_rele(page_t **ppp) |
| { |
| for (; *ppp != NULL; ppp++) { |
| if (*ppp != PAGE_HANDLED) |
| page_unlock(*ppp); |
| } |
| } |
| |
| static int stealcow = 1; |
| |
| /* |
|  * Workaround for the Viking chip bug; see bug id 1220902. |
|  * Fixing this down in pagefault() would require importing so much |
|  * as (address space) and segvn code as to be unmaintainable. |
|  */ |
| int enable_mbit_wa = 0; |
| |
| /* |
| * Handles all the dirty work of getting the right |
| * anonymous pages and loading up the translations. |
| * This routine is called only from segvn_fault() |
| * when looping over the range of addresses requested. |
| * |
| * The basic algorithm here is: |
| * If this is an anon_zero case |
| * Call anon_zero to allocate page |
| * Load up translation |
| * Return |
| * endif |
| * If this is an anon page |
| * Use anon_getpage to get the page |
| * else |
| * Find page in pl[] list passed in |
| * endif |
| * If not a cow |
| * Load up the translation to the page |
| * return |
| * endif |
| * Call anon_private to handle cow |
| * Load up (writable) translation to new page |
| */ |
| static faultcode_t |
| segvn_faultpage( |
| struct hat *hat, /* the hat to use for mapping */ |
| struct seg *seg, /* seg_vn of interest */ |
| caddr_t addr, /* address in as */ |
| u_offset_t off, /* offset in vp */ |
| struct vpage *vpage, /* pointer to vpage for vp, off */ |
| page_t *pl[], /* object source page pointer */ |
| uint_t vpprot, /* access allowed to object pages */ |
| enum fault_type type, /* type of fault */ |
| enum seg_rw rw, /* type of access at fault */ |
| int brkcow) /* we may need to break cow */ |
| { |
| struct segvn_data *svd = (struct segvn_data *)seg->s_data; |
| page_t *pp, **ppp; |
| uint_t pageflags = 0; |
| page_t *anon_pl[1 + 1]; |
| page_t *opp = NULL; /* original page */ |
| uint_t prot; |
| |