/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2015, Joyent Inc. All rights reserved.
* Copyright (c) 2016 by Delphix. All rights reserved.
* Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
*/
/*
* Zones
*
* A zone is a named collection of processes, namespace constraints,
* and other system resources which comprise a secure and manageable
* application containment facility.
*
* Zones (represented by the reference counted zone_t) are tracked in
* the kernel in the zonehash. Elsewhere in the kernel, Zone IDs
* (zoneid_t) are used to track zone association. Zone IDs are
* dynamically generated when the zone is created; if a persistent
* identifier is needed (core files, accounting logs, audit trail,
* etc.), the zone name should be used.
*
*
* Global Zone:
*
* The global zone (zoneid 0) is automatically associated with all
* system resources that have not been bound to a user-created zone.
* This means that even systems where zones are not in active use
* have a global zone, and all processes, mounts, etc. are
* associated with that zone. The global zone is generally
* unconstrained in terms of privileges and access, though the usual
* credential and privilege based restrictions apply.
*
*
* Zone States:
*
 * The states in which a zone may be, and the transitions between them,
 * are as follows:
*
* ZONE_IS_UNINITIALIZED: primordial state for a zone. The partially
* initialized zone is added to the list of active zones on the system but
* isn't accessible.
*
 * ZONE_IS_INITIALIZED: Initialization is complete except that the ZSD
 * callbacks have not yet run. It is not possible to enter the zone, but
 * attributes can be retrieved.
*
* ZONE_IS_READY: zsched (the kernel dummy process for a zone) is
* ready. The zone is made visible after the ZSD constructor callbacks are
* executed. A zone remains in this state until it transitions into
* the ZONE_IS_BOOTING state as a result of a call to zone_boot().
*
 * ZONE_IS_BOOTING: in this short-lived state, zsched attempts to start
* init. Should that fail, the zone proceeds to the ZONE_IS_SHUTTING_DOWN
* state.
*
* ZONE_IS_RUNNING: The zone is open for business: zsched has
* successfully started init. A zone remains in this state until
* zone_shutdown() is called.
*
* ZONE_IS_SHUTTING_DOWN: zone_shutdown() has been called, the system is
* killing all processes running in the zone. The zone remains
* in this state until there are no more user processes running in the zone.
* zone_create(), zone_enter(), and zone_destroy() on this zone will fail.
* Since zone_shutdown() is restartable, it may be called successfully
* multiple times for the same zone_t. Setting of the zone's state to
* ZONE_IS_SHUTTING_DOWN is synchronized with mounts, so VOP_MOUNT() may check
* the zone's status without worrying about it being a moving target.
*
* ZONE_IS_EMPTY: zone_shutdown() has been called, and there
* are no more user processes in the zone. The zone remains in this
* state until there are no more kernel threads associated with the
* zone. zone_create(), zone_enter(), and zone_destroy() on this zone will
* fail.
*
* ZONE_IS_DOWN: All kernel threads doing work on behalf of the zone
* have exited. zone_shutdown() returns. Henceforth it is not possible to
* join the zone or create kernel threads therein.
*
* ZONE_IS_DYING: zone_destroy() has been called on the zone; zone
* remains in this state until zsched exits. Calls to zone_find_by_*()
* return NULL from now on.
*
* ZONE_IS_DEAD: zsched has exited (zone_ntasks == 0). There are no
* processes or threads doing work on behalf of the zone. The zone is
* removed from the list of active zones. zone_destroy() returns, and
* the zone can be recreated.
*
* ZONE_IS_FREE (internal state): zone_ref goes to 0, ZSD destructor
* callbacks are executed, and all memory associated with the zone is
* freed.
*
* Threads can wait for the zone to enter a requested state by using
* zone_status_wait() or zone_status_timedwait() with the desired
* state passed in as an argument. Zone state transitions are
* uni-directional; it is not possible to move back to an earlier state.
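 *
 * As an illustration (a sketch, not drawn from any particular caller),
 * a kernel thread holding a zone reference could block until the zone
 * has shut down as follows:
 *
 *	zone_t *zone;
 *
 *	if ((zone = zone_find_by_id(zoneid)) != NULL) {
 *		zone_status_wait(zone, ZONE_IS_DOWN);
 *		zone_rele(zone);
 *	}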
*
*
* Zone-Specific Data:
*
* Subsystems needing to maintain zone-specific data can store that
* data using the ZSD mechanism. This provides a zone-specific data
* store, similar to thread-specific data (see pthread_getspecific(3C)
 * or the TSD code in uts/common/disp/thread.c). Also, ZSD can be used
* to register callbacks to be invoked when a zone is created, shut
* down, or destroyed. This can be used to initialize zone-specific
* data for new zones and to clean up when zones go away.
*
*
* Data Structures:
*
* The per-zone structure (zone_t) is reference counted, and freed
* when all references are released. zone_hold and zone_rele can be
* used to adjust the reference count. In addition, reference counts
* associated with the cred_t structure are tracked separately using
* zone_cred_hold and zone_cred_rele.
*
* Pointers to active zone_t's are stored in two hash tables; one
* for searching by id, the other for searching by name. Lookups
* can be performed on either basis, using zone_find_by_id and
* zone_find_by_name. Both return zone_t pointers with the zone
* held, so zone_rele should be called when the pointer is no longer
* needed. Zones can also be searched by path; zone_find_by_path
* returns the zone with which a path name is associated (global
* zone if the path is not within some other zone's file system
* hierarchy). This currently requires iterating through each zone,
* so it is slower than an id or name search via a hash table.
*
*
* Locking:
*
* zonehash_lock: This is a top-level global lock used to protect the
* zone hash tables and lists. Zones cannot be created or destroyed
* while this lock is held.
* zone_status_lock: This is a global lock protecting zone state.
* Zones cannot change state while this lock is held. It also
* protects the list of kernel threads associated with a zone.
* zone_lock: This is a per-zone lock used to protect several fields of
* the zone_t (see <sys/zone.h> for details). In addition, holding
* this lock means that the zone cannot go away.
* zone_nlwps_lock: This is a per-zone lock used to protect the fields
* related to the zone.max-lwps rctl.
* zone_mem_lock: This is a per-zone lock used to protect the fields
* related to the zone.max-locked-memory and zone.max-swap rctls.
* zone_rctl_lock: This is a per-zone lock used to protect other rctls,
 * currently just the zone.max-lofi rctl.
* zsd_key_lock: This is a global lock protecting the key state for ZSD.
* zone_deathrow_lock: This is a global lock protecting the "deathrow"
* list (a list of zones in the ZONE_IS_DEAD state).
*
* Ordering requirements:
* pool_lock --> cpu_lock --> zonehash_lock --> zone_status_lock -->
* zone_lock --> zsd_key_lock --> pidlock --> p_lock
*
* When taking zone_mem_lock or zone_nlwps_lock, the lock ordering is:
* zonehash_lock --> a_lock --> pidlock --> p_lock --> zone_mem_lock
* zonehash_lock --> a_lock --> pidlock --> p_lock --> zone_nlwps_lock
*
* Blocking memory allocations are permitted while holding any of the
* zone locks.
*
*
* System Call Interface:
*
* The zone subsystem can be managed and queried from user level with
* the following system calls (all subcodes of the primary "zone"
* system call):
* - zone_create: creates a zone with selected attributes (name,
* root path, privileges, resource controls, ZFS datasets)
* - zone_enter: allows the current process to enter a zone
* - zone_getattr: reports attributes of a zone
 * - zone_setattr: sets attributes of a zone
 * - zone_boot: sets 'init' running for the zone
* - zone_list: lists all zones active in the system
* - zone_lookup: looks up zone id based on name
* - zone_shutdown: initiates shutdown process (see states above)
* - zone_destroy: completes shutdown process (see states above)
*
*/
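/*
 * For reference, the libc wrappers around these subcodes can be used from
 * user level roughly as follows. This is only an illustrative sketch (the
 * fixed-size array and lack of error handling are simplifications):
 *
 *	#include <zone.h>
 *	#include <stdio.h>
 *
 *	void
 *	print_zones(void)
 *	{
 *		zoneid_t ids[64];
 *		uint_t nzones = 64;
 *		char name[ZONENAME_MAX];
 *		uint_t i;
 *
 *		if (zone_list(ids, &nzones) != 0)
 *			return;
 *		for (i = 0; i < nzones && i < 64; i++) {
 *			if (getzonenamebyid(ids[i], name, sizeof (name)) != -1)
 *				(void) printf("%d\t%s\n", (int)ids[i], name);
 *		}
 *	}
 */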
#include <sys/priv_impl.h>
#include <sys/cred.h>
#include <c2/audit.h>
#include <sys/debug.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/mutex.h>
#include <sys/note.h>
#include <sys/pathname.h>
#include <sys/proc.h>
#include <sys/project.h>
#include <sys/sysevent.h>
#include <sys/task.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/systeminfo.h>
#include <sys/policy.h>
#include <sys/cred_impl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/class.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/pset.h>
#include <sys/strlog.h>
#include <sys/sysmacros.h>
#include <sys/callb.h>
#include <sys/vmparam.h>
#include <sys/corectl.h>
#include <sys/ipc_impl.h>
#include <sys/klpd.h>
#include <sys/door.h>
#include <sys/cpuvar.h>
#include <sys/sdt.h>
#include <sys/uadmin.h>
#include <sys/session.h>
#include <sys/cmn_err.h>
#include <sys/modhash.h>
#include <sys/sunddi.h>
#include <sys/nvpair.h>
#include <sys/rctl.h>
#include <sys/fss.h>
#include <sys/brand.h>
#include <sys/zone.h>
#include <net/if.h>
#include <sys/cpucaps.h>
#include <vm/seg.h>
#include <sys/mac.h>
/*
* This constant specifies the number of seconds that threads waiting for
* subsystems to release a zone's general-purpose references will wait before
* they log the zone's reference counts. The constant's value shouldn't
* be so small that reference counts are unnecessarily reported for zones
* whose references are slowly released. On the other hand, it shouldn't be so
* large that users reboot their systems out of frustration over hung zones
* before the system logs the zones' reference counts.
*/
#define ZONE_DESTROY_TIMEOUT_SECS 60
/* List of data link IDs which are accessible from the zone */
typedef struct zone_dl {
datalink_id_t zdl_id;
nvlist_t *zdl_net;
list_node_t zdl_linkage;
} zone_dl_t;
/*
* cv used to signal that all references to the zone have been released. This
* needs to be global since there may be multiple waiters, and the first to
* wake up will free the zone_t, hence we cannot use zone->zone_cv.
*/
static kcondvar_t zone_destroy_cv;
/*
* Lock used to serialize access to zone_cv. This could have been per-zone,
* but then we'd need another lock for zone_destroy_cv, and why bother?
*/
static kmutex_t zone_status_lock;
/*
* ZSD-related global variables.
*/
static kmutex_t zsd_key_lock; /* protects the following two */
/*
* The next caller of zone_key_create() will be assigned a key of ++zsd_keyval.
*/
static zone_key_t zsd_keyval = 0;
/*
* Global list of registered keys. We use this when a new zone is created.
*/
static list_t zsd_registered_keys;
int zone_hash_size = 256;
static mod_hash_t *zonehashbyname, *zonehashbyid, *zonehashbylabel;
static kmutex_t zonehash_lock;
static uint_t zonecount;
static id_space_t *zoneid_space;
/*
* The global zone (aka zone0) is the all-seeing, all-knowing zone in which the
* kernel proper runs, and which manages all other zones.
*
* Although not declared as static, the variable "zone0" should not be used
 * except by code that needs to reference the global zone early in boot,
* before it is fully initialized. All other consumers should use
* 'global_zone'.
*/
zone_t zone0;
zone_t *global_zone = NULL; /* Set when the global zone is initialized */
/*
* List of active zones, protected by zonehash_lock.
*/
static list_t zone_active;
/*
* List of destroyed zones that still have outstanding cred references.
* Used for debugging. Uses a separate lock to avoid lock ordering
* problems in zone_free.
*/
static list_t zone_deathrow;
static kmutex_t zone_deathrow_lock;
/* number of zones is limited by virtual interface limit in IP */
uint_t maxzones = 8192;
/* Event channel used to send zone state change notifications */
evchan_t *zone_event_chan;
/*
* This table holds the mapping from kernel zone states to
* states visible in the state notification API.
* The idea is that we only expose "obvious" states and
* do not expose states which are just implementation details.
*/
const char *zone_status_table[] = {
ZONE_EVENT_UNINITIALIZED, /* uninitialized */
ZONE_EVENT_INITIALIZED, /* initialized */
ZONE_EVENT_READY, /* ready */
ZONE_EVENT_READY, /* booting */
ZONE_EVENT_RUNNING, /* running */
ZONE_EVENT_SHUTTING_DOWN, /* shutting_down */
ZONE_EVENT_SHUTTING_DOWN, /* empty */
ZONE_EVENT_SHUTTING_DOWN, /* down */
ZONE_EVENT_SHUTTING_DOWN, /* dying */
ZONE_EVENT_UNINITIALIZED, /* dead */
};
/*
* This array contains the names of the subsystems listed in zone_ref_subsys_t
* (see sys/zone.h).
*/
static char *zone_ref_subsys_names[] = {
"NFS", /* ZONE_REF_NFS */
"NFSv4", /* ZONE_REF_NFSV4 */
"SMBFS", /* ZONE_REF_SMBFS */
"MNTFS", /* ZONE_REF_MNTFS */
"LOFI", /* ZONE_REF_LOFI */
"VFS", /* ZONE_REF_VFS */
"IPC" /* ZONE_REF_IPC */
};
/*
* This isn't static so lint doesn't complain.
*/
rctl_hndl_t rc_zone_cpu_shares;
rctl_hndl_t rc_zone_locked_mem;
rctl_hndl_t rc_zone_max_swap;
rctl_hndl_t rc_zone_max_lofi;
rctl_hndl_t rc_zone_cpu_cap;
rctl_hndl_t rc_zone_nlwps;
rctl_hndl_t rc_zone_nprocs;
rctl_hndl_t rc_zone_shmmax;
rctl_hndl_t rc_zone_shmmni;
rctl_hndl_t rc_zone_semmni;
rctl_hndl_t rc_zone_msgmni;
const char * const zone_default_initname = "/sbin/init";
static char * const zone_prefix = "/zone/";
static int zone_shutdown(zoneid_t zoneid);
static int zone_add_datalink(zoneid_t, datalink_id_t);
static int zone_remove_datalink(zoneid_t, datalink_id_t);
static int zone_list_datalink(zoneid_t, int *, datalink_id_t *);
static int zone_set_network(zoneid_t, zone_net_data_t *);
static int zone_get_network(zoneid_t, zone_net_data_t *);
typedef boolean_t zsd_applyfn_t(kmutex_t *, boolean_t, zone_t *, zone_key_t);
static void zsd_apply_all_zones(zsd_applyfn_t *, zone_key_t);
static void zsd_apply_all_keys(zsd_applyfn_t *, zone_t *);
static boolean_t zsd_apply_create(kmutex_t *, boolean_t, zone_t *, zone_key_t);
static boolean_t zsd_apply_shutdown(kmutex_t *, boolean_t, zone_t *,
zone_key_t);
static boolean_t zsd_apply_destroy(kmutex_t *, boolean_t, zone_t *, zone_key_t);
static boolean_t zsd_wait_for_creator(zone_t *, struct zsd_entry *,
kmutex_t *);
static boolean_t zsd_wait_for_inprogress(zone_t *, struct zsd_entry *,
kmutex_t *);
/*
* Bump this number when you alter the zone syscall interfaces; this is
* because we need to have support for previous API versions in libc
* to support patching; libc calls into the kernel to determine this number.
*
 * Version 1 of the API is the version originally shipped with Solaris 10.
 * Version 2 alters the zone_create system call in order to support more
 * arguments by moving the args into a structure, and to do better
* error reporting when zone_create() fails.
* Version 3 alters the zone_create system call in order to support the
* import of ZFS datasets to zones.
* Version 4 alters the zone_create system call in order to support
* Trusted Extensions.
* Version 5 alters the zone_boot system call, and converts its old
* bootargs parameter to be set by the zone_setattr API instead.
* Version 6 adds the flag argument to zone_create.
*/
static const int ZONE_SYSCALL_API_VERSION = 6;
/*
* Certain filesystems (such as NFS and autofs) need to know which zone
* the mount is being placed in. Because of this, we need to be able to
* ensure that a zone isn't in the process of being created/destroyed such
* that nfs_mount() thinks it is in the global/NGZ zone, while by the time
 * it gets added to the list of mounted zones, it ends up on the wrong zone's
* mount list. Since a zone can't reside on an NFS file system, we don't
* have to worry about the zonepath itself.
*
* The following functions: block_mounts()/resume_mounts() and
* mount_in_progress()/mount_completed() are used by zones and the VFS
* layer (respectively) to synchronize zone state transitions and new
 * mounts within a zone. This synchronization is on a per-zone basis, so
* activity for one zone will not interfere with activity for another zone.
*
* The semantics are like a reader-reader lock such that there may
* either be multiple mounts (or zone state transitions, if that weren't
* serialized by zonehash_lock) in progress at the same time, but not
* both.
*
* We use cv's so the user can ctrl-C out of the operation if it's
* taking too long.
*
 * The semantics are such that there is an unfair bias towards the
 * "current" operation. This means that zone halt may starve if
 * there is a rapid succession of new mounts coming into the zone.
*/
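/*
 * As a sketch of the intended pairing (illustrative only), a zone state
 * transition brackets its work with block_mounts()/resume_mounts(), while
 * the VFS layer brackets each mount with mount_in_progress() and
 * mount_completed():
 *
 *	Zone halt side				VFS side
 *	--------------				--------
 *	if (block_mounts(zp)) {			mount_in_progress(zp);
 *		... change zone status ...	error = VFS_MOUNT(vfsp, ...);
 *		resume_mounts(zp);		... add vfs to zone's list ...
 *	}					mount_completed(zp);
 */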
/*
* Prevent new mounts from progressing to the point of calling
* VFS_MOUNT(). If there are already mounts in this "region", wait for
* them to complete.
*/
static int
block_mounts(zone_t *zp)
{
int retval = 0;
/*
* Since it may block for a long time, block_mounts() shouldn't be
* called with zonehash_lock held.
*/
ASSERT(MUTEX_NOT_HELD(&zonehash_lock));
mutex_enter(&zp->zone_mount_lock);
while (zp->zone_mounts_in_progress > 0) {
if (cv_wait_sig(&zp->zone_mount_cv, &zp->zone_mount_lock) == 0)
goto signaled;
}
/*
* A negative value of mounts_in_progress indicates that mounts
* have been blocked by (-mounts_in_progress) different callers
* (remotely possible if two threads enter zone_shutdown at the same
* time).
*/
zp->zone_mounts_in_progress--;
retval = 1;
signaled:
mutex_exit(&zp->zone_mount_lock);
return (retval);
}
/*
* The VFS layer may progress with new mounts as far as we're concerned.
* Allow them to progress if we were the last obstacle.
*/
static void
resume_mounts(zone_t *zp)
{
mutex_enter(&zp->zone_mount_lock);
if (++zp->zone_mounts_in_progress == 0)
cv_broadcast(&zp->zone_mount_cv);
mutex_exit(&zp->zone_mount_lock);
}
/*
* The VFS layer is busy with a mount; this zone should wait until all
* of its mounts are completed to progress.
*/
void
mount_in_progress(zone_t *zp)
{
mutex_enter(&zp->zone_mount_lock);
while (zp->zone_mounts_in_progress < 0)
cv_wait(&zp->zone_mount_cv, &zp->zone_mount_lock);
zp->zone_mounts_in_progress++;
mutex_exit(&zp->zone_mount_lock);
}
/*
* VFS is done with one mount; wake up any waiting block_mounts()
* callers if this is the last mount.
*/
void
mount_completed(zone_t *zp)
{
mutex_enter(&zp->zone_mount_lock);
if (--zp->zone_mounts_in_progress == 0)
cv_broadcast(&zp->zone_mount_cv);
mutex_exit(&zp->zone_mount_lock);
}
/*
* ZSD routines.
*
* Zone Specific Data (ZSD) is modeled after Thread Specific Data as
* defined by the pthread_key_create() and related interfaces.
*
* Kernel subsystems may register one or more data items and/or
* callbacks to be executed when a zone is created, shutdown, or
* destroyed.
*
* Unlike the thread counterpart, destructor callbacks will be executed
* even if the data pointer is NULL and/or there are no constructor
* callbacks, so it is the responsibility of such callbacks to check for
* NULL data values if necessary.
*
* The locking strategy and overall picture is as follows:
*
* When someone calls zone_key_create(), a template ZSD entry is added to the
* global list "zsd_registered_keys", protected by zsd_key_lock. While
* holding that lock all the existing zones are marked as
* ZSD_CREATE_NEEDED and a copy of the ZSD entry added to the per-zone
* zone_zsd list (protected by zone_lock). The global list is updated first
 * (under zsd_key_lock) to make sure that newly created zones use the
* most recent list of keys. Then under zonehash_lock we walk the zones
* and mark them. Similar locking is used in zone_key_delete().
*
 * The actual create, shutdown, and destroy callbacks are invoked without
 * holding any locks. The zsd_flags are used to track whether the operations
 * have completed, so that by the time zone_key_create (and zone_create) or
 * zone_key_delete (and zone_destroy) returns, all the necessary callbacks
 * have been run.
*
* When new zones are created constructor callbacks for all registered ZSD
* entries will be called. That also uses the above two phases of marking
* what needs to be done, and then running the callbacks without holding
* any locks.
*
* The framework does not provide any locking around zone_getspecific() and
* zone_setspecific() apart from that needed for internal consistency, so
* callers interested in atomic "test-and-set" semantics will need to provide
* their own locking.
*/
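/*
 * As a sketch of typical usage (a hypothetical subsystem "foo", not an
 * actual consumer), a subsystem registers its key once, usually at module
 * load or subsystem init time, and lets the callbacks manage the per-zone
 * state:
 *
 *	static zone_key_t foo_zone_key;
 *
 *	static void *
 *	foo_zone_create(zoneid_t zoneid)
 *	{
 *		return (kmem_zalloc(sizeof (foo_zone_state_t), KM_SLEEP));
 *	}
 *
 *	static void
 *	foo_zone_destroy(zoneid_t zoneid, void *data)
 *	{
 *		if (data != NULL)
 *			kmem_free(data, sizeof (foo_zone_state_t));
 *	}
 *
 *	void
 *	foo_init(void)
 *	{
 *		zone_key_create(&foo_zone_key, foo_zone_create, NULL,
 *		    foo_zone_destroy);
 *	}
 *
 * Thereafter foo can retrieve its state for any zone with
 * zone_getspecific(foo_zone_key, zone).
 */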
/*
* Helper function to find the zsd_entry associated with the key in the
* given list.
*/
static struct zsd_entry *
zsd_find(list_t *l, zone_key_t key)
{
struct zsd_entry *zsd;
for (zsd = list_head(l); zsd != NULL; zsd = list_next(l, zsd)) {
if (zsd->zsd_key == key) {
return (zsd);
}
}
return (NULL);
}
/*
* Helper function to find the zsd_entry associated with the key in the
* given list. Move it to the front of the list.
*/
static struct zsd_entry *
zsd_find_mru(list_t *l, zone_key_t key)
{
struct zsd_entry *zsd;
for (zsd = list_head(l); zsd != NULL; zsd = list_next(l, zsd)) {
if (zsd->zsd_key == key) {
/*
* Move to head of list to keep list in MRU order.
*/
if (zsd != list_head(l)) {
list_remove(l, zsd);
list_insert_head(l, zsd);
}
return (zsd);
}
}
return (NULL);
}
void
zone_key_create(zone_key_t *keyp, void *(*create)(zoneid_t),
void (*shutdown)(zoneid_t, void *), void (*destroy)(zoneid_t, void *))
{
struct zsd_entry *zsdp;
struct zsd_entry *t;
struct zone *zone;
zone_key_t key;
zsdp = kmem_zalloc(sizeof (*zsdp), KM_SLEEP);
zsdp->zsd_data = NULL;
zsdp->zsd_create = create;
zsdp->zsd_shutdown = shutdown;
zsdp->zsd_destroy = destroy;
/*
* Insert in global list of callbacks. Makes future zone creations
* see it.
*/
mutex_enter(&zsd_key_lock);
key = zsdp->zsd_key = ++zsd_keyval;
ASSERT(zsd_keyval != 0);
list_insert_tail(&zsd_registered_keys, zsdp);
mutex_exit(&zsd_key_lock);
/*
* Insert for all existing zones and mark them as needing
* a create callback.
*/
mutex_enter(&zonehash_lock); /* stop the world */
for (zone = list_head(&zone_active); zone != NULL;
zone = list_next(&zone_active, zone)) {
zone_status_t status;
mutex_enter(&zone->zone_lock);
/* Skip zones that are on the way down or not yet up */
status = zone_status_get(zone);
if (status >= ZONE_IS_DOWN ||
status == ZONE_IS_UNINITIALIZED) {
mutex_exit(&zone->zone_lock);
continue;
}
t = zsd_find_mru(&zone->zone_zsd, key);
if (t != NULL) {
/*
* A zsd_configure already inserted it after
* we dropped zsd_key_lock above.
*/
mutex_exit(&zone->zone_lock);
continue;
}
t = kmem_zalloc(sizeof (*t), KM_SLEEP);
t->zsd_key = key;
t->zsd_create = create;
t->zsd_shutdown = shutdown;
t->zsd_destroy = destroy;
if (create != NULL) {
t->zsd_flags = ZSD_CREATE_NEEDED;
DTRACE_PROBE2(zsd__create__needed,
zone_t *, zone, zone_key_t, key);
}
list_insert_tail(&zone->zone_zsd, t);
mutex_exit(&zone->zone_lock);
}
mutex_exit(&zonehash_lock);
if (create != NULL) {
/* Now call the create callback for this key */
zsd_apply_all_zones(zsd_apply_create, key);
}
/*
* It is safe for consumers to use the key now, make it
* globally visible. Specifically zone_getspecific() will
* always successfully return the zone specific data associated
* with the key.
*/
*keyp = key;
}
/*
* Function called when a module is being unloaded, or otherwise wishes
* to unregister its ZSD key and callbacks.
*
* Remove from the global list and determine the functions that need to
* be called under a global lock. Then call the functions without
* holding any locks. Finally free up the zone_zsd entries. (The apply
* functions need to access the zone_zsd entries to find zsd_data etc.)
*/
int
zone_key_delete(zone_key_t key)
{
struct zsd_entry *zsdp = NULL;
zone_t *zone;
mutex_enter(&zsd_key_lock);
zsdp = zsd_find_mru(&zsd_registered_keys, key);
if (zsdp == NULL) {
mutex_exit(&zsd_key_lock);
return (-1);
}
list_remove(&zsd_registered_keys, zsdp);
mutex_exit(&zsd_key_lock);
mutex_enter(&zonehash_lock);
for (zone = list_head(&zone_active); zone != NULL;
zone = list_next(&zone_active, zone)) {
struct zsd_entry *del;
mutex_enter(&zone->zone_lock);
del = zsd_find_mru(&zone->zone_zsd, key);
if (del == NULL) {
/*
 * Somebody else got here first, e.g. the zone is going
 * away.
*/
mutex_exit(&zone->zone_lock);
continue;
}
ASSERT(del->zsd_shutdown == zsdp->zsd_shutdown);
ASSERT(del->zsd_destroy == zsdp->zsd_destroy);
if (del->zsd_shutdown != NULL &&
(del->zsd_flags & ZSD_SHUTDOWN_ALL) == 0) {
del->zsd_flags |= ZSD_SHUTDOWN_NEEDED;
DTRACE_PROBE2(zsd__shutdown__needed,
zone_t *, zone, zone_key_t, key);
}
if (del->zsd_destroy != NULL &&
(del->zsd_flags & ZSD_DESTROY_ALL) == 0) {
del->zsd_flags |= ZSD_DESTROY_NEEDED;
DTRACE_PROBE2(zsd__destroy__needed,
zone_t *, zone, zone_key_t, key);
}
mutex_exit(&zone->zone_lock);
}
mutex_exit(&zonehash_lock);
kmem_free(zsdp, sizeof (*zsdp));
/* Now call the shutdown and destroy callback for this key */
zsd_apply_all_zones(zsd_apply_shutdown, key);
zsd_apply_all_zones(zsd_apply_destroy, key);
/* Now we can free up the zsdp structures in each zone */
mutex_enter(&zonehash_lock);
for (zone = list_head(&zone_active); zone != NULL;
zone = list_next(&zone_active, zone)) {
struct zsd_entry *del;
mutex_enter(&zone->zone_lock);
del = zsd_find(&zone->zone_zsd, key);
if (del != NULL) {
list_remove(&zone->zone_zsd, del);
ASSERT(!(del->zsd_flags & ZSD_ALL_INPROGRESS));
kmem_free(del, sizeof (*del));
}
mutex_exit(&zone->zone_lock);
}
mutex_exit(&zonehash_lock);
return (0);
}
/*
* ZSD counterpart of pthread_setspecific().
*
* Since all zsd callbacks, including those with no create function,
* have an entry in zone_zsd, if the key is registered it is part of
* the zone_zsd list.
 * Return an error if the key wasn't registered.
*/
int
zone_setspecific(zone_key_t key, zone_t *zone, const void *data)
{
struct zsd_entry *t;
mutex_enter(&zone->zone_lock);
t = zsd_find_mru(&zone->zone_zsd, key);
if (t != NULL) {
/*
* Replace old value with new
*/
t->zsd_data = (void *)data;
mutex_exit(&zone->zone_lock);
return (0);
}
mutex_exit(&zone->zone_lock);
return (-1);
}
/*
* ZSD counterpart of pthread_getspecific().
*/
void *
zone_getspecific(zone_key_t key, zone_t *zone)
{
struct zsd_entry *t;
void *data;
mutex_enter(&zone->zone_lock);
t = zsd_find_mru(&zone->zone_zsd, key);
data = (t == NULL ? NULL : t->zsd_data);
mutex_exit(&zone->zone_lock);
return (data);
}
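/*
 * As noted in the ZSD overview above, zone_getspecific()/zone_setspecific()
 * provide no atomic "test-and-set". A consumer that wants to lazily allocate
 * per-zone data must supply its own lock, e.g. (a sketch only; foo_lock,
 * foo_zone_key and foo_zone_state_t are hypothetical):
 *
 *	mutex_enter(&foo_lock);
 *	if ((data = zone_getspecific(foo_zone_key, zone)) == NULL) {
 *		data = kmem_zalloc(sizeof (foo_zone_state_t), KM_SLEEP);
 *		(void) zone_setspecific(foo_zone_key, zone, data);
 *	}
 *	mutex_exit(&foo_lock);
 */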
/*
* Function used to initialize a zone's list of ZSD callbacks and data
* when the zone is being created. The callbacks are initialized from
* the template list (zsd_registered_keys). The constructor callback is
* executed later (once the zone exists and with locks dropped).
*/
static void
zone_zsd_configure(zone_t *zone)
{
struct zsd_entry *zsdp;
struct zsd_entry *t;
ASSERT(MUTEX_HELD(&zonehash_lock));
ASSERT(list_head(&zone->zone_zsd) == NULL);
mutex_enter(&zone->zone_lock);
mutex_enter(&zsd_key_lock);
for (zsdp = list_head(&zsd_registered_keys); zsdp != NULL;
zsdp = list_next(&zsd_registered_keys, zsdp)) {
/*
 * Since this zone is ZONE_IS_UNINITIALIZED, zone_key_create
* should not have added anything to it.
*/
ASSERT(zsd_find(&zone->zone_zsd, zsdp->zsd_key) == NULL);
t = kmem_zalloc(sizeof (*t), KM_SLEEP);
t->zsd_key = zsdp->zsd_key;
t->zsd_create = zsdp->zsd_create;
t->zsd_shutdown = zsdp->zsd_shutdown;
t->zsd_destroy = zsdp->zsd_destroy;
if (zsdp->zsd_create != NULL) {
t->zsd_flags = ZSD_CREATE_NEEDED;
DTRACE_PROBE2(zsd__create__needed,
zone_t *, zone, zone_key_t, zsdp->zsd_key);
}
list_insert_tail(&zone->zone_zsd, t);
}
mutex_exit(&zsd_key_lock);
mutex_exit(&zone->zone_lock);
}
enum zsd_callback_type { ZSD_CREATE, ZSD_SHUTDOWN, ZSD_DESTROY };
/*
* Helper function to execute shutdown or destructor callbacks.
*/
static void
zone_zsd_callbacks(zone_t *zone, enum zsd_callback_type ct)
{
struct zsd_entry *t;
ASSERT(ct == ZSD_SHUTDOWN || ct == ZSD_DESTROY);
ASSERT(ct != ZSD_SHUTDOWN || zone_status_get(zone) >= ZONE_IS_EMPTY);
ASSERT(ct != ZSD_DESTROY || zone_status_get(zone) >= ZONE_IS_DOWN);
/*
* Run the callback solely based on what is registered for the zone
* in zone_zsd. The global list can change independently of this
* as keys are registered and unregistered and we don't register new
* callbacks for a zone that is in the process of going away.
*/
mutex_enter(&zone->zone_lock);
for (t = list_head(&zone->zone_zsd); t != NULL;
t = list_next(&zone->zone_zsd, t)) {
zone_key_t key = t->zsd_key;
/* Skip if no callbacks registered */
if (ct == ZSD_SHUTDOWN) {
if (t->zsd_shutdown != NULL &&
(t->zsd_flags & ZSD_SHUTDOWN_ALL) == 0) {
t->zsd_flags |= ZSD_SHUTDOWN_NEEDED;
DTRACE_PROBE2(zsd__shutdown__needed,
zone_t *, zone, zone_key_t, key);
}
} else {
if (t->zsd_destroy != NULL &&
(t->zsd_flags & ZSD_DESTROY_ALL) == 0) {
t->zsd_flags |= ZSD_DESTROY_NEEDED;
DTRACE_PROBE2(zsd__destroy__needed,
zone_t *, zone, zone_key_t, key);
}
}
}
mutex_exit(&zone->zone_lock);
/* Now call the shutdown and destroy callback for this key */
zsd_apply_all_keys(zsd_apply_shutdown, zone);
zsd_apply_all_keys(zsd_apply_destroy, zone);
}
/*
* Called when the zone is going away; free ZSD-related memory, and
* destroy the zone_zsd list.
*/
static void
zone_free_zsd(zone_t *zone)
{
struct zsd_entry *t, *next;
/*
* Free all the zsd_entry's we had on this zone.
*/
mutex_enter(&zone->zone_lock);
for (t = list_head(&zone->zone_zsd); t != NULL; t = next) {
next = list_next(&zone->zone_zsd, t);
list_remove(&zone->zone_zsd, t);
ASSERT(!(t->zsd_flags & ZSD_ALL_INPROGRESS));
kmem_free(t, sizeof (*t));
}
list_destroy(&zone->zone_zsd);
mutex_exit(&zone->zone_lock);
}
/*
 * Apply a function to all zones for a particular key value.
*
* The applyfn has to drop zonehash_lock if it does some work, and
* then reacquire it before it returns.
* When the lock is dropped we don't follow list_next even
* if it is possible to do so without any hazards. This is
* because we want the design to allow for the list of zones
* to change in any arbitrary way during the time the
* lock was dropped.
*
* It is safe to restart the loop at list_head since the applyfn
* changes the zsd_flags as it does work, so a subsequent
* pass through will have no effect in applyfn, hence the loop will terminate
* in at worst O(N^2).
*/
static void
zsd_apply_all_zones(zsd_applyfn_t *applyfn, zone_key_t key)
{
zone_t *zone;
mutex_enter(&zonehash_lock);
zone = list_head(&zone_active);
while (zone != NULL) {
if ((applyfn)(&zonehash_lock, B_FALSE, zone, key)) {
/* Lock dropped - restart at head */
zone = list_head(&zone_active);
} else {
zone = list_next(&zone_active, zone);
}
}
mutex_exit(&zonehash_lock);
}
/*
* Apply a function to all keys for a particular zone.
*
* The applyfn has to drop zonehash_lock if it does some work, and
* then reacquire it before it returns.
* When the lock is dropped we don't follow list_next even
* if it is possible to do so without any hazards. This is
* because we want the design to allow for the list of zsd callbacks
* to change in any arbitrary way during the time the
* lock was dropped.
*
* It is safe to restart the loop at list_head since the applyfn
* changes the zsd_flags as it does work, so a subsequent
* pass through will have no effect in applyfn, hence the loop will terminate
* in at worst O(N^2).
*/
static void
zsd_apply_all_keys(zsd_applyfn_t *applyfn, zone_t *zone)
{
struct zsd_entry *t;
mutex_enter(&zone->zone_lock);
t = list_head(&zone->zone_zsd);
while (t != NULL) {
if ((applyfn)(NULL, B_TRUE, zone, t->zsd_key)) {
/* Lock dropped - restart at head */
t = list_head(&zone->zone_zsd);
} else {
t = list_next(&zone->zone_zsd, t);
}
}
mutex_exit(&zone->zone_lock);
}
/*
* Call the create function for the zone and key if CREATE_NEEDED
* is set.
* If some other thread gets here first and sets CREATE_INPROGRESS, then
* we wait for that thread to complete so that we can ensure that
* all the callbacks are done when we've looped over all zones/keys.
*
 * When we call the create function, we drop the global lock held by the
 * caller, and return true to tell the caller it needs to re-evaluate the
* state.
* If the caller holds zone_lock then zone_lock_held is set, and zone_lock
* remains held on exit.
*/
static boolean_t
zsd_apply_create(kmutex_t *lockp, boolean_t zone_lock_held,
zone_t *zone, zone_key_t key)
{
void *result;
struct zsd_entry *t;
boolean_t dropped;
if (lockp != NULL) {
ASSERT(MUTEX_HELD(lockp));
}
if (zone_lock_held) {
ASSERT(MUTEX_HELD(&zone->zone_lock));
} else {
mutex_enter(&zone->zone_lock);
}
t = zsd_find(&zone->zone_zsd, key);
if (t == NULL) {
/*
 * Somebody else got here first, e.g. the zone is going
 * away.
*/
if (!zone_lock_held)
mutex_exit(&zone->zone_lock);
return (B_FALSE);
}
dropped = B_FALSE;
if (zsd_wait_for_inprogress(zone, t, lockp))
dropped = B_TRUE;
if (t->zsd_flags & ZSD_CREATE_NEEDED) {
t->zsd_flags &= ~ZSD_CREATE_NEEDED;
t->zsd_flags |= ZSD_CREATE_INPROGRESS;
DTRACE_PROBE2(zsd__create__inprogress,
zone_t *, zone, zone_key_t, key);
mutex_exit(&zone->zone_lock);
if (lockp != NULL)
mutex_exit(lockp);
dropped = B_TRUE;
ASSERT(t->zsd_create != NULL);
DTRACE_PROBE2(zsd__create__start,
zone_t *, zone, zone_key_t, key);
result = (*t->zsd_create)(zone->zone_id);
DTRACE_PROBE2(zsd__create__end,
zone_t *, zone, void *, result);
ASSERT(result != NULL);
if (lockp != NULL)
mutex_enter(lockp);
mutex_enter(&zone->zone_lock);
t->zsd_data = result;
t->zsd_flags &= ~ZSD_CREATE_INPROGRESS;
t->zsd_flags |= ZSD_CREATE_COMPLETED;
cv_broadcast(&t->zsd_cv);
DTRACE_PROBE2(zsd__create__completed,
zone_t *, zone, zone_key_t, key);
}
if (!zone_lock_held)
mutex_exit(&zone->zone_lock);
return (dropped);
}
/*
* Call the shutdown function for the zone and key if SHUTDOWN_NEEDED
* is set.
* If some other thread gets here first and sets *_INPROGRESS, then
* we wait for that thread to complete so that we can ensure that
* all the callbacks are done when we've looped over all zones/keys.
*
 * When we call the shutdown function, we drop the global lock held by the
 * caller, and return true to tell the caller it needs to re-evaluate the
* state.
* If the caller holds zone_lock then zone_lock_held is set, and zone_lock
* remains held on exit.
*/
static boolean_t
zsd_apply_shutdown(kmutex_t *lockp, boolean_t zone_lock_held,
zone_t *zone, zone_key_t key)
{
struct zsd_entry *t;
void *data;
boolean_t dropped;
if (lockp != NULL) {
ASSERT(MUTEX_HELD(lockp));
}
if (zone_lock_held) {
ASSERT(MUTEX_HELD(&zone->zone_lock));
} else {
mutex_enter(&zone->zone_lock);
}
t = zsd_find(&zone->zone_zsd, key);
if (t == NULL) {
/*
 * Somebody else got here first, e.g. the zone is going
 * away.
*/
if (!zone_lock_held)
mutex_exit(&zone->zone_lock);
return (B_FALSE);
}
dropped = B_FALSE;
if (zsd_wait_for_creator(zone, t, lockp))
dropped = B_TRUE;
if (zsd_wait_for_inprogress(zone, t, lockp))
dropped = B_TRUE;
if (t->zsd_flags & ZSD_SHUTDOWN_NEEDED) {
t->zsd_flags &= ~ZSD_SHUTDOWN_NEEDED;
t->zsd_flags |= ZSD_SHUTDOWN_INPROGRESS;
DTRACE_PROBE2(zsd__shutdown__inprogress,
zone_t *, zone, zone_key_t, key);
mutex_exit(&zone->zone_lock);
if (lockp != NULL)
mutex_exit(lockp);
dropped = B_TRUE;
ASSERT(t->zsd_shutdown != NULL);
data = t->zsd_data;
DTRACE_PROBE2(zsd__shutdown__start,
zone_t *, zone, zone_key_t, key);
(t->zsd_shutdown)(zone->zone_id, data);
DTRACE_PROBE2(zsd__shutdown__end,
zone_t *, zone, zone_key_t, key);
if (lockp != NULL)
mutex_enter(lockp);
mutex_enter(&zone->zone_lock);
t->zsd_flags &= ~ZSD_SHUTDOWN_INPROGRESS;
t->zsd_flags |= ZSD_SHUTDOWN_COMPLETED;
cv_broadcast(&t->zsd_cv);
DTRACE_PROBE2(zsd__shutdown__completed,
zone_t *, zone, zone_key_t, key);
}
if (!zone_lock_held)
mutex_exit(&zone->zone_lock);
return (dropped);
}
/*
* Call the destroy function for the zone and key if DESTROY_NEEDED
* is set.
* If some other thread gets here first and sets *_INPROGRESS, then
* we wait for that thread to complete so that we can ensure that
* all the callbacks are done when we've looped over all zones/keys.
*
 * When we call the destroy function, we drop the global lock held by the
 * caller, and return true to tell the caller it needs to re-evaluate the
* state.
* If the caller holds zone_lock then zone_lock_held is set, and zone_lock
* remains held on exit.
*/
static boolean_t
zsd_apply_destroy(kmutex_t *lockp, boolean_t zone_lock_held,
zone_t *zone, zone_key_t key)
{
struct zsd_entry *t;
void *data;
boolean_t dropped;
if (lockp != NULL) {
ASSERT(MUTEX_HELD(lockp));
}
if (zone_lock_held) {
ASSERT(MUTEX_HELD(&zone->zone_lock));
} else {
mutex_enter(&zone->zone_lock);
}
t = zsd_find(&zone->zone_zsd, key);
if (t == NULL) {
/*
 * Somebody else got here first, e.g. the zone is going
 * away.
*/
if (!zone_lock_held)
mutex_exit(&zone->zone_lock);
return (B_FALSE);
}
dropped = B_FALSE;
if (zsd_wait_for_creator(zone, t, lockp))
dropped = B_TRUE;
if (zsd_wait_for_inprogress(zone, t, lockp))
dropped = B_TRUE;
if (t->zsd_flags & ZSD_DESTROY_NEEDED) {
t->zsd_flags &= ~ZSD_DESTROY_NEEDED;
t->zsd_flags |= ZSD_DESTROY_INPROGRESS;
DTRACE_PROBE2(zsd__destroy__inprogress,
zone_t *, zone, zone_key_t, key);
mutex_exit(&zone->zone_lock);
if (lockp != NULL)
mutex_exit(lockp);
dropped = B_TRUE;
ASSERT(t->zsd_destroy != NULL);
data = t->zsd_data;
DTRACE_PROBE2(zsd__destroy__start,
zone_t *, zone, zone_key_t, key);
(t->zsd_destroy)(zone->zone_id, data);
DTRACE_PROBE2(zsd__destroy__end,
zone_t *, zone, zone_key_t, key);
if (lockp != NULL)
mutex_enter(lockp);
mutex_enter(&zone->zone_lock);
t->zsd_data = NULL;
t->zsd_flags &= ~ZSD_DESTROY_INPROGRESS;
t->zsd_flags |= ZSD_DESTROY_COMPLETED;
cv_broadcast(&t->zsd_cv);
DTRACE_PROBE2(zsd__destroy__completed,
zone_t *, zone, zone_key_t, key);
}
if (!zone_lock_held)
mutex_exit(&zone->zone_lock);
return (dropped);
}
/*
* Wait for any CREATE_NEEDED flag to be cleared.
* Returns true if lockp was temporarily dropped while waiting.
*/
static boolean_t
zsd_wait_for_creator(zone_t *zone, struct zsd_entry *t, kmutex_t *lockp)
{
boolean_t dropped = B_FALSE;
while (t->zsd_flags & ZSD_CREATE_NEEDED) {
DTRACE_PROBE2(zsd__wait__for__creator,
zone_t *, zone, struct zsd_entry *, t);
if (lockp != NULL) {
dropped = B_TRUE;
mutex_exit(lockp);
}
cv_wait(&t->zsd_cv, &zone->zone_lock);
if (lockp != NULL) {
/* First drop zone_lock to preserve order */
mutex_exit(&zone->zone_lock);
mutex_enter(lockp);
mutex_enter(&zone->zone_lock);
}
}
return (dropped);
}
/*
* Wait for any INPROGRESS flag to be cleared.
* Returns true if lockp was temporarily dropped while waiting.
*/
static boolean_t
zsd_wait_for_inprogress(zone_t *zone, struct zsd_entry *t, kmutex_t *lockp)
{
boolean_t dropped = B_FALSE;
while (t->zsd_flags & ZSD_ALL_INPROGRESS) {
DTRACE_PROBE2(zsd__wait__for__inprogress,
zone_t *, zone, struct zsd_entry *, t);
if (lockp != NULL) {
dropped = B_TRUE;
mutex_exit(lockp);
}
cv_wait(&t->zsd_cv, &zone->zone_lock);
if (lockp != NULL) {
/* First drop zone_lock to preserve order */
mutex_exit(&zone->zone_lock);
mutex_enter(lockp);
mutex_enter(&zone->zone_lock);
}
}
return (dropped);
}
/*
* Frees memory associated with the zone dataset list.
*/
static void
zone_free_datasets(zone_t *zone)
{
zone_dataset_t *t, *next;
for (t = list_head(&zone->zone_datasets); t != NULL; t = next) {
next = list_next(&zone->zone_datasets, t);
list_remove(&zone->zone_datasets, t);
kmem_free(t->zd_dataset, strlen(t->zd_dataset) + 1);
kmem_free(t, sizeof (*t));
}
list_destroy(&zone->zone_datasets);
}
/*
* zone.cpu-shares resource control support.
*/
/*ARGSUSED*/
static rctl_qty_t
zone_cpu_shares_usage(rctl_t *rctl, struct proc *p)
{
ASSERT(MUTEX_HELD(&p->p_lock));
return (p->p_zone->zone_shares);
}
/*ARGSUSED*/
static int
zone_cpu_shares_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
rctl_qty_t nv)
{
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
if (e->rcep_p.zone == NULL)
return (0);
e->rcep_p.zone->zone_shares = nv;
return (0);
}
static rctl_ops_t zone_cpu_shares_ops = {
rcop_no_action,
zone_cpu_shares_usage,
zone_cpu_shares_set,
rcop_no_test
};
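/*
 * Each of these rctl_ops_t vectors is hooked up to its named resource
 * control via rctl_register() in zone_init(). As an illustration of the
 * pattern (the flag and limit values here are only representative; see
 * zone_init() for the authoritative registration):
 *
 *	rc_zone_cpu_shares = rctl_register("zone.cpu-shares",
 *	    RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
 *	    RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
 *	    FSS_MAXSHARES, FSS_MAXSHARES, &zone_cpu_shares_ops);
 */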
/*
* zone.cpu-cap resource control support.
*/
/*ARGSUSED*/
static rctl_qty_t
zone_cpu_cap_get(rctl_t *rctl, struct proc *p)
{
ASSERT(MUTEX_HELD(&p->p_lock));
return (cpucaps_zone_get(p->p_zone));
}
/*ARGSUSED*/
static int
zone_cpu_cap_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
rctl_qty_t nv)
{
zone_t *zone = e->rcep_p.zone;
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
if (zone == NULL)
return (0);
/*
* set cap to the new value.
*/
return (cpucaps_zone_set(zone, nv));
}
static rctl_ops_t zone_cpu_cap_ops = {
rcop_no_action,
zone_cpu_cap_get,
zone_cpu_cap_set,
rcop_no_test
};
/*ARGSUSED*/
static rctl_qty_t
zone_lwps_usage(rctl_t *r, proc_t *p)
{
rctl_qty_t nlwps;
zone_t *zone = p->p_zone;
ASSERT(MUTEX_HELD(&p->p_lock));
mutex_enter(&zone->zone_nlwps_lock);
nlwps = zone->zone_nlwps;
mutex_exit(&zone->zone_nlwps_lock);
return (nlwps);
}
/*ARGSUSED*/
static int
zone_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl,
rctl_qty_t incr, uint_t flags)
{
rctl_qty_t nlwps;
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
if (e->rcep_p.zone == NULL)
return (0);
ASSERT(MUTEX_HELD(&(e->rcep_p.zone->zone_nlwps_lock)));
nlwps = e->rcep_p.zone->zone_nlwps;
if (nlwps + incr > rcntl->rcv_value)
return (1);
return (0);
}
/*ARGSUSED*/
static int
zone_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv)
{
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
if (e->rcep_p.zone == NULL)
return (0);
e->rcep_p.zone->zone_nlwps_ctl = nv;
return (0);
}
static rctl_ops_t zone_lwps_ops = {
rcop_no_action,
zone_lwps_usage,
zone_lwps_set,
zone_lwps_test,
};
/*ARGSUSED*/
static rctl_qty_t
zone_procs_usage(rctl_t *r, proc_t *p)
{
rctl_qty_t nprocs;
zone_t *zone = p->p_zone;
ASSERT(MUTEX_HELD(&p->p_lock));
mutex_enter(&zone->zone_nlwps_lock);
nprocs = zone->zone_nprocs;
mutex_exit(&zone->zone_nlwps_lock);
return (nprocs);
}
/*ARGSUSED*/
static int
zone_procs_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl,
rctl_qty_t incr, uint_t flags)
{
rctl_qty_t nprocs;
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
if (e->rcep_p.zone == NULL)
return (0);
ASSERT(MUTEX_HELD(&(e->rcep_p.zone->zone_nlwps_lock)));
nprocs = e->rcep_p.zone->zone_nprocs;
if (nprocs + incr > rcntl->rcv_value)
return (1);
return (0);
}
/*ARGSUSED*/
static int
zone_procs_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv)
{
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
if (e->rcep_p.zone == NULL)
return (0);
e->rcep_p.zone->zone_nprocs_ctl = nv;
return (0);
}
static rctl_ops_t zone_procs_ops = {
rcop_no_action,
zone_procs_usage,
zone_procs_set,
zone_procs_test,
};
/*ARGSUSED*/
static rctl_qty_t
zone_shmmax_usage(rctl_t *rctl, struct proc *p)
{
ASSERT(MUTEX_HELD(&p->p_lock));
return (p->p_zone->zone_shmmax);
}
/*ARGSUSED*/
static int
zone_shmmax_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
rctl_qty_t incr, uint_t flags)
{
rctl_qty_t v;
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
v = e->rcep_p.zone->zone_shmmax + incr;
if (v > rval->rcv_value)
return (1);
return (0);
}
static rctl_ops_t zone_shmmax_ops = {
rcop_no_action,
zone_shmmax_usage,
rcop_no_set,
zone_shmmax_test
};
/*ARGSUSED*/
static rctl_qty_t
zone_shmmni_usage(rctl_t *rctl, struct proc *p)
{
ASSERT(MUTEX_HELD(&p->p_lock));
return (p->p_zone->zone_ipc.ipcq_shmmni);
}
/*ARGSUSED*/
static int
zone_shmmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
rctl_qty_t incr, uint_t flags)
{
rctl_qty_t v;
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
v = e->rcep_p.zone->zone_ipc.ipcq_shmmni + incr;
if (v > rval->rcv_value)
return (1);
return (0);
}
static rctl_ops_t zone_shmmni_ops = {
rcop_no_action,
zone_shmmni_usage,
rcop_no_set,
zone_shmmni_test
};
/*ARGSUSED*/
static rctl_qty_t
zone_semmni_usage(rctl_t *rctl, struct proc *p)
{
ASSERT(MUTEX_HELD(&p->p_lock));
return (p->p_zone->zone_ipc.ipcq_semmni);
}
/*ARGSUSED*/
static int
zone_semmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
rctl_qty_t incr, uint_t flags)
{
rctl_qty_t v;
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
v = e->rcep_p.zone->zone_ipc.ipcq_semmni + incr;
if (v > rval->rcv_value)
return (1);
return (0);
}
static rctl_ops_t zone_semmni_ops = {
rcop_no_action,
zone_semmni_usage,
rcop_no_set,
zone_semmni_test
};
/*ARGSUSED*/
static rctl_qty_t
zone_msgmni_usage(rctl_t *rctl, struct proc *p)
{
ASSERT(MUTEX_HELD(&p->p_lock));
return (p->p_zone->zone_ipc.ipcq_msgmni);
}
/*ARGSUSED*/
static int
zone_msgmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
rctl_qty_t incr, uint_t flags)
{
rctl_qty_t v;
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
v = e->rcep_p.zone->zone_ipc.ipcq_msgmni + incr;
if (v > rval->rcv_value)
return (1);
return (0);
}
static rctl_ops_t zone_msgmni_ops = {
rcop_no_action,
zone_msgmni_usage,
rcop_no_set,
zone_msgmni_test
};
/*ARGSUSED*/
static rctl_qty_t
zone_locked_mem_usage(rctl_t *rctl, struct proc *p)
{
rctl_qty_t q;
ASSERT(MUTEX_HELD(&p->p_lock));
mutex_enter(&p->p_zone->zone_mem_lock);
q = p->p_zone->zone_locked_mem;
mutex_exit(&p->p_zone->zone_mem_lock);
return (q);
}
/*ARGSUSED*/
static int
zone_locked_mem_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
{
rctl_qty_t q;
zone_t *z;
z = e->rcep_p.zone;
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(MUTEX_HELD(&z->zone_mem_lock));
q = z->zone_locked_mem;
if (q + incr > rcntl->rcv_value)
return (1);
return (0);
}
/*ARGSUSED*/
static int
zone_locked_mem_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
rctl_qty_t nv)
{
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
if (e->rcep_p.zone == NULL)
return (0);
e->rcep_p.zone->zone_locked_mem_ctl = nv;
return (0);
}
static rctl_ops_t zone_locked_mem_ops = {
rcop_no_action,
zone_locked_mem_usage,
zone_locked_mem_set,
zone_locked_mem_test
};
/*ARGSUSED*/
static rctl_qty_t
zone_max_swap_usage(rctl_t *rctl, struct proc *p)
{
rctl_qty_t q;
zone_t *z = p->p_zone;
ASSERT(MUTEX_HELD(&p->p_lock));
mutex_enter(&z->zone_mem_lock);
q = z->zone_max_swap;
mutex_exit(&z->zone_mem_lock);
return (q);
}
/*ARGSUSED*/
static int
zone_max_swap_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
{
rctl_qty_t q;
zone_t *z;
z = e->rcep_p.zone;
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(MUTEX_HELD(&z->zone_mem_lock));
q = z->zone_max_swap;
if (q + incr > rcntl->rcv_value)
return (1);
return (0);
}
/*ARGSUSED*/
static int
zone_max_swap_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
rctl_qty_t nv)
{
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
if (e->rcep_p.zone == NULL)
return (0);
e->rcep_p.zone->zone_max_swap_ctl = nv;
return (0);
}
static rctl_ops_t zone_max_swap_ops = {
rcop_no_action,
zone_max_swap_usage,
zone_max_swap_set,
zone_max_swap_test
};
/*ARGSUSED*/
static rctl_qty_t
zone_max_lofi_usage(rctl_t *rctl, struct proc *p)
{
rctl_qty_t q;
zone_t *z = p->p_zone;
ASSERT(MUTEX_HELD(&p->p_lock));
mutex_enter(&z->zone_rctl_lock);
q = z->zone_max_lofi;
mutex_exit(&z->zone_rctl_lock);
return (q);
}
/*ARGSUSED*/
static int
zone_max_lofi_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
{
rctl_qty_t q;
zone_t *z;
z = e->rcep_p.zone;
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(MUTEX_HELD(&z->zone_rctl_lock));
q = z->zone_max_lofi;
if (q + incr > rcntl->rcv_value)
return (1);
return (0);
}
/*ARGSUSED*/
static int
zone_max_lofi_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
rctl_qty_t nv)
{
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
if (e->rcep_p.zone == NULL)
return (0);
e->rcep_p.zone->zone_max_lofi_ctl = nv;
return (0);
}
static rctl_ops_t zone_max_lofi_ops = {
rcop_no_action,
zone_max_lofi_usage,
zone_max_lofi_set,
zone_max_lofi_test
};
/*
* Helper function to brand the zone with a unique ID.
*/
static void
zone_uniqid(zone_t *zone)
{
static uint64_t uniqid = 0;
ASSERT(MUTEX_HELD(&zonehash_lock));
zone->zone_uniqid = uniqid++;
}
/*
* Returns a held pointer to the "kcred" for the specified zone.
*/
struct cred *
zone_get_kcred(zoneid_t zoneid)
{
zone_t *zone;
cred_t *cr;
if ((zone = zone_find_by_id(zoneid)) == NULL)
return (NULL);
cr = zone->zone_kcred;
crhold(cr);
zone_rele(zone);
return (cr);
}
static int
zone_lockedmem_kstat_update(kstat_t *ksp, int rw)
{
zone_t *zone = ksp->ks_private;
zone_kstat_t *zk = ksp->ks_data;
if (rw == KSTAT_WRITE)
return (EACCES);
zk->zk_usage.value.ui64 = zone->zone_locked_mem;
zk->zk_value.value.ui64 = zone->zone_locked_mem_ctl;
return (0);
}
static int
zone_nprocs_kstat_update(kstat_t *ksp, int rw)
{
zone_t *zone = ksp->ks_private;
zone_kstat_t *zk = ksp->ks_data;
if (rw == KSTAT_WRITE)
return (EACCES);
zk->zk_usage.value.ui64 = zone->zone_nprocs;
zk->zk_value.value.ui64 = zone->zone_nprocs_ctl;
return (0);
}
static int
zone_swapresv_kstat_update(kstat_t *ksp, int rw)
{
zone_t *zone = ksp->ks_private;
zone_kstat_t *zk = ksp->ks_data;
if (rw == KSTAT_WRITE)
return (EACCES);
zk->zk_usage.value.ui64 = zone->zone_max_swap;
zk->zk_value.value.ui64 = zone->zone_max_swap_ctl;
return (0);
}
static kstat_t *
zone_kstat_create_common(zone_t *zone, char *name,
int (*updatefunc) (kstat_t *, int))
{
kstat_t *ksp;
zone_kstat_t *zk;
ksp = rctl_kstat_create_zone(zone, name, KSTAT_TYPE_NAMED,
sizeof (zone_kstat_t) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL);
if (ksp == NULL)
return (NULL);
zk = ksp->ks_data = kmem_alloc(sizeof (zone_kstat_t), KM_SLEEP);
ksp->ks_data_size += strlen(zone->zone_name) + 1;
kstat_named_init(&zk->zk_zonename, "zonename", KSTAT_DATA_STRING);
kstat_named_setstr(&zk->zk_zonename, zone->zone_name);
kstat_named_init(&zk->zk_usage, "usage", KSTAT_DATA_UINT64);
kstat_named_init(&zk->zk_value, "value", KSTAT_DATA_UINT64);
ksp->ks_update = updatefunc;
ksp->ks_private = zone;
kstat_install(ksp);
return (ksp);
}
static int
zone_mcap_kstat_update(kstat_t *ksp, int rw)
{
zone_t *zone = ksp->ks_private;
zone_mcap_kstat_t *zmp = ksp->ks_data;
if (rw == KSTAT_WRITE)
return (EACCES);
zmp->zm_pgpgin.value.ui64 = zone->zone_pgpgin;
zmp->zm_anonpgin.value.ui64 = zone->zone_anonpgin;
zmp->zm_execpgin.value.ui64 = zone->zone_execpgin;
zmp->zm_fspgin.value.ui64 = zone->zone_fspgin;
zmp->zm_anon_alloc_fail.value.ui64 = zone->zone_anon_alloc_fail;
return (0);
}
static kstat_t *
zone_mcap_kstat_create(zone_t *zone)
{
kstat_t *ksp;
zone_mcap_kstat_t *zmp;
if ((ksp = kstat_create_zone("memory_cap", zone->zone_id,
zone->zone_name, "zone_memory_cap", KSTAT_TYPE_NAMED,
sizeof (zone_mcap_kstat_t) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL)
return (NULL);
if (zone->zone_id != GLOBAL_ZONEID)
kstat_zone_add(ksp, GLOBAL_ZONEID);
zmp = ksp->ks_data = kmem_zalloc(sizeof (zone_mcap_kstat_t), KM_SLEEP);
ksp->ks_data_size += strlen(zone->zone_name) + 1;
ksp->ks_lock = &zone->zone_mcap_lock;
zone->zone_mcap_stats = zmp;
/* The kstat "name" field is not large enough for a full zonename */
kstat_named_init(&zmp->zm_zonename, "zonename", KSTAT_DATA_STRING);
kstat_named_setstr(&zmp->zm_zonename, zone->zone_name);
kstat_named_init(&zmp->zm_pgpgin, "pgpgin", KSTAT_DATA_UINT64);
kstat_named_init(&zmp->zm_anonpgin, "anonpgin", KSTAT_DATA_UINT64);
kstat_named_init(&zmp->zm_execpgin, "execpgin", KSTAT_DATA_UINT64);
kstat_named_init(&zmp->zm_fspgin, "fspgin", KSTAT_DATA_UINT64);
kstat_named_init(&zmp->zm_anon_alloc_fail, "anon_alloc_fail",
KSTAT_DATA_UINT64);
ksp->ks_update = zone_mcap_kstat_update;
ksp->ks_private = zone;
kstat_install(ksp);
return (ksp);
}
static int
zone_misc_kstat_update(kstat_t *ksp, int rw)
{
zone_t *zone = ksp->ks_private;
zone_misc_kstat_t *zmp = ksp->ks_data;
hrtime_t hrtime;
uint64_t tmp;
if (rw == KSTAT_WRITE)
return (EACCES);
tmp = cpu_uarray_sum(zone->zone_ustate, ZONE_USTATE_STIME);
hrtime = UINT64_OVERFLOW_TO_INT64(tmp);
scalehrtime(&hrtime);
zmp->zm_stime.value.ui64 = hrtime;
tmp = cpu_uarray_sum(zone->zone_ustate, ZONE_USTATE_UTIME);
hrtime = UINT64_OVERFLOW_TO_INT64(tmp);
scalehrtime(&hrtime);
zmp->zm_utime.value.ui64 = hrtime;
tmp = cpu_uarray_sum(zone->zone_ustate, ZONE_USTATE_WTIME);
hrtime = UINT64_OVERFLOW_TO_INT64(tmp);
scalehrtime(&hrtime);
zmp->zm_wtime.value.ui64 = hrtime;
zmp->zm_avenrun1.value.ui32 = zone->zone_avenrun[0];
zmp->zm_avenrun5.value.ui32 = zone->zone_avenrun[1];
zmp->zm_avenrun15.value.ui32 = zone->zone_avenrun[2];
zmp->zm_ffcap.value.ui32 = zone->zone_ffcap;
zmp->zm_ffnoproc.value.ui32 = zone->zone_ffnoproc;
zmp->zm_ffnomem.value.ui32 = zone->zone_ffnomem;
zmp->zm_ffmisc.value.ui32 = zone->zone_ffmisc;
zmp->zm_nested_intp.value.ui32 = zone->zone_nested_intp;
zmp->zm_init_pid.value.ui32 = zone->zone_proc_initpid;
zmp->zm_boot_time.value.ui64 = (uint64_t)zone->zone_boot_time;
return (0);
}
static kstat_t *
zone_misc_kstat_create(zone_t *zone)
{
kstat_t *ksp;
zone_misc_kstat_t *zmp;
if ((ksp = kstat_create_zone("zones", zone->zone_id,
zone->zone_name, "zone_misc", KSTAT_TYPE_NAMED,
sizeof (zone_misc_kstat_t) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL)
return (NULL);
if (zone->zone_id != GLOBAL_ZONEID)
kstat_zone_add(ksp, GLOBAL_ZONEID);
zmp = ksp->ks_data = kmem_zalloc(sizeof (zone_misc_kstat_t), KM_SLEEP);
ksp->ks_data_size += strlen(zone->zone_name) + 1;
ksp->ks_lock = &zone->zone_misc_lock;
zone->zone_misc_stats = zmp;
/* The kstat "name" field is not large enough for a full zonename */
kstat_named_init(&zmp->zm_zonename, "zonename", KSTAT_DATA_STRING);
kstat_named_setstr(&zmp->zm_zonename, zone->zone_name);
kstat_named_init(&zmp->zm_utime, "nsec_user", KSTAT_DATA_UINT64);
kstat_named_init(&zmp->zm_stime, "nsec_sys", KSTAT_DATA_UINT64);
kstat_named_init(&zmp->zm_wtime, "nsec_waitrq", KSTAT_DATA_UINT64);
kstat_named_init(&zmp->zm_avenrun1, "avenrun_1min", KSTAT_DATA_UINT32);
kstat_named_init(&zmp->zm_avenrun5, "avenrun_5min", KSTAT_DATA_UINT32);
kstat_named_init(&zmp->zm_avenrun15, "avenrun_15min",
KSTAT_DATA_UINT32);
kstat_named_init(&zmp->zm_ffcap, "forkfail_cap", KSTAT_DATA_UINT32);
kstat_named_init(&zmp->zm_ffnoproc, "forkfail_noproc",
KSTAT_DATA_UINT32);
kstat_named_init(&zmp->zm_ffnomem, "forkfail_nomem", KSTAT_DATA_UINT32);
kstat_named_init(&zmp->zm_ffmisc, "forkfail_misc", KSTAT_DATA_UINT32);
kstat_named_init(&zmp->zm_nested_intp, "nested_interp",
KSTAT_DATA_UINT32);
kstat_named_init(&zmp->zm_init_pid, "init_pid", KSTAT_DATA_UINT32);
kstat_named_init(&zmp->zm_boot_time, "boot_time", KSTAT_DATA_UINT64);
ksp->ks_update = zone_misc_kstat_update;
ksp->ks_private = zone;
kstat_install(ksp);
return (ksp);
}
static void
zone_kstat_create(zone_t *zone)
{
zone->zone_lockedmem_kstat = zone_kstat_create_common(zone,
"lockedmem", zone_lockedmem_kstat_update);
zone->zone_swapresv_kstat = zone_kstat_create_common(zone,
"swapresv", zone_swapresv_kstat_update);
zone->zone_nprocs_kstat = zone_kstat_create_common(zone,
"nprocs", zone_nprocs_kstat_update);
if ((zone->zone_mcap_ksp = zone_mcap_kstat_create(zone)) == NULL) {
zone->zone_mcap_stats = kmem_zalloc(
sizeof (zone_mcap_kstat_t), KM_SLEEP);
}
if ((zone->zone_misc_ksp = zone_misc_kstat_create(zone)) == NULL) {
zone->zone_misc_stats = kmem_zalloc(
sizeof (zone_misc_kstat_t), KM_SLEEP);
}
}
static void
zone_kstat_delete_common(kstat_t **pkstat, size_t datasz)
{
void *data;
if (*pkstat != NULL) {
data = (*pkstat)->ks_data;
kstat_delete(*pkstat);
kmem_free(data, datasz);
*pkstat = NULL;
}
}
static void
zone_kstat_delete(zone_t *zone)
{
zone_kstat_delete_common(&zone->zone_lockedmem_kstat,
sizeof (zone_kstat_t));
zone_kstat_delete_common(&zone->zone_swapresv_kstat,
sizeof (zone_kstat_t));
zone_kstat_delete_common(&zone->zone_nprocs_kstat,
sizeof (zone_kstat_t));
zone_kstat_delete_common(&zone->zone_mcap_ksp,
sizeof (zone_mcap_kstat_t));
zone_kstat_delete_common(&zone->zone_misc_ksp,
sizeof (zone_misc_kstat_t));
}
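/*
 * From user level these kstats can be read with libkstat(3LIB). A minimal
 * sketch (error handling omitted; "zoneid" is a placeholder) that reads the
 * "zone_misc" data published above:
 *
 *	#include <kstat.h>
 *	#include <stdio.h>
 *
 *	kstat_ctl_t *kc = kstat_open();
 *	kstat_t *ksp = kstat_lookup(kc, "zones", zoneid, NULL);
 *	kstat_named_t *kn;
 *
 *	if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
 *	    (kn = kstat_data_lookup(ksp, "nsec_user")) != NULL)
 *		(void) printf("user nsec: %llu\n", kn->value.ui64);
 *	(void) kstat_close(kc);
 */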
/*
* Called very early on in boot to initialize the ZSD list so that
* zone_key_create() can be called before zone_init(). It also initializes
* portions of zone0 which may be used before zone_init() is called. The
* variable "global_zone" will be set when zone0 is fully initialized by
* zone_init().
*/
void
zone_zsd_init(void)
{
mutex_init(&zonehash_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&zsd_key_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&zsd_registered_keys, sizeof (struct zsd_entry),
offsetof(struct zsd_entry, zsd_linkage));
list_create(&zone_active, sizeof (zone_t),
offsetof(zone_t, zone_linkage));
list_create(&zone_deathrow, sizeof (zone_t),
offsetof(zone_t, zone_linkage));
mutex_init(&zone0.zone_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&zone0.zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&zone0.zone_mem_lock, NULL, MUTEX_DEFAULT, NULL);
zone0.zone_shares = 1;
zone0.zone_nlwps = 0;
zone0.zone_nlwps_ctl = INT_MAX;
zone0.zone_nprocs = 0;
zone0.zone_nprocs_ctl = INT_MAX;
zone0.zone_locked_mem = 0;
zone0.zone_locked_mem_ctl = UINT64_MAX;
ASSERT(zone0.zone_max_swap == 0);
zone0.zone_max_swap_ctl = UINT64_MAX;
zone0.zone_max_lofi = 0;
zone0.zone_max_lofi_ctl = UINT64_MAX;
zone0.zone_shmmax = 0;
zone0.zone_ipc.ipcq_shmmni = 0;
zone0.zone_ipc.ipcq_semmni = 0;
zone0.zone_ipc.ipcq_msgmni = 0;
zone0.zone_name = GLOBAL_ZONENAME;
zone0.zone_nodename = utsname.nodename;
zone0.zone_domain = srpc_domain;
zone0.zone_hostid = HW_INVALID_HOSTID;
zone0.zone_fs_allowed = NULL;
psecflags_default(&zone0.zone_secflags);
zone0.zone_ref = 1;
zone0.zone_id = GLOBAL_ZONEID;
zone0.zone_status = ZONE_IS_RUNNING;
zone0.zone_rootpath = "/";
zone0.zone_rootpathlen = 2;
zone0.zone_psetid = ZONE_PS_INVAL;
zone0.zone_ncpus = 0;
zone0.zone_ncpus_online = 0;
zone0.zone_proc_initpid = 1;
zone0.zone_initname = initname;
zone0.zone_lockedmem_kstat = NULL;
zone0.zone_swapresv_kstat = NULL;
zone0.zone_nprocs_kstat = NULL;
list_create(&zone0.zone_ref_list, sizeof (zone_ref_t),
offsetof(zone_ref_t, zref_linkage));
list_create(&zone0.zone_zsd, sizeof (struct zsd_entry),
offsetof(struct zsd_entry, zsd_linkage));
list_insert_head(&zone_active, &zone0);
/*
* The root filesystem is not mounted yet, so zone_rootvp cannot be set
* to anything meaningful. It is assigned to be 'rootdir' in
* vfs_mountroot().
*/
zone0.zone_rootvp = NULL;
zone0.zone_vfslist = NULL;
zone0.zone_bootargs = initargs;
zone0.zone_privset = kmem_alloc(sizeof (priv_set_t), KM_SLEEP);
/*
* The global zone has all privileges
*/
priv_fillset(zone0.zone_privset);
/*
* Add p0 to the global zone
*/
zone0.zone_zsched = &p0;
p0.p_zone = &zone0;
}
/*
* Compute a hash value based on the contents of the label and the DOI. The
* hash algorithm is somewhat arbitrary, but is based on the observation that
* humans will likely pick labels that differ by amounts that work out to be
* multiples of the number of hash chains, and thus stirring in some primes
* should help.
*/
static uint_t
hash_bylabel(void *hdata, mod_hash_key_t key)
{
const ts_label_t *lab = (ts_label_t *)key;
const uint32_t *up, *ue;
uint_t hash;
int i;
_NOTE(ARGUNUSED(hdata));
hash = lab->tsl_doi + (lab->tsl_doi << 1);
/* we depend on alignment of label, but not representation */
up = (const uint32_t *)&lab->tsl_label;
ue = up + sizeof (lab->tsl_label) / sizeof (*up);
i = 1;
while (up < ue) {
/* using 2^n + 1, 1 <= n <= 16 as source of many primes */
hash += *up + (*up << ((i % 16) + 1));
up++;
i++;
}
return (hash);
}
/*
* All that mod_hash cares about here is zero (equal) versus non-zero (not
* equal). This may need to be changed if less than / greater than is ever
* needed.
*/
static int
hash_labelkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
ts_label_t *lab1 = (ts_label_t *)key1;
ts_label_t *lab2 = (ts_label_t *)key2;
return (label_equal(lab1, lab2) ? 0 : 1);
}
/*
* Called by main() to initialize the zones framework.
*/
void
zone_init(void)
{
rctl_dict_entry_t *rde;
rctl_val_t *dval;
rctl_set_t *set;
rctl_alloc_gp_t *gp;
rctl_entity_p_t e;
int res;
ASSERT(curproc == &p0);
/*
* Create ID space for zone IDs. ID 0 is reserved for the
* global zone.
*/
zoneid_space = id_space_create("zoneid_space", 1, MAX_ZONEID);
/*
* Initialize generic zone resource controls, if any.
*/
rc_zone_cpu_shares = rctl_register("zone.cpu-shares",
RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
FSS_MAXSHARES, FSS_MAXSHARES, &zone_cpu_shares_ops);
rc_zone_cpu_cap = rctl_register("zone.cpu-cap",
RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_ALWAYS |
	    RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER |
RCTL_GLOBAL_INFINITE,
MAXCAP, MAXCAP, &zone_cpu_cap_ops);
rc_zone_nlwps = rctl_register("zone.max-lwps", RCENTITY_ZONE,
RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT,
INT_MAX, INT_MAX, &zone_lwps_ops);
rc_zone_nprocs = rctl_register("zone.max-processes", RCENTITY_ZONE,
RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT,
INT_MAX, INT_MAX, &zone_procs_ops);
/*
* System V IPC resource controls
*/
rc_zone_msgmni = rctl_register("zone.max-msg-ids",
RCENTITY_ZONE, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC |
RCTL_GLOBAL_COUNT, IPC_IDS_MAX, IPC_IDS_MAX, &zone_msgmni_ops);
rc_zone_semmni = rctl_register("zone.max-sem-ids",
RCENTITY_ZONE, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC |
RCTL_GLOBAL_COUNT, IPC_IDS_MAX, IPC_IDS_MAX, &zone_semmni_ops);
rc_zone_shmmni = rctl_register("zone.max-shm-ids",
RCENTITY_ZONE, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC |
RCTL_GLOBAL_COUNT, IPC_IDS_MAX, IPC_IDS_MAX, &zone_shmmni_ops);
rc_zone_shmmax = rctl_register("zone.max-shm-memory",
RCENTITY_ZONE, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC |
RCTL_GLOBAL_BYTES, UINT64_MAX, UINT64_MAX, &zone_shmmax_ops);
/*
* Create a rctl_val with PRIVILEGED, NOACTION, value = 1. Then attach
* this at the head of the rctl_dict_entry for ``zone.cpu-shares''.
*/
dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
bzero(dval, sizeof (rctl_val_t));
dval->rcv_value = 1;
dval->rcv_privilege = RCPRIV_PRIVILEGED;
dval->rcv_flagaction = RCTL_LOCAL_NOACTION;
dval->rcv_action_recip_pid = -1;
rde = rctl_dict_lookup("zone.cpu-shares");
(void) rctl_val_list_insert(&rde->rcd_default_value, dval);
rc_zone_locked_mem = rctl_register("zone.max-locked-memory",
RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
&zone_locked_mem_ops);
rc_zone_max_swap = rctl_register("zone.max-swap",
RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
&zone_max_swap_ops);
rc_zone_max_lofi = rctl_register("zone.max-lofi",
RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT |
RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
&zone_max_lofi_ops);
/*
* Initialize the ``global zone''.
*/
set = rctl_set_create();
gp = rctl_set_init_prealloc(RCENTITY_ZONE);
mutex_enter(&p0.p_lock);
e.rcep_p.zone = &zone0;
e.rcep_t = RCENTITY_ZONE;
zone0.zone_rctls = rctl_set_init(RCENTITY_ZONE, &p0, &e, set,
gp);
zone0.zone_nlwps = p0.p_lwpcnt;
zone0.zone_nprocs = 1;
zone0.zone_ntasks = 1;
mutex_exit(&p0.p_lock);
zone0.zone_restart_init = B_TRUE;
zone0.zone_brand = &native_brand;
rctl_prealloc_destroy(gp);
/*
* pool_default hasn't been initialized yet, so we let pool_init()
* take care of making sure the global zone is in the default pool.
*/
/*
* Initialize global zone kstats
*/
zone_kstat_create(&zone0);
/*
* Initialize zone label.
	 * MLPs are initialized when tnzonecfg is loaded.
*/
zone0.zone_slabel = l_admin_low;
rw_init(&zone0.zone_mlps.mlpl_rwlock, NULL, RW_DEFAULT, NULL);
label_hold(l_admin_low);
/*
	 * Initialize the lock for the database structure used by mntfs.
*/
rw_init(&zone0.zone_mntfs_db_lock, NULL, RW_DEFAULT, NULL);
zone0.zone_ustate = cpu_uarray_zalloc(ZONE_USTATE_MAX, KM_SLEEP);
mutex_enter(&zonehash_lock);
zone_uniqid(&zone0);
ASSERT(zone0.zone_uniqid == GLOBAL_ZONEUNIQID);
zonehashbyid = mod_hash_create_idhash("zone_by_id", zone_hash_size,
mod_hash_null_valdtor);
zonehashbyname = mod_hash_create_strhash("zone_by_name",
zone_hash_size, mod_hash_null_valdtor);
/*
* maintain zonehashbylabel only for labeled systems
*/
if (is_system_labeled())
zonehashbylabel = mod_hash_create_extended("zone_by_label",
zone_hash_size, mod_hash_null_keydtor,
mod_hash_null_valdtor, hash_bylabel, NULL,
hash_labelkey_cmp, KM_SLEEP);
zonecount = 1;
(void) mod_hash_insert(zonehashbyid, (mod_hash_key_t)GLOBAL_ZONEID,
(mod_hash_val_t)&zone0);
(void) mod_hash_insert(zonehashbyname, (mod_hash_key_t)zone0.zone_name,
(mod_hash_val_t)&zone0);
if (is_system_labeled()) {
zone0.zone_flags |= ZF_HASHED_LABEL;
(void) mod_hash_insert(zonehashbylabel,
(mod_hash_key_t)zone0.zone_slabel, (mod_hash_val_t)&zone0);
}
mutex_exit(&zonehash_lock);
/*
* We avoid setting zone_kcred until now, since kcred is initialized
* sometime after zone_zsd_init() and before zone_init().
*/
zone0.zone_kcred = kcred;
/*
* The global zone is fully initialized (except for zone_rootvp which
* will be set when the root filesystem is mounted).
*/
global_zone = &zone0;
/*
	 * Set up an event channel for zone status change notifications.
*/
res = sysevent_evc_bind(ZONE_EVENT_CHANNEL, &zone_event_chan,
EVCH_CREAT);
if (res)
panic("Sysevent_evc_bind failed during zone setup.\n");
}
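/*
 * Free a zone_t and the resources still attached to it. The zone must
 * either be dead or never have been fully created, and no tracked
 * references may remain.
 */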
static void
zone_free(zone_t *zone)
{
ASSERT(zone != global_zone);
ASSERT(zone->zone_ntasks == 0);
ASSERT(zone->zone_nlwps == 0);
ASSERT(zone->zone_nprocs == 0);
ASSERT(zone->zone_cred_ref == 0);
ASSERT(zone->zone_kcred == NULL);
ASSERT(zone_status_get(zone) == ZONE_IS_DEAD ||
zone_status_get(zone) == ZONE_IS_UNINITIALIZED);
ASSERT(list_is_empty(&zone->zone_ref_list));
/*
* Remove any zone caps.
*/
cpucaps_zone_remove(zone);
ASSERT(zone->zone_cpucap == NULL);
/* remove from deathrow list */
if (zone_status_get(zone) == ZONE_IS_DEAD) {
ASSERT(zone->zone_ref == 0);
mutex_enter(&zone_deathrow_lock);
list_remove(&zone_deathrow, zone);
mutex_exit(&zone_deathrow_lock);
}
list_destroy(&zone->zone_ref_list);
zone_free_zsd(zone);
zone_free_datasets(zone);
list_destroy(&zone->zone_dl_list);
cpu_uarray_free(zone->zone_ustate);
if (zone->zone_rootvp != NULL)
VN_RELE(zone->zone_rootvp);
if (zone->zone_rootpath)
kmem_free(zone->zone_rootpath, zone->zone_rootpathlen);
if (zone->zone_name != NULL)
kmem_free(zone->zone_name, ZONENAME_MAX);
if (zone->zone_slabel != NULL)
label_rele(zone->zone_slabel);
if (zone->zone_nodename != NULL)
kmem_free(zone->zone_nodename, _SYS_NMLN);
if (zone->zone_domain != NULL)
kmem_free(zone->zone_domain, _SYS_NMLN);
if (zone->zone_privset != NULL)
kmem_free(zone->zone_privset, sizeof (priv_set_t));
if (zone->zone_rctls != NULL)
rctl_set_free(zone->zone_rctls);
if (zone->zone_bootargs != NULL)
strfree(zone->zone_bootargs);
if (zone->zone_initname != NULL)
strfree(zone->zone_initname);
if (zone->zone_fs_allowed != NULL)
strfree(zone->zone_fs_allowed);
if (zone->zone_pfexecd != NULL)
klpd_freelist(&zone->zone_pfexecd);
id_free(zoneid_space, zone->zone_id);
mutex_destroy(&zone->zone_lock);
cv_destroy(&zone->zone_cv);
rw_destroy(&zone->zone_mlps.mlpl_rwlock);
rw_destroy(&zone->zone_mntfs_db_lock);
kmem_free(zone, sizeof (zone_t));
}
/*
* See block comment at the top of this file for information about zone
* status values.
*/
/*
* Convenience function for setting zone status.
*/
static void
zone_status_set(zone_t *zone, zone_status_t status)
{
nvlist_t *nvl = NULL;
ASSERT(MUTEX_HELD(&zone_status_lock));
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE &&
status >= zone_status_get(zone));
if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) ||
nvlist_add_string(nvl, ZONE_CB_NAME, zone->zone_name) ||
nvlist_add_string(nvl, ZONE_CB_NEWSTATE,
zone_status_table[status]) ||
nvlist_add_string(nvl, ZONE_CB_OLDSTATE,
zone_status_table[zone->zone_status]) ||
nvlist_add_int32(nvl, ZONE_CB_ZONEID, zone->zone_id) ||
nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, (uint64_t)gethrtime()) ||
sysevent_evc_publish(zone_event_chan, ZONE_EVENT_STATUS_CLASS,
ZONE_EVENT_STATUS_SUBCLASS, "sun.com", "kernel", nvl, EVCH_SLEEP)) {
#ifdef DEBUG
(void) printf(
"Failed to allocate and send zone state change event.\n");
#endif
}
nvlist_free(nvl);
zone->zone_status = status;
cv_broadcast(&zone->zone_cv);
}
/*
* Public function to retrieve the zone status. The zone status may
* change after it is retrieved.
*/
zone_status_t
zone_status_get(zone_t *zone)
{
return (zone->zone_status);
}
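/*
 * Copy in the user-supplied boot arguments and attach them to the zone.
 */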
static int
zone_set_bootargs(zone_t *zone, const char *zone_bootargs)
{
char *buf = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
int err = 0;
ASSERT(zone != global_zone);
if ((err = copyinstr(zone_bootargs, buf, BOOTARGS_MAX, NULL)) != 0)
goto done; /* EFAULT or ENAMETOOLONG */
if (zone->zone_bootargs != NULL)
strfree(zone->zone_bootargs);
zone->zone_bootargs = strdup(buf);
done:
kmem_free(buf, BOOTARGS_MAX);
return (err);
}
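/*
 * Associate a brand with the zone. The brand module is registered against
 * the zone and its brand-specific data is initialized. Fails if the zone
 * is already branded or has begun booting.
 */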
static int
zone_set_brand(zone_t *zone, const char *brand)
{
struct brand_attr *attrp;
brand_t *bp;
attrp = kmem_alloc(sizeof (struct brand_attr), KM_SLEEP);
if (copyin(brand, attrp, sizeof (struct brand_attr)) != 0) {
kmem_free(attrp, sizeof (struct brand_attr));
return (EFAULT);
}
bp = brand_register_zone(attrp);
kmem_free(attrp, sizeof (struct brand_attr));
if (bp == NULL)
return (EINVAL);
/*
	 * This is the only place where a zone can change its brand.
* We already need to hold zone_status_lock to check the zone
* status, so we'll just use that lock to serialize zone
* branding requests as well.
*/
mutex_enter(&zone_status_lock);
	/* No re-branding, and no branding once the zone has begun booting */
if ((ZONE_IS_BRANDED(zone)) ||
(zone_status_get(zone) >= ZONE_IS_BOOTING)) {
mutex_exit(&zone_status_lock);
brand_unregister_zone(bp);
return (EINVAL);
}
/* set up the brand specific data */
zone->zone_brand = bp;
ZBROP(zone)->b_init_brand_data(zone);
mutex_exit(&zone_status_lock);
return (0);
}
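/*
 * Copy in, validate and apply the zone's security-flag sets. The flags are
 * also applied to the zone's zsched process. This is only permitted while
 * the zone is no further along than ZONE_IS_READY.
 */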
static int
zone_set_secflags(zone_t *zone, const psecflags_t *zone_secflags)
{
int err = 0;
psecflags_t psf;
ASSERT(zone != global_zone);
if ((err = copyin(zone_secflags, &psf, sizeof (psf))) != 0)
return (err);
if (zone_status_get(zone) > ZONE_IS_READY)
return (EINVAL);
if (!psecflags_validate(&psf))
return (EINVAL);
(void) memcpy(&zone->zone_secflags, &psf, sizeof (psf));
/* Set security flags on the zone's zsched */
(void) memcpy(&zone->zone_zsched->p_secflags, &zone->zone_secflags,
sizeof (zone->zone_zsched->p_secflags));
return (0);
}
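/*
 * Copy in the zone's "fs-allowed" string and attach it to the zone.
 */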
static int
zone_set_fs_allowed(zone_t *zone, const char *zone_fs_allowed)
{
char *buf = kmem_zalloc(ZONE_FS_ALLOWED_MAX, KM_SLEEP);
int err = 0;
ASSERT(zone != global_zone);
if ((err = copyinstr(zone_fs_allowed, buf,
ZONE_FS_ALLOWED_MAX, NULL)) != 0)
goto done;
if (zone->zone_fs_allowed != NULL)
strfree(zone->zone_fs_allowed);
zone->zone_fs_allowed = strdup(buf);
done:
kmem_free(buf, ZONE_FS_ALLOWED_MAX);
return (err);
}
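/*
 * Copy in the path of the zone's init process and record it as
 * zone_initname.
 */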
static int
zone_set_initname(zone_t *zone, const char *zone_initname)
{
char initname[INITNAME_SZ];
size_t len;
int err = 0;
ASSERT(zone != global_zone);
if ((err = copyinstr(zone_initname, initname, INITNAME_SZ, &len)) != 0)
return (err); /* EFAULT or ENAMETOOLONG */
if (zone->zone_initname != NULL)
strfree(zone->zone_initname);
zone->zone_initname = kmem_alloc(strlen(initname) + 1, KM_SLEEP);
(void) strcpy(zone->zone_initname, initname);
return (0);
}
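/*
 * Copy in and record the zone's physical memory cap.
 */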
static int
zone_set_phys_mcap(zone_t *zone, const uint64_t *zone_mcap)
{
uint64_t mcap;
int err = 0;
if ((err = copyin(zone_mcap, &mcap, sizeof (uint64_t))) == 0)
zone->zone_phys_mcap = mcap;
return (err);
}
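/*
 * Copy in the name of the zone's default scheduling class, resolve it to a
 * class id, and record it. Kernel scheduling classes are rejected.
 */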
static int
zone_set_sched_class(zone_t *zone, const char *new_class)
{
char sched_class[PC_CLNMSZ];
id_t classid;
int err;
ASSERT(zone != global_zone);
if ((err = copyinstr(new_class, sched_class, PC_CLNMSZ, NULL)) != 0)
return (err); /* EFAULT or ENAMETOOLONG */
if (getcid(sched_class, &classid) != 0 || CLASS_KERNEL(classid))
return (set_errno(EINVAL));
zone->zone_defaultcid = classid;
ASSERT(zone->zone_defaultcid > 0 &&
zone->zone_defaultcid < loaded_classes);
return (0);
}
/*
* Block indefinitely waiting for (zone_status >= status)
*/
void
zone_status_wait(zone_t *zone, zone_status_t status)
{
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);
mutex_enter(&zone_status_lock);
while (zone->zone_status < status) {
cv_wait(&zone->zone_cv, &zone_status_lock);
}
mutex_exit(&zone_status_lock);
}
/*
* Private CPR-safe version of zone_status_wait().
*/
static void
zone_status_wait_cpr(zone_t *zone, zone_status_t status, char *str)
{
callb_cpr_t cprinfo;
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);
CALLB_CPR_INIT(&cprinfo, &zone_status_lock, callb_generic_cpr,
str);
mutex_enter(&zone_status_lock);
while (zone->zone_status < status) {
CALLB_CPR_SAFE_BEGIN(&cprinfo);
cv_wait(&zone->zone_cv, &zone_status_lock);
CALLB_CPR_SAFE_END(&cprinfo, &zone_status_lock);
}
/*
* zone_status_lock is implicitly released by the following.
*/
CALLB_CPR_EXIT(&cprinfo);
}
/*
* Block until zone enters requested state or signal is received. Return (0)
* if signaled, non-zero otherwise.
*/
int
zone_status_wait_sig(zone_t *zone, zone_status_t status)
{
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);
mutex_enter(&zone_status_lock);
while (zone->zone_status < status) {
if (!cv_wait_sig(&zone->zone_cv, &zone_status_lock)) {
mutex_exit(&zone_status_lock);
return (0);
}
}
mutex_exit(&zone_status_lock);
return (1);
}
/*
* Block until the zone enters the requested state or the timeout expires,
* whichever happens first. Return (-1) if operation timed out, time remaining
* otherwise.
*/
clock_t
zone_status_timedwait(zone_t *zone, clock_t tim, zone_status_t status)
{
clock_t timeleft = 0;
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);
mutex_enter(&zone_status_lock);
while (zone->zone_status < status && timeleft != -1) {
timeleft = cv_timedwait(&zone->zone_cv, &zone_status_lock, tim);
}
mutex_exit(&zone_status_lock);
return (timeleft);
}
/*
* Block until the zone enters the requested state, the current process is
* signaled, or the timeout expires, whichever happens first. Return (-1) if
* operation timed out, 0 if signaled, time remaining otherwise.
*/
clock_t
zone_status_timedwait_sig(zone_t *zone, clock_t tim, zone_status_t status)
{
clock_t timeleft = tim - ddi_get_lbolt();
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);
mutex_enter(&zone_status_lock);
while (zone->zone_status < status) {
timeleft = cv_timedwait_sig(&zone->zone_cv, &zone_status_lock,
tim);
if (timeleft <= 0)
break;
}
mutex_exit(&zone_status_lock);
return (timeleft);
}
/*
* Zones have two reference counts: one for references from credential
* structures (zone_cred_ref), and one (zone_ref) for everything else.
* This is so we can allow a zone to be rebooted while there are still
* outstanding cred references, since certain drivers cache dblks (which
* implicitly results in cached creds). We wait for zone_ref to drop to
* 0 (actually 1), but not zone_cred_ref. The zone structure itself is
* later freed when the zone_cred_ref drops to 0, though nothing other
* than the zone id and privilege set should be accessed once the zone
* is "dead".
*
* A debugging flag, zone_wait_for_cred, can be set to a non-zero value
* to force halt/reboot to block waiting for the zone_cred_ref to drop
* to 0. This can be useful to flush out other sources of cached creds
* that may be less innocuous than the driver case.
*
* Zones also provide a tracked reference counting mechanism in which zone
* references are represented by "crumbs" (zone_ref structures). Crumbs help
* debuggers determine the sources of leaked zone references. See
* zone_hold_ref() and zone_rele_ref() below for more information.
*/
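/*
 * As a minimal, illustrative sketch of the two mechanisms (assuming a
 * caller that already has a valid zoneid_t `zid'): a short-lived user of a
 * zone relies on the hold taken by the lookup routines and drops it with
 * zone_rele(),
 *
 *	zone_t *zp;
 *
 *	if ((zp = zone_find_by_id(zid)) != NULL) {
 *		... use zp briefly ...
 *		zone_rele(zp);
 *	}
 *
 * while a subsystem that caches the zone pointer for a long time should
 * instead take a tracked reference with zone_init_ref()/zone_hold_ref()
 * and release it with zone_rele_ref(), so that any leaked reference can be
 * attributed to its subsystem.
 */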
int zone_wait_for_cred = 0;
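/*
 * Increment the zone's reference count; the caller must hold zone_lock.
 */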
static void
zone_hold_locked(zone_t *z)
{
ASSERT(MUTEX_HELD(&z->zone_lock));
z->zone_ref++;
ASSERT(z->zone_ref != 0);
}
/*
* Increment the specified zone's reference count. The zone's zone_t structure
* will not be freed as long as the zone's reference count is nonzero.
* Decrement the zone's reference count via zone_rele().
*
* NOTE: This function should only be used to hold zones for short periods of
* time. Use zone_hold_ref() if the zone must be held for a long time.
*/
void
zone_hold(zone_t *z)
{
mutex_enter(&z->zone_lock);
zone_hold_locked(z);
mutex_exit(&z->zone_lock);
}
/*
* If the non-cred ref count drops to 1 and either the cred ref count
* is 0 or we aren't waiting for cred references, the zone is ready to
* be destroyed.
*/
#define ZONE_IS_UNREF(zone) ((zone)->zone_ref == 1 && \
(!zone_wait_for_cred || (zone)->zone_cred_ref == 0))
/*
* Common zone reference release function invoked by zone_rele() and
* zone_rele_ref(). If subsys is ZONE_REF_NUM_SUBSYS, then the specified
* zone's subsystem-specific reference counters are not affected by the
* release. If ref is not NULL, then the zone_ref_t to which it refers is
* removed from the specified zone's reference list. ref must be non-NULL iff
* subsys is not ZONE_REF_NUM_SUBSYS.
*/
static void
zone_rele_common(zone_t *z, zone_ref_t *ref, zone_ref_subsys_t subsys)
{
boolean_t wakeup;
mutex_enter(&z->zone_lock);
ASSERT(z->zone_ref != 0);
z->zone_ref--;
if (subsys != ZONE_REF_NUM_SUBSYS) {
ASSERT(z->zone_subsys_ref[subsys] != 0);
z->zone_subsys_ref[subsys]--;
list_remove(&z->zone_ref_list, ref);
}
if (z->zone_ref == 0 && z->zone_cred_ref == 0) {
/* no more refs, free the structure */
mutex_exit(&z->zone_lock);
zone_free(z);
return;
}
/* signal zone_destroy so the zone can finish halting */
wakeup = (ZONE_IS_UNREF(z) && zone_status_get(z) >= ZONE_IS_DEAD);
mutex_exit(&z->zone_lock);
if (wakeup) {
/*
* Grabbing zonehash_lock here effectively synchronizes with
* zone_destroy() to avoid missed signals.
*/
mutex_enter(&zonehash_lock);
cv_broadcast(&zone_destroy_cv);
mutex_exit(&zonehash_lock);
}
}
/*
* Decrement the specified zone's reference count. The specified zone will
* cease to exist after this function returns if the reference count drops to
* zero. This function should be paired with zone_hold().
*/
void
zone_rele(zone_t *z)
{
zone_rele_common(z, NULL, ZONE_REF_NUM_SUBSYS);
}
/*
* Initialize a zone reference structure. This function must be invoked for
* a reference structure before the structure is passed to zone_hold_ref().
*/
void
zone_init_ref(zone_ref_t *ref)
{
ref->zref_zone = NULL;
list_link_init(&ref->zref_linkage);
}
/*
* Acquire a reference to zone z. The caller must specify the
* zone_ref_subsys_t constant associated with its subsystem. The specified
* zone_ref_t structure will represent a reference to the specified zone. Use
* zone_rele_ref() to release the reference.
*
* The referenced zone_t structure will not be freed as long as the zone_t's
* zone_status field is not ZONE_IS_DEAD and the zone has outstanding
* references.
*
* NOTE: The zone_ref_t structure must be initialized before it is used.
* See zone_init_ref() above.
*/