/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, Joyent, Inc.
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 */

/*
 * DTrace - Dynamic Tracing for Solaris
 *
 * This is the implementation of the Solaris Dynamic Tracing framework
 * (DTrace).  The user-visible interface to DTrace is described at length in
 * the "Solaris Dynamic Tracing Guide".  The interfaces between the libdtrace
 * library, the in-kernel DTrace framework, and the DTrace providers are
 * described in the block comments in the <sys/dtrace.h> header file.  The
 * internal architecture of DTrace is described in the block comments in the
 * <sys/dtrace_impl.h> header file.  The comments contained within the DTrace
 * implementation very much assume mastery of all of these sources; if one has
 * an unanswered question about the implementation, one should consult them
 * first.
 *
 * The functions here are ordered roughly as follows:
 *
 *   - Probe context functions
 *   - Probe hashing functions
 *   - Non-probe context utility functions
 *   - Matching functions
 *   - Provider-to-Framework API functions
 *   - Probe management functions
 *   - DIF object functions
 *   - Format functions
 *   - Predicate functions
 *   - ECB functions
 *   - Buffer functions
 *   - Enabling functions
 *   - DOF functions
 *   - Anonymous enabling functions
 *   - Consumer state functions
 *   - Helper functions
 *   - Hook functions
 *   - Driver cookbook functions
 *
 * Each group of functions begins with a block comment labelled the "DTrace
 * [Group] Functions", allowing one to find each block by searching forward
 * on capital-f functions.
 */
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cpuvar.h>
#include <sys/kmem.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/dtrace_impl.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/mutex_impl.h>
#include <sys/rwlock_impl.h>
#include <sys/ctf_api.h>
#include <sys/panic.h>
#include <sys/priv_impl.h>
#include <sys/policy.h>
#include <sys/cred_impl.h>
#include <sys/procfs_isa.h>
#include <sys/taskq.h>
#include <sys/mkdev.h>
#include <sys/kdi.h>
#include <sys/zone.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include "strtolctype.h"

/*
 * DTrace Tunable Variables
 *
 * The following variables may be tuned by adding a line to /etc/system that
 * includes both the name of the DTrace module ("dtrace") and the name of the
 * variable.  For example:
 *
 *	set dtrace:dtrace_destructive_disallow = 1
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable.  Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively.  Further, these variables should not be tuned
 * dynamically via "mdb -kw" or other means; they should only be tuned via
 * /etc/system.
 */
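
/*
 * For example (illustrative only), a single consumer can raise its own
 * dynamic variable space via a DTrace option rather than any system-wide
 * tuning:
 *
 *	# dtrace -x dynvarsize=8m -n 'syscall:::entry { self->t = timestamp; }'
 */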
int		dtrace_destructive_disallow = 0;
dtrace_optval_t	dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t		dtrace_difo_maxsize = (256 * 1024);
dtrace_optval_t	dtrace_dof_maxsize = (8 * 1024 * 1024);
size_t		dtrace_statvar_maxsize = (16 * 1024);
size_t		dtrace_actions_max = (16 * 1024);
size_t		dtrace_retain_max = 1024;
dtrace_optval_t	dtrace_helper_actions_max = 1024;
dtrace_optval_t	dtrace_helper_providers_max = 32;
dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t		dtrace_strsize_default = 256;
dtrace_optval_t	dtrace_cleanrate_default = 9900990;		/* 101 hz */
dtrace_optval_t	dtrace_cleanrate_min = 200000;			/* 5000 hz */
dtrace_optval_t	dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
dtrace_optval_t	dtrace_aggrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
dtrace_optval_t	dtrace_switchrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_nspec_default = 1;
dtrace_optval_t	dtrace_specsize_default = 32 * 1024;
dtrace_optval_t	dtrace_stackframes_default = 20;
dtrace_optval_t	dtrace_ustackframes_default = 20;
dtrace_optval_t	dtrace_jstackframes_default = 50;
dtrace_optval_t	dtrace_jstackstrsize_default = 512;
int		dtrace_msgdsize_max = 128;
hrtime_t	dtrace_chill_max = MSEC2NSEC(500);		/* 500 ms */
hrtime_t	dtrace_chill_interval = NANOSEC;		/* 1000 ms */
int		dtrace_devdepth_max = 32;
int		dtrace_err_verbose;
hrtime_t	dtrace_deadman_interval = NANOSEC;
hrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
hrtime_t	dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;

/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax.  One of these,
 * dtrace_zero, is made deliberately so:  it is provided as a source of
 * well-known, zero-filled memory.  While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
const char	dtrace_zero[256] = { 0 };	/* zero-filled memory */

/*
 * DTrace Internal Variables
 */
static dev_info_t	*dtrace_devi;		/* device info */
static vmem_t		*dtrace_arena;		/* probe ID arena */
static vmem_t		*dtrace_minor;		/* minor number arena */
static taskq_t		*dtrace_taskq;		/* task queue */
static dtrace_probe_t	**dtrace_probes;	/* array of all probes */
static int		dtrace_nprobes;		/* number of probes */
static dtrace_provider_t *dtrace_provider;	/* provider list */
static dtrace_meta_t	*dtrace_meta_pid;	/* user-land meta provider */
static int		dtrace_opens;		/* number of opens */
static int		dtrace_helpers;		/* number of helpers */
static int		dtrace_getf;		/* number of unpriv getf()s */
static void		*dtrace_softstate;	/* softstate pointer */
static dtrace_hash_t	*dtrace_bymod;		/* probes hashed by module */
static dtrace_hash_t	*dtrace_byfunc;		/* probes hashed by function */
static dtrace_hash_t	*dtrace_byname;		/* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
static int		dtrace_toxranges;	/* number of toxic ranges */
static int		dtrace_toxranges_max;	/* size of toxic range array */
static dtrace_anon_t	dtrace_anon;		/* anonymous enabling */
static kmem_cache_t	*dtrace_state_cache;	/* cache for dynamic state */
static uint64_t		dtrace_vtime_references; /* number of vtimestamp refs */
static kthread_t	*dtrace_panicked;	/* panicking thread */
static dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t	dtrace_probegen;	/* current probe generation */
static dtrace_helpers_t *dtrace_deferred_pid;	/* deferred helper list */
static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
static dtrace_genid_t	dtrace_retained_gen;	/* current retained enab gen */
static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */
static int		dtrace_dynvar_failclean; /* dynvars failed to clean */

/*
 * DTrace Locking
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc.  Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock.  (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix:  mod_lock and cpu_lock.  With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger:  cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 */
static kmutex_t		dtrace_lock;		/* probe state lock */
static kmutex_t		dtrace_provider_lock;	/* provider state lock */
static kmutex_t		dtrace_meta_lock;	/* meta-provider state lock */

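/*
 * A minimal sketch (illustrative, not code from this file) of the documented
 * ordering for a hypothetical caller that needed all three framework locks:
 *
 *	mutex_enter(&dtrace_meta_lock);
 *	mutex_enter(&dtrace_provider_lock);
 *	mutex_enter(&dtrace_lock);
 *	...				(manipulate framework state)
 *	mutex_exit(&dtrace_lock);
 *	mutex_exit(&dtrace_provider_lock);
 *	mutex_exit(&dtrace_meta_lock);
 *
 * cpu_lock and mod_lock, when needed, slot in at the points described above.
 */
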
/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
 */
static dtrace_pattr_t	dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};

static void
dtrace_nullop(void)
{}

static int
dtrace_enable_nullop(void)
{
	return (0);
}

static dtrace_pops_t	dtrace_provider_ops = {
	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
	(void (*)(void *, struct modctl *))dtrace_nullop,
	(int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	NULL,
	NULL,
	NULL,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop
};

static dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
static dtrace_id_t	dtrace_probeid_end;	/* special END probe */
dtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */

/*
 * DTrace Helper Tracing Variables
 *
 * These variables should be set dynamically to enable helper tracing.  The
 * only variables that should be set are dtrace_helptrace_enable (which should
 * be set to a non-zero value to allocate helper tracing buffers on the next
 * open of /dev/dtrace) and dtrace_helptrace_disable (which should be set to a
 * non-zero value to deallocate helper tracing buffers on the next close of
 * /dev/dtrace).  When (and only when) helper tracing is disabled, the
 * buffer size may also be set via dtrace_helptrace_bufsize.
 */
int			dtrace_helptrace_enable = 0;
int			dtrace_helptrace_disable = 0;
int			dtrace_helptrace_bufsize = 16 * 1024 * 1024;
uint32_t		dtrace_helptrace_nlocals;
static dtrace_helptrace_t *dtrace_helptrace_buffer;
static uint32_t		dtrace_helptrace_next = 0;
static int		dtrace_helptrace_wrapped = 0;

/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.  The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
#ifdef DEBUG
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char	*dtrace_errlast;
static kthread_t	*dtrace_errthread;
static kmutex_t		dtrace_errlock;
#endif

/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation.  There is no real structure to this cpp
 * mishmash -- but is there ever?
 */
#define	DTRACE_HASHSTR(hash, probe)	\
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define	DTRACE_HASHNEXT(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define	DTRACE_HASHPREV(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define	DTRACE_HASHEQ(hash, lhs, rhs)	\
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define	DTRACE_AGGHASHSIZE_SLEW		17

#define	DTRACE_V4MAPPED_OFFSET		(sizeof (uint32_t) * 3)

/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier.  This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables.  To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables.  That order is:
 *
 *   [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 */
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = 0; \
	uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
	for (; actv; actv >>= 1) \
		intr++; \
	ASSERT(intr < (1 << 3)); \
	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}

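/*
 * For example:  a thread running at base level has no bits set in
 * cpu_intr_actv above LOCK_LEVEL, so intr is 0 and the key is simply
 * (t_did + DIF_VARIABLE_MAX) masked to 61 bits.  If the same thread is
 * instead interrupted at PIL (LOCK_LEVEL + 3), the loop counts three bit
 * positions and folds the value 3 into the top three bits, yielding a
 * distinct key for the interrupt context.
 */
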
#define	DT_BSWAP_8(x)	((x) & 0xff)
#define	DT_BSWAP_16(x)	((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
#define	DT_BSWAP_32(x)	((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
#define	DT_BSWAP_64(x)	((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
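
/*
 * For example:  DT_BSWAP_16(0x1234) is 0x3412, and DT_BSWAP_32(0x12345678)
 * is 0x78563412 -- each macro reverses the byte order of its argument by
 * swapping the two halves produced by the next-smaller macro.
 */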

#define	DT_MASK_LO	0x00000000FFFFFFFFULL

#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);

#ifndef __x86
#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if (addr & (size - 1)) {					\
		*flags |= CPU_DTRACE_BADALIGN;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}
#else
#define	DTRACE_ALIGNCHECK(addr, size, flags)
#endif

/*
 * Test whether a range of memory starting at testaddr of size testsz falls
 * within the range of memory described by addr, sz.  We take care to avoid
 * problems with overflow and underflow of the unsigned quantities, and
 * disallow all negative sizes.  Ranges of size 0 are allowed.
 */
#define	DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
	((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
	(testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
	(testaddr) + (testsz) >= (testaddr))

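/*
 * For illustration:  without the final term, a pathological testsz could
 * wrap testaddr + testsz around the top of the address space.  With a
 * 64-bit uintptr_t, testaddr = 0xfffffffffffffff8 and testsz = 0x10 gives
 * testaddr + testsz == 0x8, which would otherwise appear to fall within
 * the base range.  The (testaddr) + (testsz) >= (testaddr) check rejects
 * exactly this wraparound.
 */
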
#define	DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz)		\
	do {								\
		if ((remp) != NULL) {					\
			*(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr); \
		}							\
	_NOTE(CONSTCOND) } while (0)


/*
 * Test whether alloc_sz bytes will fit in the scratch region.  We isolate
 * alloc_sz on the righthand side of the comparison in order to avoid overflow
 * or underflow in the comparison with it.  This is simpler than the INRANGE
 * check above, because we know that the dtms_scratch_ptr is valid in the
 * range.  Allocations of size zero are allowed.
 */
#define	DTRACE_INSCRATCH(mstate, alloc_sz) \
	((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
	(mstate)->dtms_scratch_ptr >= (alloc_sz))
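
/*
 * For example:  with a scratch region of 4096 bytes of which 1024 are
 * already consumed, dtms_scratch_base + dtms_scratch_size -
 * dtms_scratch_ptr is 3072, so any alloc_sz up to 3072 passes.  Keeping
 * alloc_sz alone on the right-hand side means no sum involving it can
 * overflow before the comparison.
 */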

#define	DTRACE_LOADFUNC(bits)						\
/*CSTYLED*/								\
uint##bits##_t								\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	/*CSTYLED*/							\
	uint##bits##_t rval;						\
	int i;								\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;			\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		if (addr >= dtrace_toxrange[i].dtt_limit)		\
			continue;					\
									\
		if (addr + size <= dtrace_toxrange[i].dtt_base)		\
			continue;					\
									\
		/*							\
		 * This address falls within a toxic region; return 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	*flags |= CPU_DTRACE_NOFAULT;					\
	/*CSTYLED*/							\
	rval = *((volatile uint##bits##_t *)addr);			\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
									\
	return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0);		\
}

#ifdef _LP64
#define	dtrace_loadptr	dtrace_load64
#else
#define	dtrace_loadptr	dtrace_load32
#endif

#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

#define	DTRACE_MATCH_FAIL	-1
#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64

#define	DTRACE_FLAGS2FLT(flags)						\
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR :		\
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP :		\
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO :		\
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV :		\
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV :		\
	((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW :		\
	((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN :		\
	((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH :	\
	((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK :		\
	DTRACEFLT_UNKNOWN)

#define	DTRACEACT_ISSTRING(act)						\
	((act)->dta_kind == DTRACEACT_DIFEXPR &&			\
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)

static size_t dtrace_strlen(const char *, size_t);
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
static void dtrace_enabling_reap(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
static void dtrace_getf_barrier(void);
static int dtrace_canload_remains(uint64_t, size_t, size_t *,
    dtrace_mstate_t *, dtrace_vstate_t *);
static int dtrace_canstore_remains(uint64_t, size_t, size_t *,
    dtrace_mstate_t *, dtrace_vstate_t *);

/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.)  If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note:  not called from probe context."
 */
void
dtrace_panic(const char *format, ...)
{
	va_list alist;

	va_start(alist, format);
	dtrace_vpanic(format, alist);
	va_end(alist);
}

int
dtrace_assfail(const char *a, const char *f, int l)
{
	dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * We just need something here that even the most clever compiler
	 * cannot optimize away.
	 */
	return (a[(uintptr_t)f]);
}

/*
 * Atomically increment a specified error counter from probe context.
 */
static void
dtrace_error(uint32_t *counter)
{
	/*
	 * Most counters stored to in probe context are per-CPU counters.
	 * However, there are some error conditions that are sufficiently
	 * arcane that they don't merit per-CPU storage.  If these counters
	 * are incremented concurrently on different CPUs, scalability will be
	 * adversely affected -- but we don't expect them to be white-hot in a
	 * correctly constructed enabling...
	 */
	uint32_t oval, nval;

	do {
		oval = *counter;

		if ((nval = oval + 1) == 0) {
			/*
			 * If the counter would wrap, set it to 1 -- assuring
			 * that the counter is never zero when we have seen
			 * errors.  (The counter must be 32-bits because we
			 * aren't guaranteed a 64-bit compare&swap operation.)
			 * To save this code both the infamy of being fingered
			 * by a priggish news story and the indignity of being
			 * the target of a neo-puritan witch trial, we're
			 * carefully avoiding any colorful description of the
			 * likelihood of this condition -- but suffice it to
			 * say that it is only slightly more likely than the
			 * overflow of predicate cache IDs, as discussed in
			 * dtrace_predicate_create().
			 */
			nval = 1;
		}
	} while (dtrace_cas32(counter, oval, nval) != oval);
}

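/*
 * Illustrative usage (hypothetical counter, not from this file):
 *
 *	static uint32_t dtrace_example_errors;
 *	...
 *	dtrace_error(&dtrace_example_errors);
 *
 * The compare-and-swap loop retries until this CPU's increment lands, so
 * concurrent callers on different CPUs never lose a count.
 */
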
/*
 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
 */
/* BEGIN CSTYLED */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)
/* END CSTYLED */

static int
dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
{
	if (dest < mstate->dtms_scratch_base)
		return (0);

	if (dest + size < dest)
		return (0);

	if (dest + size > mstate->dtms_scratch_ptr)
		return (0);

	return (1);
}


static int
dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain,
    dtrace_statvar_t **svars, int nsvars)
{
	int i;
	size_t maxglobalsize, maxlocalsize;

	if (nsvars == 0)
		return (0);

	maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t);
	maxlocalsize = maxglobalsize * NCPU;

	for (i = 0; i < nsvars; i++) {
		dtrace_statvar_t *svar = svars[i];
		uint8_t scope;
		size_t size;

		if (svar == NULL || (size = svar->dtsv_size) == 0)
			continue;

		scope = svar->dtsv_var.dtdv_scope;

		/*
		 * We verify that our size is valid in the spirit of providing
		 * defense in depth:  we want to prevent attackers from using
		 * DTrace to escalate an orthogonal kernel heap corruption bug
		 * into the ability to store to arbitrary locations in memory.
		 */
		VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) ||
		    (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize));

		if (DTRACE_INRANGE(addr, sz, svar->dtsv_data,
		    svar->dtsv_size)) {
			DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data,
			    svar->dtsv_size);
			return (1);
		}
	}

	return (0);
}

/*
 * Check to see if the address is within a memory region to which a store may
 * be issued.  This includes the DTrace scratch areas, and any DTrace variable
 * region.  The caller of dtrace_canstore() is responsible for performing any
 * alignment checks that are needed before stores are actually executed.
 */
static int
dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate));
}

/*
 * Implementation of dtrace_canstore which communicates the upper bound of the
 * allowed memory region.
 */
static int
dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	/*
	 * First, check to see if the address is in scratch space...
	 */
	if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
	    mstate->dtms_scratch_size)) {
		DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base,
		    mstate->dtms_scratch_size);
		return (1);
	}

	/*
	 * Now check to see if it's a dynamic variable.  This check will pick
	 * up both thread-local variables and any global dynamically-allocated
	 * variables.
	 */
	if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
	    vstate->dtvs_dynvars.dtds_size)) {
		dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
		uintptr_t base = (uintptr_t)dstate->dtds_base +
		    (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
		uintptr_t chunkoffs;
		dtrace_dynvar_t *dvar;

		/*
		 * Before we assume that we can store here, we need to make
		 * sure that it isn't in our metadata -- storing to our
		 * dynamic variable metadata would corrupt our state.  For
		 * the range to not include any dynamic variable metadata,
		 * it must:
		 *
		 *	(1) Start above the hash table that is at the base of
		 *	the dynamic variable space
		 *
		 *	(2) Have a starting chunk offset that is beyond the
		 *	dtrace_dynvar_t that is at the base of every chunk
		 *
		 *	(3) Not span a chunk boundary
		 *
		 *	(4) Not be in the tuple space of a dynamic variable
		 *
		 */
		if (addr < base)
			return (0);

		chunkoffs = (addr - base) % dstate->dtds_chunksize;

		if (chunkoffs < sizeof (dtrace_dynvar_t))
			return (0);

		if (chunkoffs + sz > dstate->dtds_chunksize)
			return (0);

		dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs);

		if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE)
			return (0);

		if (chunkoffs < sizeof (dtrace_dynvar_t) +
		    ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t)))
			return (0);

		DTRACE_RANGE_REMAIN(remain, addr, dvar, dstate->dtds_chunksize);
		return (1);
	}

	/*
	 * Finally, check the static local and global variables.  These checks
	 * take the longest, so we perform them last.
	 */
	if (dtrace_canstore_statvar(addr, sz, remain,
	    vstate->dtvs_locals, vstate->dtvs_nlocals))
		return (1);

	if (dtrace_canstore_statvar(addr, sz, remain,
	    vstate->dtvs_globals, vstate->dtvs_nglobals))
		return (1);

	return (0);
}


/*
 * Convenience routine to check to see if the address is within a memory
 * region in which a load may be issued given the user's privilege level;
 * if not, it sets the appropriate error flags and loads 'addr' into the
 * illegal value slot.
 *
 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
 * appropriate memory access protection.
 */
static int
dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate));
}

/*
 * Implementation of dtrace_canload which communicates the upper bound of the
 * allowed memory region.
 */
static int
dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
	file_t *fp;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
		DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
		return (1);
	}

	/*
	 * You can obviously read that which you can store.
	 */
	if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate))
		return (1);

	/*
	 * We're allowed to read from our own string table.
	 */
	if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
	    mstate->dtms_difo->dtdo_strlen)) {
		DTRACE_RANGE_REMAIN(remain, addr,
		    mstate->dtms_difo->dtdo_strtab,
		    mstate->dtms_difo->dtdo_strlen);
		return (1);
	}

	if (vstate->dtvs_state != NULL &&
	    dtrace_priv_proc(vstate->dtvs_state, mstate)) {
		proc_t *p;

		/*
		 * When we have privileges to the current process, there are
		 * several context-related kernel structures that are safe to
		 * read, even absent the privilege to read from kernel memory.
		 * These reads are safe because these structures contain only
		 * state that (1) we're permitted to read, (2) is harmless or
		 * (3) contains pointers to additional kernel state that we're
		 * not permitted to read (and as such, do not present an
		 * opportunity for privilege escalation).  Finally (and
		 * critically), because of the nature of their relation with
		 * the current thread context, the memory associated with these
		 * structures cannot change over the duration of probe context,
		 * and it is therefore impossible for this memory to be
		 * deallocated and reallocated as something else while it's
		 * being operated upon.
		 */
		if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t))) {
			DTRACE_RANGE_REMAIN(remain, addr, curthread,
			    sizeof (kthread_t));
			return (1);
		}

		if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
		    sz, curthread->t_procp, sizeof (proc_t))) {
			DTRACE_RANGE_REMAIN(remain, addr, curthread->t_procp,
			    sizeof (proc_t));
			return (1);
		}

		if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
		    curthread->t_cred, sizeof (cred_t))) {
			DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cred,
			    sizeof (cred_t));
			return (1);
		}

		if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
		    &(p->p_pidp->pid_id), sizeof (pid_t))) {
			DTRACE_RANGE_REMAIN(remain, addr, &(p->p_pidp->pid_id),
			    sizeof (pid_t));
			return (1);
		}

		if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
		    curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
			DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cpu,
			    offsetof(cpu_t, cpu_pause_thread));
			return (1);
		}
	}

	if ((fp = mstate->dtms_getf) != NULL) {
		uintptr_t psz = sizeof (void *);
		vnode_t *vp;
		vnodeops_t *op;

		/*
		 * When getf() returns a file_t, the enabling is implicitly
		 * granted the (transient) right to read the returned file_t
		 * as well as the v_path and v_op->vnop_name of the underlying
		 * vnode.  These accesses are allowed after a successful
		 * getf() because the members that they refer to cannot change
		 * once set -- and the barrier logic in the kernel's closef()
		 * path assures that the file_t and its referenced vnode_t
		 * cannot themselves be stale (that is, it is impossible for
		 * either dtms_getf itself or its f_vnode member to reference
		 * freed memory).
		 */
		if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t))) {
			DTRACE_RANGE_REMAIN(remain, addr, fp, sizeof (file_t));
			return (1);
		}

		if ((vp = fp->f_vnode) != NULL) {
			size_t slen;

			if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz)) {
				DTRACE_RANGE_REMAIN(remain, addr, &vp->v_path,
				    psz);
				return (1);
			}

			slen = strlen(vp->v_path) + 1;
			if (DTRACE_INRANGE(addr, sz, vp->v_path, slen)) {
				DTRACE_RANGE_REMAIN(remain, addr, vp->v_path,
				    slen);
				return (1);
			}

			if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz)) {
				DTRACE_RANGE_REMAIN(remain, addr, &vp->v_op,
				    psz);
				return (1);
			}

			if ((op = vp->v_op) != NULL &&
			    DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
				DTRACE_RANGE_REMAIN(remain, addr,
				    &op->vnop_name, psz);
				return (1);
			}

			if (op != NULL && op->vnop_name != NULL &&
			    DTRACE_INRANGE(addr, sz, op->vnop_name,
			    (slen = strlen(op->vnop_name) + 1))) {
				DTRACE_RANGE_REMAIN(remain, addr,
				    op->vnop_name, slen);
				return (1);
			}
		}
	}

	DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
	*illval = addr;
	return (0);
}

/*
 * Convenience routine to check to see if a given string is within a memory
 * region in which a load may be issued given the user's privilege level;
 * this exists so that we don't need to issue unnecessary dtrace_strlen()
 * calls in the event that the user has all privileges.
 */
static int
dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	size_t rsize;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
		DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
		return (1);
	}

	/*
	 * Even if the caller is uninterested in querying the remaining valid
	 * range, it is required to ensure that the access is allowed.
	 */
	if (remain == NULL) {
		remain = &rsize;
	}
	if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) {
		size_t strsz;
		/*
		 * Perform the strlen after determining the length of the
		 * memory region which is accessible.  This prevents timing
		 * information from being used to find NULs in memory which is
		 * not accessible to the caller.
		 */
		strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr,
		    MIN(sz, *remain));
		if (strsz <= *remain) {
			return (1);
		}
	}

	return (0);
}

/*
 * Convenience routine to check to see if a given variable is within a memory
 * region in which a load may be issued given the user's privilege level.
 */
static int
dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	size_t sz;
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	/*
	 * Calculate the max size before performing any checks since even
	 * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function
	 * return the max length via 'remain'.
	 */
	if (type->dtdt_kind == DIF_TYPE_STRING) {
		dtrace_state_t *state = vstate->dtvs_state;

		if (state != NULL) {
			sz = state->dts_options[DTRACEOPT_STRSIZE];
		} else {
			/*
			 * In helper context, we have a NULL state; fall back
			 * to using the system-wide default for the string size
			 * in this case.
			 */
			sz = dtrace_strsize_default;
		}
	} else {
		sz = type->dtdt_size;
	}

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
		DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz);
		return (1);
	}

	if (type->dtdt_kind == DIF_TYPE_STRING) {
		return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate,
		    vstate));
	}
	return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate,
	    vstate));
}

/*
 * Convert a string to a signed integer using safe loads.
 *
 * NOTE: This function uses various macros from strtolctype.h to manipulate
 * digit values, etc -- these have all been checked to ensure they make
 * no additional function calls.
 */
static int64_t
dtrace_strtoll(char *input, int base, size_t limit)
{
	uintptr_t pos = (uintptr_t)input;
	int64_t val = 0;
	int x;
	boolean_t neg = B_FALSE;
	char c, cc, ccc;
	uintptr_t end = pos + limit;

	/*
	 * Consume any whitespace preceding digits.
	 */
	while ((c = dtrace_load8(pos)) == ' ' || c == '\t')
		pos++;

	/*
	 * Handle an explicit sign if one is present.
	 */
	if (c == '-' || c == '+') {
		if (c == '-')
			neg = B_TRUE;
		c = dtrace_load8(++pos);
	}

	/*
	 * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it
	 * if present.
	 */
	if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' ||
	    cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) {
		pos += 2;
		c = ccc;
	}

	/*
	 * Read in contiguous digits until the first non-digit character.
	 */
	for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base;
	    c = dtrace_load8(++pos))
		val = val * base + x;

	return (neg ? -val : val);
}
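
/*
 * An illustrative, non-compiled sketch of the contract implemented above:
 * in a hosted C environment, libc's strtoll(3C) accepts the same leading
 * whitespace, optional sign, and optional "0x" prefix. This is a user-space
 * analogue offered for clarity only; it is not part of the DTrace source.
 */
#if 0
#include <assert.h>
#include <stdlib.h>

int
main(void)
{
	/* Leading whitespace and an explicit sign are consumed first. */
	assert(strtoll("  -42", NULL, 10) == -42LL);

	/* In base 16, an explicit "0x" prefix is skipped. */
	assert(strtoll("0x2a", NULL, 16) == 42LL);

	/* Parsing stops at the first non-digit character. */
	assert(strtoll("123abc", NULL, 10) == 123LL);
	return (0);
}
#endif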

/*
 * Compare two strings using safe loads.
 */
static int
dtrace_strncmp(char *s1, char *s2, size_t limit)
{
	uint8_t c1, c2;
	volatile uint16_t *flags;

	if (s1 == s2 || limit == 0)
		return (0);

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	do {
		if (s1 == NULL) {
			c1 = '\0';
		} else {
			c1 = dtrace_load8((uintptr_t)s1++);
		}

		if (s2 == NULL) {
			c2 = '\0';
		} else {
			c2 = dtrace_load8((uintptr_t)s2++);
		}

		if (c1 != c2)
			return (c1 - c2);
	} while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));

	return (0);
}

/*
 * Compute strlen(s) for a string using safe memory accesses. The additional
 * lim parameter specifies a maximum length to ensure completion.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	uint_t len;

	for (len = 0; len != lim; len++) {
		if (dtrace_load8((uintptr_t)s++) == '\0')
			break;
	}

	return (len);
}

/*
 * Check if an address falls within a toxic region.
 */
static int
dtrace_istoxic(uintptr_t kaddr, size_t size)
{
	uintptr_t taddr, tsize;
	int i;

	for (i = 0; i < dtrace_toxranges; i++) {
		taddr = dtrace_toxrange[i].dtt_base;
		tsize = dtrace_toxrange[i].dtt_limit - taddr;

		if (kaddr - taddr < tsize) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
			return (1);
		}

		if (taddr - kaddr < size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
			return (1);
		}
	}

	return (0);
}
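
/*
 * The two comparisons above rely on a common unsigned-wraparound idiom:
 * because kaddr - taddr wraps to a huge value when kaddr < taddr, a single
 * unsigned compare tests taddr <= kaddr < taddr + tsize, and the symmetric
 * test catches ranges that begin below the toxic region but reach into it.
 * The following is an illustrative, non-compiled user-space sketch of the
 * same idiom; the helper name and its length-based interface are
 * assumptions, not part of the DTrace source.
 */
#if 0
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static int
ranges_overlap(uintptr_t addr, size_t size, uintptr_t base, size_t limit)
{
	/* True iff base <= addr < base + limit, via wraparound. */
	if (addr - base < limit)
		return (1);

	/* True iff addr <= base < addr + size. */
	if (base - addr < size)
		return (1);

	return (0);
}

int
main(void)
{
	assert(ranges_overlap(0x1000, 0x100, 0x1080, 0x100));
	assert(ranges_overlap(0x1080, 0x100, 0x1000, 0x100));
	assert(!ranges_overlap(0x1000, 0x80, 0x2000, 0x100));
	return (0);
}
#endif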

/*
 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe
 * memory specified by the DIF program. The dst is assumed to be safe memory
 * that we can store to directly because it is managed by DTrace. As with
 * standard bcopy, overlapping copies are handled properly.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst;
		const uint8_t *s2 = src;

		if (s1 <= s2) {
			do {
				*s1++ = dtrace_load8((uintptr_t)s2++);
			} while (--len != 0);
		} else {
			s2 += len;
			s1 += len;

			do {
				*--s1 = dtrace_load8((uintptr_t)--s2);
			} while (--len != 0);
		}
	}
}
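
/*
 * A note on the direction choice above: when dst <= src, any overlap puts
 * the destination below the source, so a forward copy reads each source
 * byte before it can be overwritten; when dst > src, a backward copy has
 * the same property. For example, shifting the bytes of "abcd" down by one
 * position (src one byte above dst in the same buffer) must run forward,
 * while shifting them up by one must run backward -- the same rule that
 * memmove(3C) applies.
 */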

/*
 * Copy src to dst using safe memory accesses, up to either the specified
 * length, or the point that a nul byte is encountered. The src is assumed to
 * be unsafe memory specified by the DIF program. The dst is assumed to be
 * safe memory that we can store to directly because it is managed by DTrace.
 * Unlike dtrace_bcopy(), overlapping regions are not handled.
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst, c;
		const uint8_t *s2 = src;

		do {
			*s1++ = c = dtrace_load8((uintptr_t)s2++);
		} while (--len != 0 && c != '\0');
	}
}

/*
 * Copy src to dst, deriving the size and type from the specified (BYREF)
 * variable type. The src is assumed to be unsafe memory specified by the DIF
 * program. The dst is assumed to be DTrace variable memory that is of the
 * specified type; we assume that we can store to it directly.
 */
static void
dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit)
{
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	if (type->dtdt_kind == DIF_TYPE_STRING) {
		dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit));
	} else {
		dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit));
	}
}

/*
 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be
 * unsafe memory specified by the DIF program. The s2 data is assumed to be
 * safe memory that we can access directly because it is managed by DTrace.
 */
static int
dtrace_bcmp(const void *s1, const void *s2, size_t len)
{
	volatile uint16_t *flags;

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	if (s1 == s2)
		return (0);

	if (s1 == NULL || s2 == NULL)
		return (1);

	if (s1 != s2 && len != 0) {
		const uint8_t *ps1 = s1;
		const uint8_t *ps2 = s2;

		do {
			if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
				return (1);
		} while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
	}
	return (0);
}

/*
 * Zero the specified region using a simple byte-by-byte loop. Note that this
 * is for safe DTrace-managed memory only.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	uchar_t *cp;

	for (cp = dst; len != 0; len--)
		*cp++ = 0;
}

/*
 * Add the 128-bit values in addend1 and addend2, storing the result in sum.
 * Each 128-bit value is represented as two uint64_ts, low word first; a
 * carry out of the low words is detected by checking for unsigned
 * wraparound.
 */
static void
dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t result[2];

	result[0] = addend1[0] + addend2[0];
	result[1] = addend1[1] + addend2[1] +
	    (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);

	sum[0] = result[0];
	sum[1] = result[1];
}
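
/*
 * An illustrative, non-compiled check of the carry detection above. This
 * user-space sketch assumes a compiler that provides unsigned __int128 and
 * that dtrace_add_128() is visible to the caller; it is not part of the
 * DTrace source.
 */
#if 0
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint64_t a[2] = { UINT64_MAX, 0 };	/* low word all ones */
	uint64_t b[2] = { 1, 0 };
	uint64_t sum[2];

	dtrace_add_128(a, b, sum);

	/* UINT64_MAX + 1 carries into the high word. */
	assert(sum[0] == 0 && sum[1] == 1);

	unsigned __int128 wide = ((unsigned __int128)sum[1] << 64) | sum[0];
	assert(wide == (unsigned __int128)UINT64_MAX + 1);
	return (0);
}
#endif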

/*
 * Shift the 128-bit value in a by b. If b is positive, shift left.
 * If b is negative, shift right.
 */
static void
dtrace_shift_128(uint64_t *a, int b)
{
	uint64_t mask;

	if (b == 0)
		return;

	if (b < 0) {
		b = -b;
		if (b >= 64) {
			a[0] = a[1] >> (b - 64);
			a[1] = 0;
		} else {
			a[0] >>= b;
			mask = 1LL << (64 - b);
			mask -= 1;
			a[0] |= ((a[1] & mask) << (64 - b));
			a[1] >>= b;
		}
	} else {
		if (b >= 64) {
			a[1] = a[0] << (b - 64);
			a[0] = 0;
		} else {
			a[1] <<= b;
			mask = a[0] >> (64 - b);
			a[1] |= mask;
			a[0] <<= b;
		}
	}
}

/*
 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
 * use native multiplication on those, and then re-combine into the
 * resulting 128-bit value.
 *
 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
 *     hi1 * hi2 << 64 +
 *     hi1 * lo2 << 32 +
 *     hi2 * lo1 << 32 +
 *     lo1 * lo2
 */
static void
dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
{
	uint64_t hi1, hi2, lo1, lo2;
	uint64_t tmp[2];

	hi1 = factor1 >> 32;
	hi2 = factor2 >> 32;

	lo1 = factor1 & DT_MASK_LO;
	lo2 = factor2 & DT_MASK_LO;

	product[0] = lo1 * lo2;
	product[1] = hi1 * hi2;

	tmp[0] = hi1 * lo2;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);

	tmp[0] = hi2 * lo1;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);
}
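
/*
 * An illustrative, non-compiled check of the schoolbook decomposition above
 * against a compiler-provided wide multiply. This user-space sketch assumes
 * unsigned __int128 support and that dtrace_multiply_128() is visible to
 * the caller; it is not part of the DTrace source.
 */
#if 0
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint64_t f1 = 0xdeadbeefcafebabeULL;
	uint64_t f2 = 0x0123456789abcdefULL;
	uint64_t p[2];

	dtrace_multiply_128(f1, f2, p);

	unsigned __int128 wide = (unsigned __int128)f1 * f2;
	assert(p[0] == (uint64_t)wide);		/* low 64 bits */
	assert(p[1] == (uint64_t)(wide >> 64));	/* high 64 bits */
	return (0);
}
#endif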

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the user credentials of the process that enabled the
 * invoking ECB match the target credentials.
 */
static int
dtrace_priv_proc_common_user(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	if ((cr = CRED()) != NULL &&
	    s_cr->cr_uid == cr->cr_uid &&
	    s_cr->cr_uid == cr->cr_ruid &&
	    s_cr->cr_uid == cr->cr_suid &&
	    s_cr->cr_gid == cr->cr_gid &&
	    s_cr->cr_gid == cr->cr_rgid &&
	    s_cr->cr_gid == cr->cr_sgid)
		return (1);

	return (0);
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the zone of the process that enabled the invoking ECB
 * matches the target credentials.
 */
static int
dtrace_priv_proc_common_zone(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
		return (1);

	return (0);
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the process has not performed a setuid or otherwise changed
 * its credentials.
 */
static int
dtrace_priv_proc_common_nocd()
{
	proc_t *proc;

	if ((proc = ttoproc(curthread)) != NULL &&
	    !(proc->p_flag & SNOCD))
		return (1);

	return (0);
}

static int
dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	int action = state->dts_cred.dcr_action;

	if (!(mstate->dtms_access & DTRACE_ACCESS_PROC))
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
	    dtrace_priv_proc_common_zone(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
	    dtrace_priv_proc_common_user(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
	    dtrace_priv_proc_common_nocd() == 0)
		goto bad;

	return (1);

bad:
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	if (mstate->dtms_access & DTRACE_ACCESS_PROC) {
		if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
			return (1);

		if (dtrace_priv_proc_common_zone(state) &&
		    dtrace_priv_proc_common_user(state) &&
		    dtrace_priv_proc_common_nocd())
			return (1);
	}

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	if ((mstate->dtms_access & DTRACE_ACCESS_PROC) &&
	    (state->dts_cred.dcr_action & DTRACE_CRA_PROC))
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_kernel(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

static int
dtrace_priv_kernel_destructive(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

/*
 * Determine if the dte_cond of the specified ECB allows for processing of
 * the current probe to continue. Note that this routine may allow continued
 * processing, but with access(es) stripped from the mstate's dtms_access
 * field.
 */
static int
dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
    dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;
	dtrace_provider_t *prov = probe->dtpr_provider;
	dtrace_pops_t *pops = &prov->dtpv_pops;
	int mode = DTRACE_MODE_NOPRIV_DROP;

	ASSERT(ecb->dte_cond);

	if (pops->dtps_mode != NULL) {
		mode = pops->dtps_mode(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);

		ASSERT(mode & (DTRACE_MODE_USER | DTRACE_MODE_KERNEL));
		ASSERT(mode & (DTRACE_MODE_NOPRIV_RESTRICT |
		    DTRACE_MODE_NOPRIV_DROP));
	}

	/*
	 * If the dte_cond bits indicate that this consumer is only allowed to
	 * see user-mode firings of this probe, check that the probe was fired
	 * while in a user context. If that's not the case, use the policy
	 * specified by the provider to determine if we drop the probe or
	 * merely restrict operation.
	 */
	if (ecb->dte_cond & DTRACE_COND_USERMODE) {
		ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);

		if (!(mode & DTRACE_MODE_USER)) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
		}
	}

	/*
	 * This is more subtle than it looks. We have to be absolutely certain
	 * that CRED() isn't going to change out from under us so it's only
	 * legit to examine that structure if we're in constrained situations.
	 * Currently, the only time we'll perform this check is when a
	 * non-super-user has enabled the profile or syscall providers --
	 * providers that allow visibility of all processes. For the profile
	 * case, the check above will ensure that we're examining a user
	 * context.
	 */
	if (ecb->dte_cond & DTRACE_COND_OWNER) {
		cred_t *cr;
		cred_t *s_cr = state->dts_cred.dcr_cred;
		proc_t *proc;

		ASSERT(s_cr != NULL);

		if ((cr = CRED()) == NULL ||
		    s_cr->cr_uid != cr->cr_uid ||
		    s_cr->cr_uid != cr->cr_ruid ||
		    s_cr->cr_uid != cr->cr_suid ||
		    s_cr->cr_gid != cr->cr_gid ||
		    s_cr->cr_gid != cr->cr_rgid ||
		    s_cr->cr_gid != cr->cr_sgid ||
		    (proc = ttoproc(curthread)) == NULL ||
		    (proc->p_flag & SNOCD)) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
		}
	}

	/*
	 * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
	 * in our zone, check to see if our mode policy is to restrict rather
	 * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
	 * and DTRACE_ACCESS_ARGS.
	 */
	if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
		cred_t *cr;
		cred_t *s_cr = state->dts_cred.dcr_cred;

		ASSERT(s_cr != NULL);

		if ((cr = CRED()) == NULL ||
		    s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &=
			    ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
		}
	}

	/*
	 * By merits of being in this code path at all, we have limited
	 * privileges. If the provider has indicated that limited privileges
	 * are to denote restricted operation, strip off the ability to access
	 * arguments.
	 */
	if (mode & DTRACE_MODE_LIMITEDPRIV_RESTRICT)
		mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;

	return (1);
}

/*
 * Note: not called from probe context. This function is called
 * asynchronously (and at a regular interval) from outside of probe context to
 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
 * cleaning is explained in detail in <sys/dtrace_impl.h>.
 */
void
dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
	dtrace_dynvar_t *dirty;
	dtrace_dstate_percpu_t *dcpu;
	dtrace_dynvar_t **rinsep;
	int i, j, work = 0;

	for (i = 0; i < NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];
		rinsep = &dcpu->dtdsc_rinsing;

		/*
		 * If the dirty list is NULL, there is no dirty work to do.
		 */
		if (dcpu->dtdsc_dirty == NULL)
			continue;

		if (dcpu->dtdsc_rinsing != NULL) {
			/*
			 * If the rinsing list is non-NULL, then it is because
			 * this CPU was selected to accept another CPU's
			 * dirty list -- and since that time, dirty buffers
			 * have accumulated. This is a highly unlikely
			 * condition, but we choose to ignore the dirty
			 * buffers -- they'll be picked up by a future cleanse.
			 */
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1636 | continue; |
Bryan Cantrill | c9a6ea2 | 2010-07-23 17:34:02 -0700 | [diff] [blame] | 1637 | } |
| 1638 | |
| 1639 | if (dcpu->dtdsc_clean != NULL) { |
| 1640 | /* |
| 1641 | * If the clean list is non-NULL, then we're in a |
| 1642 | * situation where a CPU has done deallocations (we |
| 1643 | * have a non-NULL dirty list) but no allocations (we |
| 1644 | * also have a non-NULL clean list). We can't simply |
| 1645 | * move the dirty list into the clean list on this |
| 1646 | * CPU, yet we also don't want to allow this condition |
| 1647 | * to persist, lest a short clean list prevent a |
| 1648 | * massive dirty list from being cleaned (which in |
| 1649 | * turn could lead to otherwise avoidable dynamic |
| 1650 | * drops). To deal with this, we look for some CPU |
| 1651 | * with a NULL clean list, NULL dirty list, and NULL |
| 1652 | * rinsing list -- and then we borrow this CPU to |
| 1653 | * rinse our dirty list. |
| 1654 | */ |
| 1655 | for (j = 0; j < NCPU; j++) { |
| 1656 | dtrace_dstate_percpu_t *rinser; |
| 1657 | |
| 1658 | rinser = &dstate->dtds_percpu[j]; |
| 1659 | |
| 1660 | if (rinser->dtdsc_rinsing != NULL) |
| 1661 | continue; |
| 1662 | |
| 1663 | if (rinser->dtdsc_dirty != NULL) |
| 1664 | continue; |
| 1665 | |
| 1666 | if (rinser->dtdsc_clean != NULL) |
| 1667 | continue; |
| 1668 | |
| 1669 | rinsep = &rinser->dtdsc_rinsing; |
| 1670 | break; |
| 1671 | } |
| 1672 | |
| 1673 | if (j == NCPU) { |
| 1674 | /* |
| 1675 | * We were unable to find another CPU that |
| 1676 | * could accept this dirty list -- we are |
| 1677 | * therefore unable to clean it now. |
| 1678 | */ |
| 1679 | dtrace_dynvar_failclean++; |
| 1680 | continue; |
| 1681 | } |
| 1682 | } |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1683 | |
| 1684 | work = 1; |
| 1685 | |
| 1686 | /* |
| 1687 | * Atomically move the dirty list aside. |
| 1688 | */ |
| 1689 | do { |
| 1690 | dirty = dcpu->dtdsc_dirty; |
| 1691 | |
| 1692 | /* |
| 1693 | * Before we zap the dirty list, set the rinsing list. |
| 1694 | * (This allows for a potential assertion in |
| 1695 | * dtrace_dynvar(): if a free dynamic variable appears |
| 1696 | * on a hash chain, either the dirty list or the |
| 1697 | * rinsing list for some CPU must be non-NULL.) |
| 1698 | */ |
Bryan Cantrill | c9a6ea2 | 2010-07-23 17:34:02 -0700 | [diff] [blame] | 1699 | *rinsep = dirty; |
stevel@tonic-gate | 7c478bd | 2005-06-14 00:00:00 -0700 | [diff] [blame] | 1700 | dtrace_membar_producer(); |
| 1701 | } while (dtrace_casptr(&dcpu->dtdsc_dirty, |
| 1702 | dirty, NULL) != dirty); |
| 1703 | } |
| 1704 | |
| 1705 | if (!work) { |
| 1706 | /* |
| 1707 | * We have no work to do; we can simply return. |
| 1708 | */ |
| 1709 | return; |
| 1710 | } |
| 1711 | |
| 1712 | dtrace_sync(); |
| 1713 | |
| 1714 | for (i = 0; i < NCPU; i++) { |
| 1715 | dcpu = &dstate->dtds_percpu[i]; |
| 1716 | |
| 1717 | if (dcpu->dtdsc_rinsing == NULL) |
| 1718 | continue; |
| 1719 | |
| 1720 | /* |
| 1721 | * We are now guaranteed that no hash chain contains a pointer |
| 1722 | * into this dirty list; we can make it clean. |
| 1723 | */ |
| 1724 | ASSERT(dcpu->dtdsc_clean == NULL); |
| 1725 | dcpu->dtdsc_clean = dcpu->dtdsc_rinsing; |
| 1726 | dcpu->dtdsc_rinsing = NULL; |
| 1727 | } |
| 1728 | |
| 1729 | /* |
| 1730 | * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make |
| 1731 | * sure that all CPUs have seen all of the dtdsc_clean pointers. |
| 1732 | * This prevents a race whereby a CPU incorrectly decides that |
| 1733 | * the state should be something other than DTRACE_DSTATE_CLEAN |
| 1734 | * after dtrace_dynvar_clean() has completed. |
| 1735 | */ |
| 1736 | dtrace_sync(); |
| 1737 | |
| 1738 | dstate->dtds_state = DTRACE_DSTATE_CLEAN; |
| 1739 | } |
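
/*
 * The loop above detaches each dirty list with a single compare-and-swap of
 * the list head. The following is an illustrative, non-compiled sketch of
 * that detach idiom using C11 atomics in place of dtrace_casptr(); the type
 * and function names are assumptions, not part of the DTrace source.
 */
#if 0
#include <stdatomic.h>
#include <stddef.h>

struct node {
	struct node *next;
};

static struct node *
detach_all(_Atomic(struct node *) *headp)
{
	struct node *head = atomic_load(headp);

	/*
	 * Swing the head to NULL atomically; a concurrent push changes the
	 * head (reloaded into 'head' on failure) and forces another lap.
	 */
	while (!atomic_compare_exchange_weak(headp, &head, NULL))
		continue;

	return (head);
}
#endif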

/*
 * Depending on the value of the op parameter, this function looks up,
 * allocates, or deallocates an arbitrarily-keyed dynamic variable. If an
 * allocation is requested, this function will return a pointer to a
 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
 * variable can be allocated. If NULL is returned, the appropriate counter
 * will be incremented.
 */
dtrace_dynvar_t *
dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
    dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	uint64_t hashval = DTRACE_DYNHASH_VALID;
	dtrace_dynhash_t *hash = dstate->dtds_hash;
	dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
	processorid_t me = CPU->cpu_id, cpu = me;
	dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
	size_t bucket, ksize;
	size_t chunksize = dstate->dtds_chunksize;
	uintptr_t kdata, lock, nstate;
	uint_t i;

	ASSERT(nkeys != 0);

	/*
	 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
	 * algorithm. For the by-value portions, we perform the algorithm in
	 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
	 * bit, and seems to have only a minute effect on distribution. For
	 * the by-reference data, we perform "One-at-a-time" iterating (safely)
	 * over each referenced byte. It's painful to do this, but it's much
	 * better than pathological hash distribution. The efficacy of the
	 * hashing algorithm (and a comparison with other algorithms) may be
	 * found by running the ::dtrace_dynstat MDB dcmd.
	 */
	for (i = 0; i < nkeys; i++) {
		if (key[i].dttk_size == 0) {
			uint64_t val = key[i].dttk_value;

			hashval += (val >> 48) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 32) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 16) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += val & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);
		} else {
			/*
			 * This is incredibly painful, but it beats the hell
			 * out of the alternative.
			 */
			uint64_t j, size = key[i].dttk_size;
			uintptr_t base = (uintptr_t)key[i].dttk_value;

			if (!dtrace_canload(base, size, mstate, vstate))
				break;

			for (j = 0; j < size; j++) {
				hashval += dtrace_load8(base + j);
				hashval += (hashval << 10);
				hashval ^= (hashval >> 6);
			}
		}
	}

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (NULL);

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * There is a remote chance (ideally, 1 in 2^31) that our hashval
	 * comes out to be one of our two sentinel hash values. If this
	 * actually happens, we set the hashval to be a value known to be a
	 * non-sentinel value.
	 */
	if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
		hashval = DTRACE_DYNHASH_VALID;

	/*
	 * Yes, it's painful to do a divide here. If the cycle count becomes
	 * important here, tricks can be pulled to reduce it. (However, it's
	 * critical that hash collisions be kept to an absolute minimum;
	 * they're much more painful than a divide.) It's better to have a
	 * solution that generates few collisions and still keeps things
	 * relatively simple.
	 */
	bucket = hashval % dstate->dtds_hashsize;

	if (op == DTRACE_DYNVAR_DEALLOC) {
		volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;

		for (;;) {
			while ((lock = *lockp) & 1)
				continue;

			if (dtrace_casptr((void *)lockp,
			    (void *)lock, (void *)(lock + 1)) == (void *)lock)
				break;
		}

		dtrace_membar_producer();
	}

top:
	prev = NULL;
	lock = hash[bucket].dtdh_lock;

	dtrace_membar_consumer();

	start = hash[bucket].dtdh_chain;
	ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
	    start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
	    op != DTRACE_DYNVAR_DEALLOC));

	for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
		dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
		dtrace_key_t *dkey = &dtuple->dtt_key[0];

		if (dvar->dtdv_hashval != hashval) {
			if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
				/*
				 * We've reached the sink, and therefore the
				 * end of the hash chain; we can kick out of
				 * the loop knowing that we have seen a valid
				 * snapshot of state.
				 */
				ASSERT(dvar->dtdv_next == NULL);
				ASSERT(dvar == &dtrace_dynhash_sink);
				break;
			}

			if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
				/*
				 * We've gone off the rails: somewhere along
				 * the line, one of the members of this hash
				 * chain was deleted. Note that we could also
				 * detect this by simply letting this loop run
				 * to completion, as we would eventually hit
				 * the end of the dirty list. However, we
				 * want to avoid running the length of the
				 * dirty list unnecessarily (it might be quite
				 * long), so we catch this as early as
				 * possible by detecting the hash marker. In
				 * this case, we simply set dvar to NULL and
				 * break; the conditional after the loop will
				 * send us back to top.
				 */
				dvar = NULL;
				break;
			}

			goto next;
		}

		if (dtuple->dtt_nkeys != nkeys)
			goto next;

		for (i = 0; i < nkeys; i++, dkey++) {
			if (dkey->dttk_size != key[i].dttk_size)
				goto next; /* size or type mismatch */

			if (dkey->dttk_size != 0) {
				if (dtrace_bcmp(
				    (void *)(uintptr_t)key[i].dttk_value,
				    (void *)(uintptr_t)dkey->dttk_value,
				    dkey->dttk_size))
					goto next;
			} else {
				if (dkey->dttk_value != key[i].dttk_value)
					goto next;
			}
		}

		if (op != DTRACE_DYNVAR_DEALLOC)
			return (dvar);

		ASSERT(dvar->dtdv_next == NULL ||
		    dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);

		if (prev != NULL) {
			ASSERT(hash[bucket].dtdh_chain != dvar);
			ASSERT(start != dvar);
			ASSERT(prev->dtdv_next == dvar);
			prev->dtdv_next = dvar->dtdv_next;
		} else {
			if (dtrace_casptr(&hash[bucket].dtdh_chain,
			    start, dvar->dtdv_next) != start) {
				/*
				 * We have failed to atomically swing the
				 * hash table head pointer, presumably because
				 * of a conflicting allocation on another CPU.
				 * We need to reread the hash chain and try
				 * again.
				 */
				goto top;
			}
		}

		dtrace_membar_producer();

		/*
		 * Now set the hash value to indicate that it's free.
		 */
		ASSERT(hash[bucket].dtdh_chain != dvar);
		dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

		dtrace_membar_producer();

		/*
		 * Set the next pointer to point at the dirty list, and
		 * atomically swing the dirty pointer to the newly freed dvar.
		 */
		do {
			next = dcpu->dtdsc_dirty;
			dvar->dtdv_next = next;
		} while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);

		/*
		 * Finally, unlock this hash bucket.
		 */
		ASSERT(hash[bucket].dtdh_lock == lock);
		ASSERT(lock & 1);
		hash[bucket].dtdh_lock++;

		return (NULL);
next:
		prev = dvar;
		continue;
	}

	if (dvar == NULL) {
		/*
		 * If dvar is NULL, it is because we went off the rails:
		 * one of the elements that we traversed in the hash chain
		 * was deleted while we were traversing it. In this case,
		 * we assert that we aren't doing a dealloc (deallocs lock
		 * the hash bucket to prevent themselves from racing with
		 * one another), and retry the hash chain traversal.
		 */
		ASSERT(op != DTRACE_DYNVAR_DEALLOC);
		goto top;
	}

	if (op != DTRACE_DYNVAR_ALLOC) {
		/*
		 * If we are not to allocate a new variable, we want to
		 * return NULL now. Before we return, check that the value
		 * of the lock word hasn't changed. If it has, we may have
		 * seen an inconsistent snapshot.
		 */
		if (op == DTRACE_DYNVAR_NOALLOC) {
			if (hash[bucket].dtdh_lock != lock)
				goto top;
		} else {
			ASSERT(op == DTRACE_DYNVAR_DEALLOC);
			ASSERT(hash[bucket].dtdh_lock == lock);
			ASSERT(lock & 1);
			hash[bucket].dtdh_lock++;
		}

		return (NULL);
	}

	/*
	 * We need to allocate a new dynamic variable. The size we need is the
	 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
	 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
	 * the size of any referred-to data (dsize). We then round the final
	 * size up to the chunksize for allocation.
	 */
	for (ksize = 0, i = 0; i < nkeys; i++)
		ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));

	/*
	 * This should be pretty much impossible, but could happen if, say,
	 * strange DIF specified the tuple. Ideally, this should be an
	 * assertion and not an error condition -- but that requires that the
	 * chunksize calculation in dtrace_difo_chunksize() be absolutely
	 * bullet-proof. (That is, it must not be able to be fooled by
	 * malicious DIF.) Given the lack of backwards branches in DIF,
	 * solving this would presumably not amount to solving the Halting
	 * Problem -- but it still seems awfully hard.
	 */
	if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
	    ksize + dsize > chunksize) {
		dcpu->dtdsc_drops++;
		return (NULL);
	}

	nstate = DTRACE_DSTATE_EMPTY;

	do {
retry:
		free = dcpu->dtdsc_free;

		if (free == NULL) {
			dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
			void *rval;

			if (clean == NULL) {
				/*
				 * We're out of dynamic variable space on
				 * this CPU. Unless we have tried all CPUs,
				 * we'll try to allocate from a different
				 * CPU.
				 */
				switch (dstate->dtds_state) {
				case DTRACE_DSTATE_CLEAN: {
					void *sp = &dstate->dtds_state;

					if (++cpu >= NCPU)
						cpu = 0;

					if (dcpu->dtdsc_dirty != NULL &&
					    nstate == DTRACE_DSTATE_EMPTY)
						nstate = DTRACE_DSTATE_DIRTY;

					if (dcpu->dtdsc_rinsing != NULL)
						nstate = DTRACE_DSTATE_RINSING;

					dcpu = &dstate->dtds_percpu[cpu];

					if (cpu != me)
						goto retry;

					(void) dtrace_cas32(sp,
					    DTRACE_DSTATE_CLEAN, nstate);

					/*
					 * To increment the correct bean
					 * counter, take another lap.
					 */
					goto retry;
				}

				case DTRACE_DSTATE_DIRTY:
					dcpu->dtdsc_dirty_drops++;
					break;

				case DTRACE_DSTATE_RINSING:
					dcpu->dtdsc_rinsing_drops++;
					break;

				case DTRACE_DSTATE_EMPTY:
					dcpu->dtdsc_drops++;
					break;
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
				return (NULL);
			}

			/*
			 * The clean list appears to be non-empty. We want to
			 * move the clean list to the free list; we start by
			 * moving the clean pointer aside.
			 */
			if (dtrace_casptr(&dcpu->dtdsc_clean,
			    clean, NULL) != clean) {
				/*
				 * We are in one of two situations:
				 *
				 *  (a)	The clean list was switched to the
				 *	free list by another CPU.
				 *
				 *  (b)	The clean list was added to by the
				 *	cleansing cyclic.
				 *
				 * In either of these situations, we can
				 * just reattempt the free list allocation.
				 */
				goto retry;
			}

			ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);

			/*
			 * Now we'll move the clean list to our free list.
			 * It's impossible for this to fail: the only way
			 * the free list can be updated is through this
			 * code path, and only one CPU can own the clean list.
			 * Thus, it would only be possible for this to fail if
			 * this code were racing with dtrace_dynvar_clean().
			 * (That is, if dtrace_dynvar_clean() updated the clean
			 * list, and we ended up racing to update the free
			 * list.) This race is prevented by the dtrace_sync()
			 * in dtrace_dynvar_clean() -- which flushes the
			 * owners of the clean lists out before resetting
			 * the clean lists.
			 */
			dcpu = &dstate->dtds_percpu[me];
			rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
			ASSERT(rval == NULL);
			goto retry;
		}

		dvar = free;
		new_free = dvar->dtdv_next;
	} while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);

	/*
	 * We have now allocated a new chunk. We copy the tuple keys into the
	 * tuple array and copy any referenced key data into the data space
	 * following the tuple array. As we do this, we relocate dttk_value
	 * in the final tuple to point to the key data address in the chunk.
	 */
	kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
	dvar->dtdv_data = (void *)(kdata + ksize);
	dvar->dtdv_tuple.dtt_nkeys = nkeys;

	for (i = 0; i < nkeys; i++) {
		dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
		size_t kesize = key[i].dttk_size;

		if (kesize != 0) {
			dtrace_bcopy(
			    (const void *)(uintptr_t)key[i].dttk_value,
			    (void *)kdata, kesize);
			dkey->dttk_value = kdata;
			kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
		} else {
			dkey->dttk_value = key[i].dttk_value;
		}

		dkey->dttk_size = kesize;
	}

	ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
	dvar->dtdv_hashval = hashval;
	dvar->dtdv_next = start;

	if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
		return (dvar);

	/*
	 * The cas has failed. Either another CPU is adding an element to
	 * this hash chain, or another CPU is deleting an element from this
	 * hash chain. The simplest way to deal with both of these cases
	 * (though not necessarily the most efficient) is to free our
	 * allocated block and re-attempt it all. Note that the free is
	 * to the dirty list and _not_ to the free list. This is to prevent
	 * races with allocators, above.
	 */
	dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

	dtrace_membar_producer();

	do {
		free = dcpu->dtdsc_dirty;
		dvar->dtdv_next = free;
	} while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);

	goto top;
}
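
/*
 * An illustrative, non-compiled user-space rendering of the Jenkins
 * "One-at-a-time" hash used above, in its byte-at-a-time form with the same
 * finalization steps. (The by-value keys above use a 16-bit-chunk variant.)
 * This sketch assumes a hosted environment and is not part of the DTrace
 * source.
 */
#if 0
#include <stddef.h>
#include <stdint.h>

static uint64_t
jenkins_one_at_a_time(const unsigned char *data, size_t len)
{
	uint64_t hashval = 0;
	size_t i;

	/* Mix in one byte at a time. */
	for (i = 0; i < len; i++) {
		hashval += data[i];
		hashval += (hashval << 10);
		hashval ^= (hashval >> 6);
	}

	/* Final avalanche, as in the code above. */
	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	return (hashval);
}
#endif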

/*ARGSUSED*/
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	if ((int64_t)nval < (int64_t)*oval)
		*oval = nval;
}

/*ARGSUSED*/
static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	if ((int64_t)nval > (int64_t)*oval)
		*oval = nval;
}

static void
dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
{
	int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
	int64_t val = (int64_t)nval;

	if (val < 0) {
		for (i = 0; i < zero; i++) {
			if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i] += incr;
				return;
			}
		}
	} else {
		for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
			if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i - 1] += incr;
				return;
			}
		}

		quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
		return;
	}

	ASSERT(0);
}
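
/*
 * A worked example of the power-of-two bucketing above, assuming the
 * conventional bucket values ..., -4, -2, -1, 0, 1, 2, 4, 8, ...: a value
 * of 7 fails the "val < DTRACE_QUANTIZE_BUCKETVAL(i)" test until the bucket
 * valued 8 is reached, so quanta[i - 1] -- the bucket valued 4, counting
 * values in [4, 8) -- is incremented. Negative values walk the buckets
 * below DTRACE_QUANTIZE_ZEROBUCKET and land in the first bucket whose
 * value they do not exceed.
 */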

static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *lquanta++;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	int32_t val = (int32_t)nval, level;

	ASSERT(step != 0);
	ASSERT(levels != 0);

	if (val < base) {
		/*
		 * This is an underflow.
		 */
		lquanta[0] += incr;
		return;
	}

	level = (val - base) / step;

	if (level < levels) {
		lquanta[level + 1] += incr;
		return;
	}

	/*
	 * This is an overflow.
	 */
	lquanta[levels + 1] += incr;
}
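
/*
 * A worked example of the linear bucketing above: with base = 0, step = 10,
 * and levels = 10, a value of 37 yields level = (37 - 0) / 10 = 3, so
 * lquanta[3 + 1] -- the bucket counting values in [30, 40) -- is
 * incremented. lquanta[0] counts underflows (values below base), and
 * lquanta[levels + 1] counts overflows.
 */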

static int
dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low,
    uint16_t high, uint16_t nsteps, int64_t value)
{
	int64_t this = 1, last, next;
	int base = 1, order;

	ASSERT(factor <= nsteps);
	ASSERT(nsteps % factor == 0);

	for (order = 0; order < low; order++)
		this *= factor;

	/*
	 * If our value is less than our factor taken to the power of the
	 * low order of magnitude, it goes into the zeroth bucket.
	 */
	if (value < (last = this))
		return (0);

	for (this *= factor; order <= high; order++) {
		int nbuckets = this > nsteps ? nsteps : this;

		if ((next = this * factor) < this) {
			/*
			 * We should not generally get log/linear quantizations
			 * with a high magnitude that allows 64-bits to
			 * overflow, but we nonetheless protect against this
			 * by explicitly checking for overflow, and clamping
			 * our value accordingly.
			 */
			value = this - 1;
		}

		if (value < this) {
			/*
			 * If our value lies within this order of magnitude,
			 * determine its position by taking the offset within
			 * the order of magnitude, dividing by the bucket
			 * width, and adding to our (accumulated) base.
			 */
			return (base + (value - last) / (this / nbuckets));
		}

		base += nbuckets - (nbuckets / factor);
		last = this;
		this = next;
	}

	/*
	 * Our value is greater than or equal to our factor taken to the
	 * power of one plus the high magnitude -- return the top bucket.
	 */
	return (base);
}
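
/*
 * A worked example of the bucketing above: with factor = 10, low = 0,
 * high = 2, and nsteps = 10, bucket 0 counts values below 10^0 = 1; each of
 * the ranges [1, 10), [10, 100), and [100, 1000) is split into nine
 * buckets; and the final bucket counts values of 1000 and above. A value
 * of 42 falls within [10, 100), landing in bucket 10 + (42 - 10) / 10 = 13,
 * which counts values in [40, 50).
 */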

static void
dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *llquanta++;
	uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
	uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
	uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
	uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);

	llquanta[dtrace_aggregate_llquantize_bucket(factor,
	    low, high, nsteps, nval)] += incr;
}

/*
 * data[0] accumulates the count and data[1] the running sum; the consumer
 * derives the average as data[1] / data[0].
 */
/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
	data[0]++;
	data[1] += nval;
}

/*ARGSUSED*/
static void
dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
{
	int64_t snval = (int64_t)nval;
	uint64_t tmp[2];

	data[0]++;
	data[1] += nval;

	/*
	 * What we want to say here is:
	 *
	 * data[2] += nval * nval;
	 *
	 * But given that nval is 64-bit, we could easily overflow, so
	 * we do this as 128-bit arithmetic.
	 */
	if (snval < 0)
		snval = -snval;

	dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp);
	dtrace_add_128(data + 2, tmp, data + 2);
}
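
/*
 * An illustrative, non-compiled sketch of how a consumer could reduce the
 * tuple accumulated above to a standard deviation, via the identity
 * variance = E[x^2] - (E[x])^2. For simplicity it assumes the sum of
 * squares fits in the low word (data[2]); a real consumer must combine the
 * 128-bit value held in data[2] and data[3]. The function name is an
 * assumption, not part of the DTrace source.
 */
#if 0
#include <math.h>
#include <stdint.h>

static double
stddev_of(const uint64_t *data)
{
	double n = (double)data[0];		/* count */
	double mean = (double)data[1] / n;	/* sum / count */

	return (sqrt((double)data[2] / n - mean * mean));
}
#endif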

/*ARGSUSED*/
static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	*oval = *oval + 1;
}

/*ARGSUSED*/
static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	*oval += nval;
}

/*
 * Aggregate given the tuple in the principal data buffer, and the aggregating
 * action denoted by the specified dtrace_aggregation_t. The aggregation
 * buffer is specified as the buf parameter. This routine does not return
 * failure; if there is no space in the aggregation buffer, the data will be
 * dropped, and a corresponding counter incremented.
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
	dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
	uint32_t i, ndx, size, fsize;
	uint32_t align =