/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2018, Joyent, Inc.
 */

/*
 * Use a cpu_uarray_t for an array of uint64_t values that are written on a
 * per-CPU basis. We align each CPU's values on a 128-byte boundary (so two
 * cachelines). It's not clear why, but this can have a significant effect in
 * multi-socket systems running certain benchmarks on a relatively current
 * Intel system.
 *
 * So the layout is like this, for example (byte offsets into cu_vals):
 *
 *	  0: STAT1 for CPU 0
 *	  8: STAT2 for CPU 0
 *	 16: STAT3 for CPU 0
 *	 24: padding
 *	128: STAT1 for CPU 1
 *	136: STAT2 for CPU 1
 *	...
 *
 * At collection time, cpu_uarray_sum() can be used to sum the given value index
 * across all CPUs, or cpu_uarray_sum_all() sums all stats across all CPUs.
 * The summation is done such that it saturates at UINT64_MAX.
 */

#ifndef _SYS_CPU_UARRAY_H
#define _SYS_CPU_UARRAY_H

#include <sys/types.h>

#ifdef __cplusplus
extern "C" {
#endif

#ifdef _KERNEL

/*
 * Trying to include sysmacros.h for P2ROUNDUP() here is just too painful, so
 * we carry a private equivalent.
 *
 * CUA_ROUNDUP(x, align) rounds x up to the next multiple of align, which must
 * be a power of two.  It relies on two's-complement negation, so both operands
 * must have the same width: a 32-bit unsigned x combined with a 64-bit align
 * is zero-extended only after it has been negated, which yields garbage.
 */
#define	CUA_ROUNDUP(x, align)	(-(-(x) & -(align)))

/* Byte alignment of each CPU's run of values (two cachelines). */
#define	CUA_ALIGN	(128)

/*
 * Number of uint64_t slots reserved per CPU: nr_items rounded up to a whole
 * number of CUA_ALIGN-byte chunks.  Both operands are widened to uint64_t
 * before rounding so the result is correct for any integer type of nr_items.
 */
#define	CUA_CPU_STRIDE(nr_items) \
	CUA_ROUNDUP((uint64_t)(nr_items), \
	    (uint64_t)(CUA_ALIGN / sizeof (uint64_t)))

/* Index into cu_vals of stat i for CPU c, given nr_items stats per CPU. */
#define	CUA_INDEX(nr_items, c, i)	(((c) * CUA_CPU_STRIDE(nr_items)) + (i))

/*
 * Expands to the (assignable) counter for stat 'stat_idx' belonging to CPU
 * 'cpu_idx' in the cpu_uarray_t pointed to by 'uap'.  The per-CPU stride is
 * derived from the array's own cu_nr_items.  Note that 'uap' is evaluated
 * twice, so it must not have side effects.
 */
#define	CPU_UARRAY_VAL(uap, cpu_idx, stat_idx) \
	((uap)->cu_vals[CUA_INDEX((uap)->cu_nr_items, cpu_idx, stat_idx)])

| 60 | typedef struct { |
| 61 | uint64_t cu_nr_items; |
| 62 | char cu_pad[CUA_ALIGN - sizeof (uint64_t)]; |
| 63 | #ifdef __lint |
| 64 | volatile uint64_t cu_vals[1]; |
| 65 | #else |
| 66 | volatile uint64_t cu_vals[]; |
| 67 | #endif |
| 68 | } cpu_uarray_t __aligned(CUA_ALIGN); |
| 69 | |
| 70 | extern cpu_uarray_t *cpu_uarray_zalloc(size_t, int); |
| 71 | extern void cpu_uarray_free(cpu_uarray_t *); |
| 72 | extern uint64_t cpu_uarray_sum(cpu_uarray_t *, size_t); |
| 73 | extern uint64_t cpu_uarray_sum_all(cpu_uarray_t *); |
| 74 | |
#endif /* _KERNEL */

#ifdef __cplusplus
}
#endif

#endif /* _SYS_CPU_UARRAY_H */