Keith M Wesolowski | ba215ef | 2022-07-21 06:57:54 -0700 | [diff] [blame] | 1 | /* |
| 2 | * This file and its contents are supplied under the terms of the |
| 3 | * Common Development and Distribution License ("CDDL"), version 1.0. |
| 4 | * You may only use this file in accordance with the terms of version |
| 5 | * 1.0 of the CDDL. |
| 6 | * |
| 7 | * A full copy of the text of the CDDL should have accompanied this |
| 8 | * source. A copy of the CDDL is also available via the Internet at |
| 9 | * http://www.illumos.org/license/CDDL. |
| 10 | */ |
| 11 | |
| 12 | /* |
| 13 | * Copyright 2022 Oxide Computer Co. |
| 14 | */ |
| 15 | |
| 16 | #ifndef _SYS_AMDZEN_SMN_H |
| 17 | #define _SYS_AMDZEN_SMN_H |
| 18 | |
| 19 | #include <sys/debug.h> |
Keith M Wesolowski | 4adf43b | 2022-11-09 07:00:30 +0000 | [diff] [blame^] | 20 | #include <sys/sysmacros.h> |
Keith M Wesolowski | ba215ef | 2022-07-21 06:57:54 -0700 | [diff] [blame] | 21 | #include <sys/types.h> |
| 22 | |
| 23 | /* |
| 24 | * Generic definitions for the system management network (SMN) in Milan and many |
| 25 | * other AMD Zen processors. These are shared between the amdzen nexus and its |
| 26 | * client drivers and kernel code that may require SMN access to resources. |
| 27 | * |
| 28 | * ------------------------ |
| 29 | * Endpoints and Addressing |
| 30 | * ------------------------ |
| 31 | * |
| 32 | * SMN addresses are 36 bits long but in practice we can use only 32. Bits |
| 33 | * [35:32] identify a destination node, but all consumers instead direct SMN |
| 34 | * transactions to a specific node by selecting the address/data register pair |
| 35 | * in the NBIO PCI config space corresponding to the destination. Additional |
| 36 | * information about nodes and the organisation of devices in the Zen |
| 37 | * architecture may be found in the block comments in amdzen.c and cpuid.c. |
| 38 | * |
| 39 | * The SMN provides access to instances of various functional units present on |
| 40 | * or accessed via each node. Some functional units have only a single instance |
| 41 | * per node while others may have many. Each functional unit instance has one |
| 42 | * or more apertures in which it decodes addresses. The aperture portion of the |
| 43 | * address consists of bits [31:20] and the remainder of the address is used to |
| 44 | * specify a register instance within that functional unit. To complicate |
| 45 | * matters, some functional units have multiple smaller sub-units that decode |
| 46 | * smaller regions within its parent's aperture; in some cases, the bits in a |
| 47 | * mask describing the sub-unit's registers may not be contiguous. To keep |
| 48 | * software relatively simple, we generally treat sub-units and parent units the |
| 49 | * same and try to choose collections of registers whose addresses can all be |
| 50 | * computed in the same manner to form what we will describe as a unit. |
| 51 | * |
| 52 | * Each functional unit should typically have its own header containing register |
| 53 | * definitions, accessors, and address calculation routines; some functional |
| 54 | * units are small and straightforward while others may have numerous complex |
| 55 | * sub-units, registers with many instances whose locations are computed in |
| 56 | * unusual and nonstandard ways, and other features that need to be declared for |
| 57 | * consumers. Those functional units that are present across many processors |
| 58 | * and have similar or identical contents across them should live in this |
| 59 | * directory; umc.h is such an example. Others may be specific to a particular |
| 60 | * processor family (see cpuid.c) or other collection and may require their own |
| 61 | * subdirectories, symbol prefixes, and so on. Unlike the DF, the existence, |
| 62 | * location, and format of registers accessible over SMN are not versioned nor |
| 63 | * are they generally self-discoverable. Each functional unit may be present or |
| 64 | * absent, in varying numbers and with varying functionality, across the entire |
| 65 | * Zen product range. Therefore, at this time most per-unit headers are |
| 66 | * intended for use only by code that will execute on a specific processor |
| 67 | * family. Unifying them over time is considered desirable to the extent the |
| 68 | * hardware allows it. |
| 69 | * |
| 70 | * ----- |
| 71 | * Types |
| 72 | * ----- |
| 73 | * |
| 74 | * Practically every last one of us has screwed up the order of arguments to |
| 75 | * functions like amdzen_smn_write32() when they take an address and a value of |
| 76 | * the same type. Repeatedly. Often. To safety this particularly annoying |
| 77 | * footgun, we pass SMN register addresses around in a dedicated struct type |
| 78 | * smn_reg_t, intended to be instantiated only by the amdzen_xx_smn_reg() and |
| 79 | * analogous kernel functions and the macros that expand to them or, for the |
| 80 | * YOLO crew, SMN_MAKE_REG(). Since the struct type and uint32_t are not |
| 81 | * compatible, the compiler will always squawk if the register and value |
| 82 | * arguments are reversed, leaving us far fewer baffling failures to debug at |
| 83 | * runtime. Typical callers don't require any awareness of this at all, but |
| 84 | * those that want to pass the address around to e.g. log warnings can obtain |
| 85 | * the uint32_t address via SMN_REG_ADDR(). |
| 86 | * |
| 87 | * Register definitions within functional units are provided by objects of type |
| 88 | * `const smn_reg_def_t`, the usage of which is described in detail in the next |
| 89 | * section. For now these are produced on demand by macros; see additional |
| 90 | * notes on conventions below. In time, this mechanism may be extended to |
| 91 | * incorporate version information in a manner similar to that used in df.h. An |
| 92 | * automated mechanism for creating a single collection of register and field |
| 93 | * definitions for C, in CTF, and/or for other language consumers as well as |
| 94 | * automated register value decoding remains an open area for future work. |
| 95 | * |
| 96 | * ----------------------- |
| 97 | * Instances and Iterators |
| 98 | * ----------------------- |
| 99 | * |
| 100 | * Not only do some functional units have many instances, so too do many |
| 101 | * registers. AMD documentation describes registers in terms of a series of |
| 102 | * iterators over various functional units, subunits, and other entities and |
| 103 | * attributes that each multiply the number of register instances. A concrete |
| 104 | * example from the publicly-available Naples PPR (publication 54945 rev. 1.14) |
| 105 | * may make this simpler to understand. Unfortunately, SMN is not described by |
| 106 | * this document, but the register instance syntax used is the same and is |
| 107 | * described in additional detail in sections 1.3.3-4. For our example, let us |
| 108 | * consider the same MSR that AMD uses in their own example, |
| 109 | * Core::X86::MSR::TSC. We are given that this register has the following |
| 110 | * instances: lthree[1:0]_core[3:0]_thread[1:0]. We therefore have three |
| 111 | * iterators: one for 'lthree's, one for 'core's for each 'lthree', and one for |
| 112 | * 'thread's for each 'core'. We can also see that there are 16 total |
| 113 | * instances; in fact, there are actually 16 per core-complex die (CCD), which |
| 114 | * documents for more recent processors would expose as a fourth iterator. To |
| 115 | * keep things relatively simple, we will assume that there are only 16 per |
| 116 | * processor. If it were possible to access all of these instances via MMIO, |
| 117 | * SMN, or some other flat address space (it isn't, as far as we can tell), a |
| 118 | * function for computing the address of each instance would require three |
| 119 | * parameters. Let us suppose that this register really were accessible via |
| 120 | * SMN; in that case, we would also be provided with a list of instance |
| 121 | * aliases such as |
| 122 | * |
| 123 | * _thread[1:0]_core[7:0]_lthree[1:0]_alias_SMN: THREADREGS[1:0]x0000_0010; |
| 124 | * THREADREGS[1:0]=COREREGS[7:0]x0000_[4,0]000; |
| 125 | * COREREGS[7:0]=L3REGS[1:0]x000[7:0]_5000; L3REGS[1:0]=57[A,6]0_0000 |
| 126 | * |
| 127 | * To compute the address of an instance of this hypothetical register, we would |
| 128 | * begin by determining that its top-level functional unit is L3REGS with a base |
| 129 | * aperture at 0x5760_0000. There are two instances of this functional unit (0 |
| 130 | * and 1) and each subsequent instance is offset 0x40_0000 from the previous. |
| 131 | * This allows us to compute the base address of each L3REGS block; a similar |
| 132 | * process is then used to compute the base address of each COREREGS block, and |
| 133 | * finally the address of each THREADREGS block that contains the register |
| 134 | * instance. In practice, we might choose instead to consider the COREREGS as |
| 135 | * our functional unit, with instances at 0x5760_5000, 0x5761_5000, 0x57A0_5000, |
| 136 | * and 0x57A1_5000; whether it is useful to do this depends on whether we need |
| 137 | * to consider other registers in the L3REGS unit that may not have per-core |
| 138 | * blocks or instances but would otherwise be interleaved with these. This ends |
| 139 | * up being something of a judgment call. Let's suppose we want to consider the |
| 140 | * entire L3REGS functional unit and write a function to compute the address of |
| 141 | * any register (including our hypothetical TSC) in the subordinate THREADREGS |
| 142 | * blocks. We'll start by adding the new unit to the smn_unit_t enumeration; |
| 143 | * let's call it SMN_UNIT_L3REGS_COREREGS since that's the sub-unit level at |
| 144 | * which we can uniformly compute register instance addresses. We have already |
| 145 | * determined our base aperture and we know that we have 3 iterators and |
| 146 | * therefore three parameters; all SMN address calculators return an smn_reg_t |
| 147 | * and must accept an smn_reg_def_t. Therefore our function's signature is: |
| 148 | * |
| 149 | * smn_reg_t amdzen_smn_l3regs_coreregs_reg(uint8_t l3no, |
| 150 | * const smn_reg_def_t def, uint16_t coreinst, uint16_t threadinst); |
| 151 | * |
| 152 | * We have chosen to use a base aperture of 0x5760_0000 and unit offset |
| 153 | * 0x40_0000, so we can begin by computing a COREREGS aperture: |
| 154 | * |
| 155 | * const uint32_t aperture_base = 0x57600000; |
| 156 | * const uint32_t aperture_off = l3no * 0x400000; |
| 157 | * const uint32_t coreregs_aperture_base = 0x5000; |
| 158 | * const uint32_t coreregs_aperture_off = coreinst * 0x10000; |
| 159 | * |
| 160 | * We can now consider the smn_reg_def_t our function will be given, which |
| 161 | * describes THREADREGS::TSC. Within the COREREGS functional sub-unit, each |
| 162 | * thread register has 2 instances present at a stride of 0x4000 bytes (from our |
| 163 | * hypothetical register definition), so the register would be defined as |
| 164 | * follows: |
| 165 | * |
| 166 | * #define D_L3REGS_COREREGS_THREAD_TSC (const smn_reg_def_t){ \ |
| 167 | * .srd_unit = SMN_UNIT_L3REGS_COREREGS, \ |
| 168 | * .srd_reg = 0x10, \ |
| 169 | * .srd_nents = 2, \ |
| 170 | * .srd_stride = 0x4000 \ |
| 171 | * } |
| 172 | * |
| 173 | * Note that describing the number of entries and their stride in the register |
| 174 | * definition allows us to collapse the last functional sub-unit in our |
| 175 | * calculation process: we need not compute the base aperture address of the |
| 176 | * THREADREGS sub-unit. Instead, we can follow our previous code with: |
| 177 | * |
| 178 | * const uint32_t aperture = aperture_base + aperture_off + |
| 179 | * coreregs_aperture_base + coreregs_aperture_off; |
| 180 | * const uint32_t reg = def.srd_reg + threadinst * def.srd_stride; |
| 181 | * |
| 182 | * Finally, we convert the aperture address and register offset into the |
| 183 | * appropriate type and return it: |
| 184 | * |
| 185 | * return (SMN_MAKE_REG(aperture + reg)); |
| 186 | * |
| 187 | * As you can see, other registers in THREADREGS would be defined with the same |
| 188 | * number of entries and stride but a different offset (srd_reg member), while |
| 189 | * other registers in the COREREGS block would have a different offset and |
| 190 | * stride. For example, if a block of per-core (not per-thread) registers were |
| 191 | * located at COREREGS[7:0]x0000_1000, a register called "COREREGS::FrobberCntl" |
| 192 | * in that block with a single instance at offset 0x48 might be defined as |
| 193 | * |
| 194 | * #define D_L3REGS_COREREGS_FROB_CTL (const smn_reg_def_t){ \ |
| 195 | * .srd_unit = SMN_UNIT_L3REGS_COREREGS, \ |
| 196 | * .srd_reg = 0x1048, \ |
| 197 | * .srd_nents = 1 \ |
| 198 | * } |
| 199 | * |
| 200 | * You can satisfy yourself that the same calculation function we wrote above |
| 201 | * will correctly compute the address of the sole instance (0) of this register. |
| 202 | * To further simplify register definitions and callers, the actual address |
| 203 | * calculation functions are written to treat srd_nents == 0 to mean a register |
| 204 | * with a single instance, and to treat srd_stride == 0 as if it were 4 (the |
| 205 | * space occupied by registers accessed by SMN is -- so far as we can tell, |
| 206 | * practically always -- 4 bytes in size, even if the register itself is |
| 207 | * smaller). Additionally, a large number of assertions should be present in |
| 208 | * such functions to guard against foreign unit register definitions, |
| 209 | * out-of-bounds unit and register instance parameters, address overflow, and |
| 210 | * register instance offsets that overflow improperly into an aperture base |
| 211 | * address. All of these conditions indicate either an incorrect register |
| 212 | * definition or a bug in the caller. See the template macro at the bottom of |
| 213 | * this file and umc.h for additional examples of calculating and checking |
| 214 | * register addresses. |
| 215 | * |
| 216 | * With address computation out of the way, we can then provide an accessor for |
| 217 | * each instance of this register: |
| 218 | * |
| 219 | * #define L3REGS_COREREGS_THREAD_TSC(l3, core, thread) \ |
| 220 | * amdzen_smn_l3regs_coreregs_reg(l3, D_L3REGS_COREREGS_THREAD_TSC, \ |
| 221 | * core, thread) |
| 222 | * |
| 223 | * Our other per-core register's accessor would look like: |
| 224 | * |
| 225 | * #define L3REGS_COREREGS_FROB_CTL(l3, core) \ |
| 226 | * amdzen_smn_l3regs_coreregs_reg(l3, D_L3REGS_COREREGS_FROB_CTL, core, 0) |
| 227 | * |
| 228 | * The next section describes these conventions in greater detail. |
| 229 | * |
| 230 | * ----------- |
| 231 | * Conventions |
| 232 | * ----------- |
| 233 | * |
| 234 | * First, let's consider the names of the register definition and the |
| 235 | * convenience macro supplied to obtain an instance of that register: we've |
| 236 | * prefixed the global definition of the registers with D_ and the convenience |
| 237 | * macros to return a specific instance are simply named for the register |
| 238 | * itself. Additionally, the two macros expand to objects of incompatible |
| 239 | * types, so that using the wrong one will always be detected at compile time. |
| 240 | * Why do we expose both of these? The instance macro is useful for callers who |
| 241 | * know at compile-time the name of the register of which they want instances; |
| 242 | * this makes it unnecessary to remember the names of functions used to compute |
| 243 | * register instance addresses. The definition itself is useful to callers that |
| 244 | * accept const smn_reg_def_t arguments referring to registers of which the |
| 245 | * immediate caller does not know the names at compile time. |
| 246 | * |
| 247 | * You may wonder why we don't declare named constants for the definitions. |
| 248 | * There are two ways we could do that and both are unfortunate: one would be to |
| 249 | * declare them static in the header, the other to separate declarations in the |
| 250 | * header from initialisation in a separate source file. Measurements revealed |
| 251 | * that the former causes a very substantial increase in data size, which will |
| 252 | * be multiplied by the number of registers defined and the number of source |
| 253 | * files including the header. As convenient as it is to have these symbolic |
| 254 | * constants available to debuggers and other tools at runtime, they're just too |
| 255 | * big. However, it is possible to generate code to be compiled into loadable |
| 256 | * modules that would contain a single copy of the constants for this purpose as |
| 257 | * well as for providing CTF to foreign-language binding generators. The other |
| 258 | * option considered here, putting the constants in separate source files, makes |
| 259 | * maintenance significantly more challenging and makes it likely not only that |
| 260 | * new registers may not be added properly but also that definitions, macros, or |
| 261 | * both may be incorrect. Neither of these options is terrible but for now |
| 262 | * we've optimised for simplicity of maintenance and minimal data size at the |
| 263 | * immediate but not necessarily permanent expense of some debugging |
| 264 | * convenience. |
| 265 | * |
| 266 | * We wish to standardise as much as possible on conventions across all |
| 267 | * Zen-related functional units and blocks (including those accessed by SMN, |
| 268 | * through the DF directly, and by other means). In general, some register and |
| 269 | * field names are shortened from their official names for clarity and brevity; |
| 270 | * the official names are always given in the comment above the definition. |
| 271 | * AMD's functional units come from many internal teams and presumably several |
| 272 | * outside vendors as well; as a result, there is no single convention to be |
| 273 | * found throughout the PPRs and other documentation. For example, different |
| 274 | * units may have registers containing "CTL", "CNTL", "CTRL", "CNTRL", and |
| 275 | * "CONTROL", as well as "FOO_CNTL", "FooCntl", and "Foo_Cntl". Reflecting |
| 276 | * longstanding illumos conventions, we collapse all such register names |
| 277 | * regardless of case as follows: |
| 278 | * |
| 279 | * CTL/CTRL/CNTL/CNTRL/CONTROL => CTL |
| 280 | * CFG/CONF/CONFIG/CONFIGURATION => CFG |
| 281 | * EN/ENAB/ENABLE/ENABLED => EN |
| 282 | * DIS/DISAB/DISABLE/DISABLED => DIS |
| 283 | * |
| 284 | * Note that if collapsing these would result in ambiguity, more of the official |
| 285 | * names will be preserved. In addition to collapsing register and field names |
| 286 | * in this case-insensitive manner, we also follow standard code style practice |
| 287 | * and name macros and constants in SCREAMING_SNAKE_CASE regardless of AMD's |
| 288 | * official name. It is similarly reasonable to truncate or abbreviate other |
| 289 | * common terms in a consistent manner where doing so preserves uniqueness and |
| 290 | * at least some semantic value; without doing so, some official register names |
| 291 | * will be excessively unwieldy and may not even fit into 80 columns. Please |
| 292 | * maintain these practices and strive for consistency with existing examples |
| 293 | * when abbreviation is required. |
| 294 | * |
| 295 | * As we have done elsewhere throughout the amdzen body of work, register fields |
| 296 | * should always be given in order starting with the most significant bits and |
| 297 | * working down toward 0; this matches AMD's documentation and makes it easier |
| 298 | * for reviewers and other readers to follow. The routines in bitext.h should |
| 299 | * be used to extract and set bitfields unless there is a compelling reason to |
| 300 | * do otherwise (e.g., assembly consumers). Accessors should be named |
| 301 | * UNIT_REG_GET_FIELD and UNIT_REG_SET_FIELD respectively, unless the register |
| 302 | * has a single field that has no meaningful name (i.e., the field's name is the |
| 303 | * same as the register's or it's otherwise obvious from the context what its |
| 304 | * purpose is), in which case UNIT_REG_GET and UNIT_REG_SET are appropriate. |
| 305 | * Additional getters and setters that select a particular bit from a register |
| 306 | * or field consisting entirely of individual bits describing or controlling the |
| 307 | * state of some entity may also be useful. As with register names, be as brief |
| 308 | * as possible without sacrificing too much information. |
| 309 | * |
| 310 | * Constant values associated with a field should be declared immediately |
| 311 | * following that field. If a constant or collection of constants is used in |
| 312 | * multiple fields of the same register, the definitions should follow the last |
| 313 | * such field; similarly, constants used in multiple registers should follow the |
| 314 | * last such register, and a comment explaining the scope of their validity is |
| 315 | * recommended. Such constants should be named for the common elements of the |
| 316 | * fields or registers in which they are valid. |
| 317 | * |
| 318 | * As noted above, SMN register definitions should omit the srd_nents and |
| 319 | * srd_stride members when there is a single instance of the register within the |
| 320 | * unit. The srd_stride member should also be elided when the register |
| 321 | * instances are contiguous. All address calculation routines should be written |
| 322 | * to support these conventions. Each register should have an accessor macro or |
| 323 | * function, and should accept instance numbers in order from superior to |
| 324 | * inferior (e.g., from the largest functional unit to the smallest, ending with |
| 325 | * the register instance itself). This convention is similar to that used in |
| 326 | * generic PCIe code in which a register is specified by bus, device, and |
| 327 | * function numbers in that order. Register accessor macros or inline functions |
| 328 | * should not expose inapplicable taxons to callers; in our example above, |
| 329 | * COREREGS_FROB_CTL has an instance for each core but is not associated with a |
| 330 | * thread; therefore its accessor should not accept a thread instance argument |
| 331 | * even though the address calculation function it uses does. |
| 332 | * |
| 333 | * Most of these conventions are not specific to registers accessed via SMN; |
| 334 | * note also that some registers may be accessed in multiple ways (e.g., SMN and |
| 335 | * MMIO, or SMN and the MSR instructions). While the code here is generally |
| 336 | * unaware of such aliased access methods, following these conventions will |
| 337 | * simplify naming and usage if such a register needs to be accessed in multiple |
| 338 | * ways. Sensible additions to macro and symbol names such as the access method |
| 339 | * to be used will generally be sufficient to disambiguate while allowing reuse |
| 340 | * of associated field accessors, constants, and in some cases even register |
| 341 | * offset, instance count, and stride. |
| 342 | */ |
| 343 | |
| 344 | #ifdef __cplusplus |
| 345 | extern "C" { |
| 346 | #endif |
| 347 | |
| 348 | #define SMN_APERTURE_MASK 0xfff00000 |
| 349 | |
/*
 * A single instance of a register reachable over SMN: the address at which
 * the register lives together with its width in bytes.  Wrapping the address
 * in a struct, rather than passing a bare uint32_t, keeps a register address
 * from being silently swapped with a register value at call sites.
 */
typedef struct smn_reg {
	uint32_t	sr_addr;	/* SMN address of this instance */
	uint8_t		sr_size;	/* width in bytes; size_t is overkill */
} smn_reg_t;
| 357 | |
Keith M Wesolowski | 4adf43b | 2022-11-09 07:00:30 +0000 | [diff] [blame^] | 358 | /* |
| 359 | * These are intended to be macro-like (and indeed some used to be macros) but |
| 360 | * are implemented as inline functions so that we can use compound statements |
| 361 | * without extensions and don't have to worry about multiple evaluation. Hence |
| 362 | * their capitalised names. |
| 363 | */ |
| 364 | static inline smn_reg_t |
| 365 | SMN_MAKE_REG_SIZED(const uint32_t addr, const uint8_t size) |
| 366 | { |
| 367 | const uint8_t size_always = (size == 0) ? 4 : size; |
| 368 | const smn_reg_t rv = { |
| 369 | .sr_addr = addr, |
| 370 | .sr_size = size_always |
| 371 | }; |
| 372 | |
| 373 | return (rv); |
| 374 | } |
| 375 | |
/* Construct an smn_reg_t for a register of the default 4-byte width. */
#define	SMN_MAKE_REG(x)		SMN_MAKE_REG_SIZED(x, 4)

/* Member accessors for an smn_reg_t: its SMN address and its width in bytes. */
#define	SMN_REG_ADDR(x)		((x).sr_addr)
#define	SMN_REG_SIZE(x)		((x).sr_size)
| 379 | |
| 380 | static inline boolean_t |
| 381 | SMN_REG_SIZE_IS_VALID(const smn_reg_t reg) |
| 382 | { |
| 383 | return (reg.sr_size == 1 || reg.sr_size == 2 || reg.sr_size == 4); |
| 384 | } |
| 385 | |
/*
 * Is the address of register <x> suitably aligned for an access of <size>
 * bytes?  The test is delegated to IS_P2ALIGNED(), which presumably expects
 * <size> to be a power of 2.
 */
#define	SMN_REG_IS_ALIGNED(x, size)	\
	IS_P2ALIGNED(SMN_REG_ADDR(x), size)
| 388 | |
| 389 | /* Is this register naturally aligned with respect to its own width? */ |
| 390 | static inline boolean_t |
| 391 | SMN_REG_IS_NATURALLY_ALIGNED(const smn_reg_t reg) |
| 392 | { |
| 393 | return (SMN_REG_IS_ALIGNED(reg, reg.sr_size)); |
| 394 | } |
| 395 | |
/*
 * Does <val> fit into SMN register <x>?  The representable bits are 0xff,
 * 0xffff, or 0xffffffff for register widths of 1, 2, or 4 bytes respectively;
 * <val> fits if it has no bits set outside that mask.
 *
 * The register's width is expected to be valid (see SMN_REG_SIZE_IS_VALID());
 * a width of 0 or of more than 4 yields an out-of-range shift count.
 *
 * NOTE(review): the mask is a 32-bit unsigned quantity, so it appears that
 * bits above bit 31 of a wider <val> are not examined -- confirm that callers
 * only pass 32-bit values.
 */
#define	SMN_REG_VALUE_FITS(x, val)	\
	(((val) & ~(0xffffffffU >> ((4 - SMN_REG_SIZE(x)) * 8))) == 0)
| 399 | |
| 400 | /* |
| 401 | * Retrieve the base address of the register. This is the address that will |
| 402 | * actually be set in the index register when performing a read or write of the |
| 403 | * underlying register via SMN. It must always be 32-bit aligned. |
| 404 | */ |
| 405 | static inline uint32_t |
| 406 | SMN_REG_ADDR_BASE(const smn_reg_t reg) |
| 407 | { |
| 408 | return (reg.sr_addr & ~3); |
| 409 | } |
| 410 | |
| 411 | /* |
| 412 | * The offset address is the byte offset into the 32-bit-wide data register that |
| 413 | * will be returned by a read or set by a write, if the register is smaller than |
| 414 | * 32 bits wide. For registers that are 32 bits wide, this is always 0. |
| 415 | */ |
| 416 | static inline uint32_t |
| 417 | SMN_REG_ADDR_OFF(const smn_reg_t reg) |
| 418 | { |
| 419 | return (reg.sr_addr & 3); |
| 420 | } |
Keith M Wesolowski | ba215ef | 2022-07-21 06:57:54 -0700 | [diff] [blame] | 421 | |
| 422 | /* |
| 423 | * This exists so that address calculation functions can check that the register |
| 424 | * definitions they're passed are something they understand how to use. While |
| 425 | * many address calculation functions are similar, some functional units define |
| 426 | * registers with multiple iterators, have differently-sized apertures, or both; |
| 427 | * it's important that we reject foreign register definitions in these |
| 428 | * functions. In principle this could be done at compile time, but the |
| 429 | * preprocessor gymnastics required to do so are excessively vile and we are |
| 430 | * really already hanging it pretty far over the edge in terms of what the C |
| 431 | * preprocessor can do for us. |
| 432 | */ |
typedef enum smn_unit {
	/*
	 * Deliberately the zero value: a register definition whose srd_unit
	 * member was never initialised ends up here rather than matching a
	 * real functional unit by accident.
	 */
	SMN_UNIT_UNKNOWN = 0,
	SMN_UNIT_IOAPIC,
	SMN_UNIT_IOHC,
	SMN_UNIT_IOHCDEV_PCIE,
	SMN_UNIT_IOHCDEV_NBIF,
	SMN_UNIT_IOHCDEV_SB,
	SMN_UNIT_IOAGR,
	SMN_UNIT_SDPMUX,
	SMN_UNIT_UMC,
	SMN_UNIT_PCIE_CORE,
	SMN_UNIT_PCIE_PORT,
	SMN_UNIT_PCIE_RSMU,
	SMN_UNIT_SCFCTP,
	SMN_UNIT_SMUPWR,
	SMN_UNIT_IOMMUL1,
	SMN_UNIT_IOMMUL2,
	SMN_UNIT_NBIF,
	SMN_UNIT_NBIF_ALT,
	SMN_UNIT_NBIF_FUNC
} smn_unit_t;
| 454 | |
| 455 | /* |
| 456 | * srd_unit and srd_reg are required; they describe the functional unit and the |
| 457 | * register's address within that unit's aperture (which may be the SDP-defined |
| 458 | * aperture described above or a smaller one if a unit has been broken down |
| 459 | * logically into smaller units). srd_nents is optional; if not set, all |
| 460 | * existing consumers assume a value of 0 is equivalent to 1: the register has |
Keith M Wesolowski | 4adf43b | 2022-11-09 07:00:30 +0000 | [diff] [blame^] | 461 | * but a single instance in each unit. srd_size is the width of the register in |
| 462 | * bytes, which must be 0, 1, 2, or 4. If 0, the size is assumed to be 4 bytes. |
| 463 | * srd_stride is ignored if srd_nents is 0 or 1 and optional otherwise; it |
| 464 | * describes the number of bytes to be added to the previous instance's address |
| 465 | * to obtain that of the next instance. If left at 0 it is assumed to be equal |
| 466 | * to the width of the register. |
Keith M Wesolowski | ba215ef | 2022-07-21 06:57:54 -0700 | [diff] [blame] | 467 | * |
| 468 | * There are units in which registers have more complicated collections of |
| 469 | * instances that cannot be represented perfectly by this simple descriptor; |
| 470 | * they require custom address calculation macros and functions that may take |
| 471 | * additional arguments, and they may not be able to check their arguments or |
| 472 | * the computed addresses as carefully as would be ideal. |
| 473 | */ |
| 474 | typedef struct smn_reg_def { |
| 475 | smn_unit_t srd_unit; |
| 476 | uint32_t srd_reg; |
| 477 | uint32_t srd_stride; |
| 478 | uint16_t srd_nents; |
Keith M Wesolowski | 4adf43b | 2022-11-09 07:00:30 +0000 | [diff] [blame^] | 479 | uint8_t srd_size; |
Keith M Wesolowski | ba215ef | 2022-07-21 06:57:54 -0700 | [diff] [blame] | 480 | } smn_reg_def_t; |
| 481 | |
/*
 * This macro may be used by per-functional-unit code to construct an address
 * calculation function.  It is usable by some, BUT NOT ALL, functional units;
 * see the block comment above for an example that cannot be accommodated.  Here
 * we assume that there are at most 2 iterators in any register's definition:
 * the unit instance and the register instance.  Use this when possible, as it
 * provides a large number of useful checks on DEBUG bits.  Similar checks
 * should be incorporated into implementations for nonstandard functional units
 * to the extent possible.
 *
 * Macro parameters:
 *
 *	_fn		name of the static inline function to define
 *	_unit		suffix of the SMN_UNIT_* constant that every register
 *			definition passed to _fn must carry in srd_unit
 *	_base		aperture address of unit instance 0; checked at compile
 *			time to lie entirely within _mask
 *	_mask		mask of the address bits that select the aperture;
 *			register offsets must not intrude upon these bits
 *	_nunits		number of unit instances; unitno must be less than this
 *	_unitshift	unit instance N's aperture is _base + (N << _unitshift)
 *
 * The generated function takes the unit instance number, the register
 * definition, and the register instance number, in that order, and returns the
 * smn_reg_t for that instance.  As elsewhere, srd_size == 0 means 4 bytes,
 * srd_stride == 0 means a stride equal to the register's width, and
 * srd_nents == 0 means a single instance.
 *
 * On DEBUG bits the function asserts that: the width is 1, 2, or 4; the stride
 * is at least the width; the definition belongs to this unit; both instance
 * numbers are in range; the register offset does not touch aperture bits
 * before or after the instance offset is applied; the aperture address stays
 * within the aperture bits; and none of the shift, multiplication, or
 * additions wraps around 32 bits.  The last group matters because a wrapped
 * offset can otherwise pass the mask checks and silently alias a different
 * register.
 */

#define	AMDZEN_MAKE_SMN_REG_FN(_fn, _unit, _base, _mask, _nunits, _unitshift) \
CTASSERT(((_base) & ~(_mask)) == 0);					\
static inline smn_reg_t							\
_fn(const uint8_t unitno, const smn_reg_def_t def, const uint16_t reginst) \
{									\
	const uint32_t unit32 = (const uint32_t)unitno;			\
	const uint32_t reginst32 = (const uint32_t)reginst;		\
	const uint32_t size32 = (def.srd_size == 0) ? 4 :		\
	    (const uint32_t)def.srd_size;				\
	ASSERT(size32 == 1 || size32 == 2 || size32 == 4);		\
	const uint32_t stride = (def.srd_stride == 0) ? size32 :	\
	    def.srd_stride;						\
	ASSERT3U(stride, >=, size32);					\
	const uint32_t nents = (def.srd_nents == 0) ? 1 :		\
	    (const uint32_t)def.srd_nents;				\
									\
	ASSERT3S(def.srd_unit, ==, SMN_UNIT_ ## _unit);			\
	ASSERT3U(unit32, <, (_nunits));					\
	ASSERT3U(nents, >, reginst32);					\
	ASSERT0(def.srd_reg & (_mask));					\
									\
	const uint32_t aperture_base = (_base);				\
									\
	const uint32_t aperture_off = (unit32 << (_unitshift));		\
	ASSERT3U(aperture_off >> (_unitshift), ==, unit32);		\
	ASSERT3U(aperture_off, <=, UINT32_MAX - aperture_base);		\
									\
	const uint32_t aperture = aperture_base + aperture_off;		\
	ASSERT0(aperture & ~(_mask));					\
									\
	ASSERT3U(reginst32, <=, (UINT32_MAX - def.srd_reg) / stride);	\
	const uint32_t reg = def.srd_reg + reginst32 * stride;		\
	ASSERT0(reg & (_mask));						\
									\
	return (SMN_MAKE_REG_SIZED(aperture + reg, size32));		\
}
| 526 | |
| 527 | #ifdef __cplusplus |
| 528 | } |
| 529 | #endif |
| 530 | |
| 531 | #endif /* _SYS_AMDZEN_SMN_H */ |