/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

/*
 * Copyright 2017 Joyent, Inc.
 * Copyright 2017 James S Blachly, MD <james.blachly@gmail.com>
 */

/*
 * Memory special file
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>
#include <sys/vmem.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>

#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>

#include <sys/conf.h>
#include <sys/mem.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/memlist.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/fm/protocol.h>

#if defined(__sparc)
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#elif defined(__x86)
#include <sys/cpu_module.h>
#endif	/* __sparc */

/*
 * Turn a byte length into a pagecount.  The DDI btop takes a
 * 32-bit size on 32-bit machines; this handles 64-bit sizes for
 * large physical-memory 32-bit machines.
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))

static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

static int mm_kmem_io_access;

static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);

#define	MM_KMEMLOG_NENTRIES	64

static int mm_kmemlogent;
static mm_logentry_t mm_kmemlog[MM_KMEMLOG_NENTRIES];

/*
 * On kmem/allkmem writes, we log information that might be useful in the
 * event that a write is errant (that is, due to operator error) and induces
 * a later problem.  Note that (in particular) in the event of such
 * operator-induced corruption, a search over the kernel address space for the
 * corrupted address will yield the ring buffer entry that recorded the write.
 * And should it seem baroque or otherwise unnecessary, yes, we need this kind
 * of auditing facility and yes, we learned that the hard way: disturbingly,
 * there exist recommendations for "tuning" the system that involve writing to
 * kernel memory addresses via the kernel debugger, and -- as we discovered --
 * these can easily be applied incorrectly or unsafely, yielding an entirely
 * undebuggable "can't happen" kind of panic.
 */
static void
mm_logkmem(struct uio *uio)
{
	mm_logentry_t *ent;
	proc_t *p = curthread->t_procp;

	mutex_enter(&mm_lock);

	ent = &mm_kmemlog[mm_kmemlogent++];

	if (mm_kmemlogent == MM_KMEMLOG_NENTRIES)
		mm_kmemlogent = 0;

	ent->mle_vaddr = (uintptr_t)uio->uio_loffset;
	ent->mle_len = uio->uio_resid;
	gethrestime(&ent->mle_hrestime);
	ent->mle_hrtime = gethrtime();
	ent->mle_pid = p->p_pidp->pid_id;

	(void) strncpy(ent->mle_psargs,
	    p->p_user.u_psargs, sizeof (ent->mle_psargs));

	mutex_exit(&mm_lock);
}

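/*
 * attach(9E): create the mem, kmem, allkmem, null, zero and full minor
 * nodes, reserve a page of kernel VA (mm_map) for mmio() to map through,
 * install the "phys_installed" kstat, and read the kmem_io_access property
 * that governs I/O-space access through /dev/kmem.
 */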
/*ARGSUSED1*/
static int
mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	struct mem_minor {
		char *name;
		minor_t minor;
		int privonly;
		const char *rdpriv;
		const char *wrpriv;
		mode_t priv_mode;
	} mm[] = {
		{ "mem", M_MEM, 0, NULL, "all", 0640 },
		{ "kmem", M_KMEM, 0, NULL, "all", 0640 },
		{ "allkmem", M_ALLKMEM, 0, "all", "all", 0600 },
		{ "null", M_NULL, PRIVONLY_DEV, NULL, NULL, 0666 },
		{ "zero", M_ZERO, PRIVONLY_DEV, NULL, NULL, 0666 },
		{ "full", M_FULL, PRIVONLY_DEV, NULL, NULL, 0666 },
	};
	kstat_t *ksp;

	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
		    DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	mm_dip = devi;

	ksp = kstat_create("mm", 0, "phys_installed", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_update = mm_kstat_update;
		ksp->ks_snapshot = mm_kstat_snapshot;
		ksp->ks_lock = &mm_lock; /* XXX - not really needed */
		kstat_install(ksp);
	}

	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    "kmem_io_access", 0);

	return (DDI_SUCCESS);
}

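/*
 * getinfo(9E): translate a device number into the driver's dev_info
 * pointer or instance number (always instance 0).
 */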
/*ARGSUSED*/
static int
mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	register int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)mm_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

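/*
 * open(9E): admit only the known memory minors, and only as a
 * character device.
 */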
/*ARGSUSED1*/
static int
mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
{
	switch (getminor(*devp)) {
	case M_NULL:
	case M_ZERO:
	case M_FULL:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		/* standard devices */
		break;

	default:
		/* Unsupported or unknown type */
		return (EINVAL);
	}
	/* must be character device */
	if (typ != OTYP_CHR)
		return (EINVAL);
	return (0);
}

struct pollhead mm_pollhd;

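/*
 * chpoll(9E): the memory devices never block, so every supported event
 * that was requested is reported as ready; the shared pollhead is handed
 * back for zero-event and edge-triggered polls.
 */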
/*ARGSUSED*/
static int
mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	switch (getminor(dev)) {
	case M_NULL:
	case M_ZERO:
	case M_FULL:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
		    POLLWRNORM | POLLRDBAND | POLLWRBAND);
		/*
		 * A non-NULL pollhead pointer should be returned in case
		 * the user polls for 0 events or is doing an edge-triggered
		 * poll.
		 */
		if ((!*reventsp && !anyyet) || (events & POLLET)) {
			*phpp = &mm_pollhd;
		}
		return (0);
	default:
		/* no other devices currently support polling */
		return (ENXIO);
	}
}

static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
	/*
	 * Implement zero size to reduce overhead (avoid two failing
	 * property lookups per stat).
	 */
	return (ddi_prop_op_size(dev, dip, prop_op,
	    flags, name, valuep, lengthp, 0));
}

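/*
 * Move up to one page of data between the user's buffer and page frame
 * 'pfn'.  Ordinary memory is mapped through kpm when available, and
 * otherwise through the reserved mm_map window; non-memory (I/O) frames
 * are accessed with ddi_peekpokeio(), and only when 'allowio' is set.
 */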
static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
    page_t *pp)
{
	int error = 0;
	int devload = 0;
	int is_memory = pf_is_memory(pfn);
	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
	    (size_t)uio->uio_iov->iov_len);
	caddr_t va = NULL;

	mutex_enter(&mm_lock);

	if (is_memory && kpm_enable) {
		if (pp)
			va = hat_kpm_mapin(pp, NULL);
		else
			va = hat_kpm_mapin_pfn(pfn);
	}

	if (va == NULL) {
		hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
		    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ|PROT_WRITE),
		    HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
		va = mm_map;
		devload = 1;
	}

	if (!is_memory) {
		if (allowio) {
			size_t c = uio->uio_iov->iov_len;

			if (ddi_peekpokeio(NULL, uio, rw,
			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
			    sizeof (int32_t)) != DDI_SUCCESS)
				error = EFAULT;
		} else
			error = EIO;
	} else
		error = uiomove(va + pageoff, nbytes, rw, uio);

	if (devload)
		hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
	else if (pp)
		hat_kpm_mapout(pp, NULL, va);
	else
		hat_kpm_mapout_pfn(pfn);

	mutex_exit(&mm_lock);
	return (error);
}

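/*
 * Return non-zero if the segment backing 'va' supports
 * S_CAPABILITY_NOMINFLT; used (via NEED_LOCK_KVADDR) on sparc to decide
 * whether mmrw() should as_pagelock() the address before touching it.
 */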
static int
mmpagelock(struct as *as, caddr_t va)
{
	struct seg *seg;
	int i;

	AS_LOCK_ENTER(as, RW_READER);
	seg = as_segat(as, va);
	i = (seg != NULL)? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
	AS_LOCK_EXIT(as);

	return (i);
}

#ifdef __sparc

#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else	/* __i386, __amd64 */

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */

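/*
 * Common read/write path for all of the memory minors: /dev/mem is checked
 * against the phys_install list, /dev/kmem and /dev/allkmem translate
 * kernel virtual addresses to page frames (logging writes), and /dev/zero,
 * /dev/full and /dev/null provide their usual semantics.
 */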
/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
	pfn_t v;
	struct iovec *iov;
	int error = 0;
	size_t c;
	ssize_t oresid = uio->uio_resid;
	minor_t minor = getminor(dev);

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor) {

		case M_MEM:
			memlist_read_lock();
			if (!address_in_memlist(phys_install,
			    (uint64_t)uio->uio_loffset, 1)) {
				memlist_read_unlock();
				error = EFAULT;
				break;
			}
			memlist_read_unlock();

			v = BTOP((u_offset_t)uio->uio_loffset);
			error = mmio(uio, rw, v,
			    uio->uio_loffset & PAGEOFFSET, 0, NULL);
			break;

		case M_KMEM:
		case M_ALLKMEM:
		{
			page_t **ppp = NULL;
			caddr_t vaddr = (caddr_t)uio->uio_offset;
			int try_lock = NEED_LOCK_KVADDR(vaddr);
			int locked = 0;

			if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
				break;

			if (rw == UIO_WRITE)
				mm_logkmem(uio);

			/*
			 * If vaddr does not map a valid page, as_pagelock()
			 * will return failure. Hence we can't check the
			 * return value and return EFAULT here as we'd like.
			 * seg_kp and seg_kpm do not properly support
			 * as_pagelock() for this context so we avoid it
			 * using the try_lock set check above. Some day when
			 * the kernel page locking gets redesigned all this
			 * muck can be cleaned up.
			 */
			if (try_lock)
				locked = (as_pagelock(&kas, &ppp, vaddr,
				    PAGESIZE, S_WRITE) == 0);

			v = hat_getpfnum(kas.a_hat,
			    (caddr_t)(uintptr_t)uio->uio_loffset);
			if (v == PFN_INVALID) {
				if (locked)
					as_pageunlock(&kas, ppp, vaddr,
					    PAGESIZE, S_WRITE);
				error = EFAULT;
				break;
			}

			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
			    minor == M_ALLKMEM || mm_kmem_io_access,
			    (locked && ppp) ? *ppp : NULL);
			if (locked)
				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
				    S_WRITE);
		}

		break;

		case M_FULL:
			if (rw == UIO_WRITE) {
				error = ENOSPC;
				break;
			}
			/* else it's a read, fall through to zero case */
			/*FALLTHROUGH*/

		case M_ZERO:
			if (rw == UIO_READ) {
				label_t ljb;

				if (on_fault(&ljb)) {
					no_fault();
					error = EFAULT;
					break;
				}
				uzero(iov->iov_base, iov->iov_len);
				no_fault();
				uio->uio_resid -= iov->iov_len;
				uio->uio_loffset += iov->iov_len;
				break;
			}
			/* else it's a write, fall through to NULL case */
			/*FALLTHROUGH*/

		case M_NULL:
			if (rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			iov->iov_base += c;
			iov->iov_len -= c;
			uio->uio_loffset += c;
			uio->uio_resid -= c;
			break;

		}
	}
	return (uio->uio_resid == oresid ? error : 0);
}

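/* read(9E) and write(9E) entry points: thin wrappers around mmrw(). */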
static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_READ, cred));
}

static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_WRITE, cred));
}

/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 */
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
	mem_vtop32_t vtop32;
#endif
	mem_vtop_t mem_vtop;
	proc_t *p;
	pfn_t pfn = (pfn_t)PFN_INVALID;
	pid_t pid = 0;
	struct as *as;
	struct seg *seg;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
			return (EFAULT);
		mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
		mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;

		if (mem_vtop.m_as != NULL)
			return (EINVAL);
	}
#endif

	if (mem_vtop.m_as == &kas) {
		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
	} else {
		if (mem_vtop.m_as == NULL) {
			/*
			 * Assume the calling process's address space if the
			 * caller didn't specify one.
			 */
			p = curthread->t_procp;
			if (p == NULL)
				return (EIO);
			mem_vtop.m_as = p->p_as;
		}

		mutex_enter(&pidlock);
		for (p = practive; p != NULL; p = p->p_next) {
			if (p->p_as == mem_vtop.m_as) {
				pid = p->p_pid;
				break;
			}
		}
		mutex_exit(&pidlock);
		if (p == NULL)
			return (EIO);
		p = sprlock(pid);
		if (p == NULL)
			return (EIO);
		as = p->p_as;
		if (as == mem_vtop.m_as) {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			for (seg = AS_SEGFIRST(as); seg != NULL;
			    seg = AS_SEGNEXT(as, seg))
				if ((uintptr_t)mem_vtop.m_va -
				    (uintptr_t)seg->s_base < seg->s_size)
					break;
			if (seg != NULL)
				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
		sprunlock(p);
	}
	mem_vtop.m_pfn = pfn;
	if (pfn == PFN_INVALID)
		return (EIO);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		vtop32.m_pfn = mem_vtop.m_pfn;
		if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
			return (EFAULT);
	}
#endif

	return (0);
}

/*
 * Given a PA, execute the given page retire command on it.
 */
static int
mmioctl_page_retire(int cmd, intptr_t data)
{
	extern int page_retire_test(void);
	uint64_t pa;

	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
		return (EFAULT);
	}

	switch (cmd) {
	case MEM_PAGE_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_UNRETIRE:
		return (page_unretire(pa));

	case MEM_PAGE_RETIRE:
		return (page_retire(pa, PR_FMA));

	case MEM_PAGE_RETIRE_MCE:
		return (page_retire(pa, PR_MCE));

	case MEM_PAGE_RETIRE_UE:
		return (page_retire(pa, PR_UE));

	case MEM_PAGE_GETERRORS:
		{
			uint64_t page_errors;
			int rc = page_retire_check(pa, &page_errors);
			if (copyout(&page_errors, (void *)data,
			    sizeof (uint64_t))) {
				return (EFAULT);
			}
			return (rc);
		}

	case MEM_PAGE_RETIRE_TEST:
		return (page_retire_test());

	}

	return (EINVAL);
}

#ifdef __sparc
/*
 * Given a syndrome, syndrome type, and address return the
 * associated memory name in the provided data buffer.
 */
static int
mmioctl_get_mem_name(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		return (err);
	}

	if (len >= mem_name.m_namelen) {
		kmem_free(buf, bufsize);
		return (ENOSPC);
	}

	if (copyoutstr(buf, (char *)mem_name.m_name,
	    mem_name.m_namelen, NULL) != 0) {
		kmem_free(buf, bufsize);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	return (0);
}

/*
 * Given a syndrome and address return information about the associated memory.
 */
static int
mmioctl_get_mem_info(intptr_t data)
{
	mem_info_t mem_info;
	int err;

	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
		return (EFAULT);

	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
		return (err);

	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Given a memory name, return its associated serial id.
 */
static int
mmioctl_get_mem_sid(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	void *name;
	size_t name_len;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	if (mem_name.m_namelen > 1024)
		mem_name.m_namelen = 1024; /* cap at 1024 bytes */

	name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);

	if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
	    mem_name.m_namelen, &name_len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	if (len > mem_name.m_sidlen) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_sid,
	    mem_name.m_sidlen, NULL) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	kmem_free(name, mem_name.m_namelen);
	return (0);
}
#endif	/* __sparc */

/*
 * Private ioctls: MEM_VTOP for libkvm's kvm_physaddr(), plus the FMA
 * ioctls for page_retire() and memory attribute information.
 */
/*ARGSUSED*/
static int
mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
{
	if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
	    (cmd != MEM_VTOP && getminor(dev) != M_MEM))
		return (ENXIO);

	switch (cmd) {
	case MEM_VTOP:
		return (mmioctl_vtop(data));

	case MEM_PAGE_RETIRE:
	case MEM_PAGE_ISRETIRED:
	case MEM_PAGE_UNRETIRE:
	case MEM_PAGE_RETIRE_MCE:
	case MEM_PAGE_RETIRE_UE:
	case MEM_PAGE_GETERRORS:
	case MEM_PAGE_RETIRE_TEST:
		return (mmioctl_page_retire(cmd, data));

#ifdef __sparc
	case MEM_NAME:
		return (mmioctl_get_mem_name(data));

	case MEM_INFO:
		return (mmioctl_get_mem_info(data));

	case MEM_SID:
		return (mmioctl_get_mem_sid(data));
#else
	case MEM_NAME:
	case MEM_INFO:
	case MEM_SID:
		return (ENOTSUP);
#endif	/* __sparc */
	}
	return (ENXIO);
}

/*ARGSUSED2*/
static int
mmmmap(dev_t dev, off_t off, int prot)
{
	pfn_t pf;
	struct memlist *pmem;
	minor_t minor = getminor(dev);

	switch (minor) {
	case M_MEM:
		pf = btop(off);
		memlist_read_lock();
		for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
			if (pf >= BTOP(pmem->ml_address) &&
			    pf < BTOP(pmem->ml_address + pmem->ml_size)) {
				memlist_read_unlock();
				return (impl_obmem_pfnum(pf));
			}
		}
		memlist_read_unlock();
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* no longer supported with KPR */
		return (-1);

	case M_FULL:
	case M_ZERO:
		/*
		 * We shouldn't be mmap'ing to /dev/zero here as
		 * mmsegmap() should have already converted
		 * a mapping request for this device to a mapping
		 * using seg_vn for anonymous memory.
		 */
		break;

	}
	return (-1);
}

/*
 * This function is called when a memory device is mmap'ed.
 * Set up the mapping to the correct device driver.
 */
static int
mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
	struct segvn_crargs vn_a;
	struct segdev_crargs dev_a;
	int error;
	minor_t minor;
	off_t i;

	minor = getminor(dev);

	as_rangelock(as);
	/*
	 * No need to worry about vac alignment on /dev/zero
	 * since this is a "clone" object that doesn't yet exist.
	 */
	error = choose_addr(as, addrp, len, off,
	    (minor == M_MEM) || (minor == M_KMEM), flags);
	if (error != 0) {
		as_rangeunlock(as);
		return (error);
	}

	switch (minor) {
	case M_MEM:
		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
		if ((flags & MAP_TYPE) != MAP_SHARED) {
			as_rangeunlock(as);
			return (EINVAL);
		}

		/*
		 * Check to ensure that the entire range is
		 * legal and we are not trying to map in
		 * more than the device will let us.
		 */
		for (i = 0; i < len; i += PAGESIZE) {
			if (mmmmap(dev, off + i, maxprot) == -1) {
				as_rangeunlock(as);
				return (ENXIO);
			}
		}

		/*
		 * Use seg_dev segment driver for /dev/mem mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = (flags & MAP_TYPE);
		dev_a.prot = (uchar_t)prot;
		dev_a.maxprot = (uchar_t)maxprot;
		dev_a.hat_attr = 0;

		/*
		 * Make /dev/mem mappings non-consistent since we can't
		 * alias pages that don't have page structs behind them,
		 * such as kernel stack pages. If someone mmap()s a kernel
		 * stack page and if we give them a tte with cv, a line from
		 * that page can get into both pages of the spitfire d$.
		 * But snoop from another processor will only invalidate
		 * the first page. This later caused kernel (xc_attention)
		 * to go into an infinite loop at pil 13 and no interrupts
		 * could come in. See 1203630.
		 */
		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
		dev_a.devmap_data = NULL;

		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	case M_ZERO:
		/*
		 * Use seg_vn segment driver for /dev/zero mapping.
		 * Passing in a NULL amp gives us the "cloning" effect.
		 */
		vn_a.vp = NULL;
		vn_a.offset = 0;
		vn_a.type = (flags & MAP_TYPE);
		vn_a.prot = prot;
		vn_a.maxprot = maxprot;
		vn_a.flags = flags & ~MAP_TYPE;
		vn_a.cred = cred;
		vn_a.amp = NULL;
		vn_a.szc = 0;
		vn_a.lgrp_mem_policy_flags = 0;
		error = as_map(as, *addrp, len, segvn_create, &vn_a);
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* No longer supported with KPR. */
		error = ENXIO;
		break;

	case M_NULL:
		/*
		 * Use seg_dev segment driver for /dev/null mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
		dev_a.hat_attr = 0;
		dev_a.hat_flags = 0;
		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	default:
		error = ENXIO;
	}

	as_rangeunlock(as);
	return (error);
}

static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};

static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops, "memory driver", &mm_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

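/*
 * ks_update callback for the "phys_installed" kstat: count the
 * phys_install entries and size the snapshot buffer accordingly.
 */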
static int
mm_kstat_update(kstat_t *ksp, int rw)
{
	struct memlist *pmem;
	uint_t count;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	count = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
		count++;
	}
	memlist_read_unlock();

	ksp->ks_ndata = count;
	ksp->ks_data_size = count * 2 * sizeof (uint64_t);

	return (0);
}

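/*
 * ks_snapshot callback for the "phys_installed" kstat: copy each
 * (address, size) pair from phys_install into the caller's buffer.
 */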
static int
mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct memlist *pmem;
	struct memunit {
		uint64_t address;
		uint64_t size;
	} *kspmem;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ksp->ks_snaptime = gethrtime();

	kspmem = (struct memunit *)buf;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL;
	    pmem = pmem->ml_next, kspmem++) {
		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
			break;
		kspmem->address = pmem->ml_address;
		kspmem->size = pmem->ml_size;
	}
	memlist_read_unlock();

	return (0);
}

/*
 * Read a mem_name_t from user-space and store it in the mem_name_t
 * pointed to by the mem_name argument.
 */
static int
mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		mem_name32_t mem_name32;

		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
			return (EFAULT);
		mem_name->m_addr = mem_name32.m_addr;
		mem_name->m_synd = mem_name32.m_synd;
		mem_name->m_type[0] = mem_name32.m_type[0];
		mem_name->m_type[1] = mem_name32.m_type[1];
		mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
		mem_name->m_namelen = (size_t)mem_name32.m_namelen;
		mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
		mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
	}
#endif	/* _SYSCALL32 */

	return (0);
}