5566 elfexec is overzealous with vpages
5572 elfexec and mapelfexec can disagree on aux vectors
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Rich Lowe <richlowe@richlowe.net>
Approved by: Rich Lowe <richlowe@richlowe.net>
diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c
index 7d89db1..0eeb4e7 100644
--- a/usr/src/uts/common/exec/elf/elf.c
+++ b/usr/src/uts/common/exec/elf/elf.c
@@ -417,11 +417,11 @@
* AT_BASE
* AT_FLAGS
* AT_PAGESZ
- * AT_SUN_LDSECURE
+ * AT_SUN_AUXFLAGS
* AT_SUN_HWCAP
* AT_SUN_HWCAP2
- * AT_SUN_PLATFORM
- * AT_SUN_EXECNAME
+ * AT_SUN_PLATFORM (added in stk_copyout)
+ * AT_SUN_EXECNAME (added in stk_copyout)
* AT_NULL
*
* total == 9
@@ -501,6 +501,7 @@
aux = bigwad->elfargs;
/*
* Move args to the user's stack.
+ * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
*/
if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
if (error == -1) {
@@ -717,7 +718,8 @@
if (hasauxv) {
int auxf = AF_SUN_HWCAPVERIFY;
/*
- * Note: AT_SUN_PLATFORM was filled in via exec_args()
+ * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
+ * exec_args()
*/
ADDAUX(aux, AT_BASE, voffset)
ADDAUX(aux, AT_FLAGS, at_flags)
@@ -791,7 +793,20 @@
ADDAUX(aux, AT_NULL, 0)
postfixsize = (char *)aux - (char *)bigwad->elfargs;
- ASSERT(postfixsize == args->auxsize);
+
+ /*
+ * We make assumptions above when we determine how many aux
+ * vector entries we will be adding. However, if we have an
+ * invalid elf file, it is possible that mapelfexec might
+ * invalid ELF file, it is possible that mapelfexec might
+ * the number of aux entries we actually add will be different.
+ * We detect that now and error out.
+ */
+ if (postfixsize != args->auxsize) {
+ DTRACE_PROBE2(elfexec_badaux, int, postfixsize,
+ int, args->auxsize);
+ goto bad;
+ }
ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
}
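
(Illustrative note, not part of the patch.) The postfixsize check above turns a
DEBUG-only ASSERT into a run-time consistency test: the aux vector size is
estimated before mapelfexec() runs, entries are appended ADDAUX-style, and the
bytes actually emitted must match the estimate. A minimal user-level sketch of
that pattern, with made-up entry values and a simplified ADDAUX macro, assuming
nothing beyond standard C:

	/*
	 * Sketch only: estimate an entry count up front, append entries,
	 * then verify the byte count actually written instead of asserting.
	 */
	#include <stdio.h>

	typedef struct aux_entry {
		long	a_type;
		long	a_val;
	} aux_entry_t;

	#define	ADDAUX(p, t, v)	{ (p)->a_type = (t); (p)->a_val = (v); (p)++; }

	int
	main(void)
	{
		aux_entry_t vec[16], *aux = vec;
		size_t auxsize = 3 * sizeof (aux_entry_t);	/* estimate: 3 entries */
		size_t postfixsize;

		ADDAUX(aux, 7, 1)	/* e.g. AT_BASE */
		ADDAUX(aux, 8, 0)	/* e.g. AT_FLAGS */
		ADDAUX(aux, 0, 0)	/* AT_NULL terminator */

		postfixsize = (char *)aux - (char *)vec;
		if (postfixsize != auxsize) {
			fprintf(stderr, "aux vector size mismatch\n");
			return (1);
		}
		return (0);
	}
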
diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c
index 994c034..7adc5c0 100644
--- a/usr/src/uts/common/os/exec.c
+++ b/usr/src/uts/common/os/exec.c
@@ -26,7 +26,7 @@
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -1255,6 +1255,22 @@
/*
* Before we go to zero the remaining space on the last
* page, make sure we have write permission.
+ *
+ * Normal illumos binaries don't even hit the case
+ * where we have to change permission on the last page
+ * since their protection is typically either
+ * PROT_USER | PROT_WRITE | PROT_READ
+ * or
+ * PROT_ZFOD (same as PROT_ALL).
+ *
+ * We need to be careful how we zero-fill the last page
+ * if the segment protection does not include
+ * PROT_WRITE. Using as_setprot() can cause the VM
+ * segment code to call segvn_vpage(), which must
+ * allocate a vpage structure for each page in the segment.
+ * If we have a very large segment, this may fail, so
+ * we have to check for that, even though we ignore
+ * other return values from as_setprot.
*/
AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
@@ -1265,8 +1281,11 @@
AS_LOCK_EXIT(as, &as->a_lock);
if (seg != NULL && (zprot & PROT_WRITE) == 0) {
- (void) as_setprot(as, (caddr_t)end,
- zfoddiff - 1, zprot | PROT_WRITE);
+ if (as_setprot(as, (caddr_t)end, zfoddiff - 1,
+ zprot | PROT_WRITE) == ENOMEM) {
+ error = ENOMEM;
+ goto bad;
+ }
}
if (on_fault(&ljb)) {
diff --git a/usr/src/uts/common/os/mmapobj.c b/usr/src/uts/common/os/mmapobj.c
index b33ef69..578e6da 100644
--- a/usr/src/uts/common/os/mmapobj.c
+++ b/usr/src/uts/common/os/mmapobj.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -1126,10 +1127,23 @@
zfodbase = (caddr_t)P2ROUNDUP(end, PAGESIZE);
zfoddiff = (uintptr_t)zfodbase - end;
if (zfoddiff) {
+ /*
+ * Before we go to zero the remaining space on the last
+ * page, make sure we have write permission.
+ *
+ * We need to be careful how we zero-fill the last page
+ * if the protection does not include PROT_WRITE. Using
+ * as_setprot() can cause the VM segment code to call
+ * segvn_vpage(), which must allocate a vpage structure for
+ * each page in the segment. If we have a very large
+ * segment, this may fail, so we check for that, even
+ * though we ignore other return values from as_setprot.
+ */
MOBJ_STAT_ADD(zfoddiff);
if ((prot & PROT_WRITE) == 0) {
- (void) as_setprot(as, (caddr_t)end,
- zfoddiff, prot | PROT_WRITE);
+ if (as_setprot(as, (caddr_t)end, zfoddiff,
+ prot | PROT_WRITE) == ENOMEM)
+ return (ENOMEM);
MOBJ_STAT_ADD(zfoddiff_nowrite);
}
if (on_fault(&ljb)) {
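
(Illustrative note, not part of the patch.) Both of the call sites above, in
exec.c and mmapobj.c, reach segvn_vpage() through as_setprot() when they must
zero-fill a page that is not writable. The user-level analogue of that path is
mprotect(2) on a sub-range of a large read-only mapping, which is what the
seg_vn.c changes below guard. A minimal sketch, assuming only standard illumos
mmap(2)/mprotect(2) behavior and a placeholder 1 GB mapping size:

	/*
	 * Sketch only: change protection on one page of a large read-only
	 * anonymous mapping.  This forces per-page protection tracking in
	 * the kernel; with the seg_vn.c changes such a request can fail
	 * cleanly with ENOMEM instead of exhausting kernel memory.
	 */
	#include <sys/mman.h>
	#include <stdio.h>
	#include <unistd.h>

	int
	main(void)
	{
		size_t len = 1UL << 30;		/* placeholder mapping size */
		long pagesize = sysconf(_SC_PAGESIZE);
		char *p;

		p = mmap(NULL, len, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			return (1);
		}

		if (mprotect(p, (size_t)pagesize, PROT_READ | PROT_WRITE) != 0)
			perror("mprotect");	/* may report ENOMEM */

		(void) munmap(p, len);
		return (0);
	}
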
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c
index 9426a12..8feb104 100644
--- a/usr/src/uts/common/vm/seg_vn.c
+++ b/usr/src/uts/common/vm/seg_vn.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -5769,6 +5770,11 @@
* to len.
*/
segvn_vpage(seg);
+ if (svd->vpage == NULL) {
+ SEGVN_LOCK_EXIT(seg->s_as,
+ &svd->lock);
+ return (ENOMEM);
+ }
svp = &svd->vpage[seg_page(seg, addr)];
evp = &svd->vpage[seg_page(seg,
addr + len)];
@@ -5862,6 +5868,10 @@
* the operation.
*/
segvn_vpage(seg);
+ if (svd->vpage == NULL) {
+ SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+ return (ENOMEM);
+ }
svd->pageprot = 1;
if ((amp = svd->amp) != NULL) {
anon_idx = svd->anon_index + seg_page(seg, addr);
@@ -5966,6 +5976,10 @@
}
} else {
segvn_vpage(seg);
+ if (svd->vpage == NULL) {
+ SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+ return (ENOMEM);
+ }
svd->pageprot = 1;
evp = &svd->vpage[seg_page(seg, addr + len)];
for (svp = &svd->vpage[seg_page(seg, addr)]; svp < evp; svp++) {
@@ -7656,9 +7670,13 @@
*/
if ((vpp = svd->vpage) == NULL) {
- if (op == MC_LOCK)
+ if (op == MC_LOCK) {
segvn_vpage(seg);
- else {
+ if (svd->vpage == NULL) {
+ SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+ return (ENOMEM);
+ }
+ } else {
SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
return (0);
}
@@ -8236,6 +8254,10 @@
page = seg_page(seg, addr);
segvn_vpage(seg);
+ if (svd->vpage == NULL) {
+ SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+ return (ENOMEM);
+ }
switch (behav) {
struct vpage *bvpp, *evpp;
@@ -8470,6 +8492,7 @@
{
struct segvn_data *svd = (struct segvn_data *)seg->s_data;
struct vpage *vp, *evp;
+ static pgcnt_t page_limit = 0;
ASSERT(SEGVN_WRITE_HELD(seg->s_as, &svd->lock));
@@ -8478,9 +8501,32 @@
* and the advice from the segment itself to the individual pages.
*/
if (svd->vpage == NULL) {
+ /*
+ * Start by calculating the number of pages we must allocate to
+ * track the per-page vpage structs needed for this entire
+ * segment. If we know now that it will require more than our
+ * heuristic for the maximum amount of kmem we can consume, then
+ * fail. We do this here, instead of trying to detect this deep
+ * in page_resv and propagating the error up, since the entire
+ * memory allocation stack is not amenable to passing this
+ * back. Instead, it wants to keep trying.
+ *
+ * As a heuristic, we set a page limit of 5/8 of total_pages
+ * for this allocation. We use shifts so that no floating
+ * point conversion takes place and we only need to do the
+ * calculation once.
+ */
+ ulong_t mem_needed = seg_pages(seg) * sizeof (struct vpage);
+ pgcnt_t npages = mem_needed >> PAGESHIFT;
+
+ if (page_limit == 0)
+ page_limit = (total_pages >> 1) + (total_pages >> 3);
+
+ if (npages > page_limit)
+ return;
+
svd->pageadvice = 1;
- svd->vpage = kmem_zalloc(seg_pages(seg) * sizeof (struct vpage),
- KM_SLEEP);
+ svd->vpage = kmem_zalloc(mem_needed, KM_SLEEP);
evp = &svd->vpage[seg_page(seg, seg->s_base + seg->s_size)];
for (vp = svd->vpage; vp < evp; vp++) {
VPP_SETPROT(vp, svd->prot);
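
(Illustrative note, not part of the patch.) The page_limit computed above is
five-eighths of total_pages: (x >> 1) + (x >> 3) == x/2 + x/8 == 5x/8, with no
floating-point arithmetic, and the result is cached in a static so it is only
computed once. A tiny stand-alone sketch of the arithmetic, using a sample
value rather than the kernel's total_pages:

	/*
	 * Sketch only: the shift-based 5/8 limit from segvn_vpage().
	 */
	#include <stdio.h>

	int
	main(void)
	{
		unsigned long total_pages = 4UL * 1024 * 1024;	/* sample value */
		unsigned long page_limit = (total_pages >> 1) + (total_pages >> 3);

		printf("page_limit = %lu (5/8 of %lu = %lu)\n",
		    page_limit, total_pages, total_pages * 5 / 8);
		return (0);
	}
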