5566 elfexec is overzealous with vpages
5572 elfexec and mapelfexec can disagree on aux vectors
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Rich Lowe <richlowe@richlowe.net>
Approved by: Rich Lowe <richlowe@richlowe.net>
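
The vpage half of this change (5566) guards a path a user can reach
directly: segvn_vpage() allocates a struct vpage for every page of a
segment with KM_SLEEP, so a large enough mapping can tie up most of
kernel memory before the allocation completes. A hypothetical userland
sketch of such a mapping follows; the size, the flags, and the claim
that this particular mprotect() splits per-page protections are
illustrative assumptions, not a reproduction from the bug report.

#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	/*
	 * Illustrative size: one struct vpage per page of this mapping
	 * is an enormous kmem allocation.
	 */
	size_t len = 1UL << 42;		/* 4 TB */
	char *p;

	p = mmap(NULL, len, PROT_READ,
	    MAP_ANON | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return (1);
	}

	/*
	 * Changing protection on a sub-range forces segvn to track
	 * protections per page, i.e. segvn_vpage() across the whole
	 * segment. With this change that is expected to fail with
	 * ENOMEM rather than sleeping on the oversized allocation.
	 */
	if (mprotect(p, 8192, PROT_READ | PROT_WRITE) != 0)
		perror("mprotect");

	return (0);
}
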
diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c
index 7d89db1..0eeb4e7 100644
--- a/usr/src/uts/common/exec/elf/elf.c
+++ b/usr/src/uts/common/exec/elf/elf.c
@@ -417,11 +417,11 @@
 		 *	AT_BASE
 		 *	AT_FLAGS
 		 *	AT_PAGESZ
-		 *	AT_SUN_LDSECURE
+		 *	AT_SUN_AUXFLAGS
 		 *	AT_SUN_HWCAP
 		 *	AT_SUN_HWCAP2
-		 *	AT_SUN_PLATFORM
-		 *	AT_SUN_EXECNAME
+		 *	AT_SUN_PLATFORM (added in stk_copyout)
+		 *	AT_SUN_EXECNAME (added in stk_copyout)
 		 *	AT_NULL
 		 *
 		 * total == 9
@@ -501,6 +501,7 @@
 	aux = bigwad->elfargs;
 	/*
 	 * Move args to the user's stack.
+	 * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
 	 */
 	if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
 		if (error == -1) {
@@ -717,7 +718,8 @@
 	if (hasauxv) {
 		int auxf = AF_SUN_HWCAPVERIFY;
 		/*
-		 * Note: AT_SUN_PLATFORM was filled in via exec_args()
+		 * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
+		 * exec_args()
 		 */
 		ADDAUX(aux, AT_BASE, voffset)
 		ADDAUX(aux, AT_FLAGS, at_flags)
@@ -791,7 +793,20 @@
 
 		ADDAUX(aux, AT_NULL, 0)
 		postfixsize = (char *)aux - (char *)bigwad->elfargs;
-		ASSERT(postfixsize == args->auxsize);
+
+		/*
+		 * We make assumptions above when we determine how many aux
+		 * vector entries we will be adding. However, if we have an
+		 * invalid ELF file, it is possible that mapelfexec might
+		 * behave differently (but not return an error), in which case
+		 * the number of aux entries we actually add will be different.
+		 * We detect that now and error out.
+		 */
+		if (postfixsize != args->auxsize) {
+			DTRACE_PROBE2(elfexec_badaux, int, postfixsize,
+			    int, args->auxsize);
+			goto bad;
+		}
 		ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
 	}
 
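The aux-vector half (5572) exists because elfexec() budgets
args->auxsize before mapelfexec() runs, and the later ADDAUX() calls
must consume exactly that budget; an invalid ELF file can steer
mapelfexec() down a path that emits a different set of entries, and on
non-DEBUG kernels the old ASSERT was compiled out. Below is a minimal
sketch of that budget-then-verify pattern with simplified types;
fill_aux() and its arguments are hypothetical, not the kernel's
interfaces.

#include <stddef.h>
#include <errno.h>

typedef struct {
	long	a_type;
	long	a_val;
} aux_entry_t;

#define	ADDAUX(p, t, v)	{ (p)->a_type = (t); ((p)++)->a_val = (v); }

/*
 * Emit entries, then verify that exactly 'budget' bytes were used,
 * mirroring the new postfixsize != args->auxsize check: a path change
 * caused by bad input now surfaces as ENOEXEC instead of silently
 * mis-sizing the stack.
 */
static int
fill_aux(aux_entry_t *base, size_t budget, int have_interp)
{
	aux_entry_t *aux = base;

	ADDAUX(aux, 7 /* AT_BASE */, 0)
	if (have_interp)	/* the path a malformed ELF can flip */
		ADDAUX(aux, 9 /* AT_ENTRY */, 0)
	ADDAUX(aux, 0 /* AT_NULL */, 0)

	if ((size_t)((char *)aux - (char *)base) != budget)
		return (ENOEXEC);
	return (0);
}
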
diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c
index 994c034..7adc5c0 100644
--- a/usr/src/uts/common/os/exec.c
+++ b/usr/src/uts/common/os/exec.c
@@ -26,7 +26,7 @@
 /*	Copyright (c) 1988 AT&T	*/
 /*	  All Rights Reserved  	*/
 /*
- * Copyright (c) 2012, Joyent, Inc.  All rights reserved.
+ * Copyright 2014, Joyent, Inc.  All rights reserved.
  */
 
 #include <sys/types.h>
@@ -1255,6 +1255,22 @@
 			/*
 			 * Before we go to zero the remaining space on the last
 			 * page, make sure we have write permission.
+			 *
+			 * Normal illumos binaries don't even hit the case
+			 * where we have to change permission on the last page
+			 * since their protection is typically either
+			 *    PROT_USER | PROT_WRITE | PROT_READ
+			 * or
+			 *    PROT_ZFOD (PROT_ALL | PROT_USER).
+			 *
+			 * We need to be careful how we zero-fill the last page
+			 * if the segment protection does not include
+			 * PROT_WRITE. Using as_setprot() can cause the VM
+			 * segment code to call segvn_vpage(), which must
+			 * allocate a struct vpage for each page in the segment.
+			 * If we have a very large segment, this may fail, so
+			 * we have to check for that, even though we ignore
+			 * other return values from as_setprot.
 			 */
 
 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
@@ -1265,8 +1281,11 @@
 			AS_LOCK_EXIT(as, &as->a_lock);
 
 			if (seg != NULL && (zprot & PROT_WRITE) == 0) {
-				(void) as_setprot(as, (caddr_t)end,
-				    zfoddiff - 1, zprot | PROT_WRITE);
+				if (as_setprot(as, (caddr_t)end, zfoddiff - 1,
+				    zprot | PROT_WRITE) == ENOMEM) {
+					error = ENOMEM;
+					goto bad;
+				}
 			}
 
 			if (on_fault(&ljb)) {
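
The window both this hunk and the matching mmapobj.c hunk below must
write to is just the sub-page tail left over after the loaded data. A
standalone illustration of the arithmetic, assuming 4K pages and an
arbitrary end address (the P2ROUNDUP definition is copied from
sys/sysmacros.h):

#include <stdint.h>
#include <stdio.h>

#define	PAGESIZE	0x1000UL
#define	P2ROUNDUP(x, a)	(-(-(x) & -(a)))	/* as in sys/sysmacros.h */

int
main(void)
{
	uintptr_t end = 0x12345;	/* first byte past the loaded data */
	uintptr_t zfodbase = P2ROUNDUP(end, PAGESIZE);
	size_t zfoddiff = zfodbase - end;

	/* prints: zfodbase=0x13000 zfoddiff=0xcbb */
	(void) printf("zfodbase=0x%lx zfoddiff=0x%lx\n",
	    (unsigned long)zfodbase, (unsigned long)zfoddiff);
	return (0);
}

If that tail sits in a mapping without PROT_WRITE, the temporary
as_setprot() is the only way to zero it, which is why its ENOMEM now
has to be honored.
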
diff --git a/usr/src/uts/common/os/mmapobj.c b/usr/src/uts/common/os/mmapobj.c
index b33ef69..578e6da 100644
--- a/usr/src/uts/common/os/mmapobj.c
+++ b/usr/src/uts/common/os/mmapobj.c
@@ -21,6 +21,7 @@
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc.  All rights reserved.
  */
 
 #include <sys/types.h>
@@ -1126,10 +1127,23 @@
 		zfodbase = (caddr_t)P2ROUNDUP(end, PAGESIZE);
 		zfoddiff = (uintptr_t)zfodbase - end;
 		if (zfoddiff) {
+			/*
+			 * Before we go to zero the remaining space on the last
+			 * page, make sure we have write permission.
+			 *
+			 * We need to be careful how we zero-fill the last page
+			 * if the protection does not include PROT_WRITE. Using
+			 * as_setprot() can cause the VM segment code to call
+			 * segvn_vpage(), which must allocate a struct vpage for
+			 * each page in the segment. If we have a very large
+			 * segment, this may fail, so we check for that, even
+			 * though we ignore other return values from as_setprot.
+			 */
 			MOBJ_STAT_ADD(zfoddiff);
 			if ((prot & PROT_WRITE) == 0) {
-				(void) as_setprot(as, (caddr_t)end,
-				    zfoddiff, prot | PROT_WRITE);
+				if (as_setprot(as, (caddr_t)end, zfoddiff,
+				    prot | PROT_WRITE) == ENOMEM)
+					return (ENOMEM);
 				MOBJ_STAT_ADD(zfoddiff_nowrite);
 			}
 			if (on_fault(&ljb)) {
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c
index 9426a12..8feb104 100644
--- a/usr/src/uts/common/vm/seg_vn.c
+++ b/usr/src/uts/common/vm/seg_vn.c
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
  */
 
 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
@@ -5769,6 +5770,11 @@
 					 * to len.
 					 */
 					segvn_vpage(seg);
+					if (svd->vpage == NULL) {
+						SEGVN_LOCK_EXIT(seg->s_as,
+						    &svd->lock);
+						return (ENOMEM);
+					}
 					svp = &svd->vpage[seg_page(seg, addr)];
 					evp = &svd->vpage[seg_page(seg,
 					    addr + len)];
@@ -5862,6 +5868,10 @@
 		 * the operation.
 		 */
 		segvn_vpage(seg);
+		if (svd->vpage == NULL) {
+			SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+			return (ENOMEM);
+		}
 		svd->pageprot = 1;
 		if ((amp = svd->amp) != NULL) {
 			anon_idx = svd->anon_index + seg_page(seg, addr);
@@ -5966,6 +5976,10 @@
 		}
 	} else {
 		segvn_vpage(seg);
+		if (svd->vpage == NULL) {
+			SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+			return (ENOMEM);
+		}
 		svd->pageprot = 1;
 		evp = &svd->vpage[seg_page(seg, addr + len)];
 		for (svp = &svd->vpage[seg_page(seg, addr)]; svp < evp; svp++) {
@@ -7656,9 +7670,13 @@
 	 */
 
 	if ((vpp = svd->vpage) == NULL) {
-		if (op == MC_LOCK)
+		if (op == MC_LOCK) {
 			segvn_vpage(seg);
-		else {
+			if (svd->vpage == NULL) {
+				SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+				return (ENOMEM);
+			}
+		} else {
 			SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
 			return (0);
 		}
@@ -8236,6 +8254,10 @@
 		page = seg_page(seg, addr);
 
 		segvn_vpage(seg);
+		if (svd->vpage == NULL) {
+			SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+			return (ENOMEM);
+		}
 
 		switch (behav) {
 			struct vpage *bvpp, *evpp;
@@ -8470,6 +8492,7 @@
 {
 	struct segvn_data *svd = (struct segvn_data *)seg->s_data;
 	struct vpage *vp, *evp;
+	static pgcnt_t page_limit = 0;
 
 	ASSERT(SEGVN_WRITE_HELD(seg->s_as, &svd->lock));
 
@@ -8478,9 +8501,32 @@
 	 * and the advice from the segment itself to the individual pages.
 	 */
 	if (svd->vpage == NULL) {
+		/*
+		 * Start by calculating the number of pages of kmem needed to
+		 * hold the per-page vpage structs for this entire segment.
+		 * If we know now that this will exceed our heuristic for the
+		 * maximum amount of kmem we can consume, fail now. We do
+		 * this here, instead of trying to detect it deep in
+		 * page_resv and propagating the error up, since the entire
+		 * memory allocation stack is not amenable to passing this
+		 * back; instead, it wants to keep trying.
+		 *
+		 * As a heuristic we set a page limit of 5/8 of total_pages
+		 * for this allocation. We use shifts so that no floating
+		 * point conversion takes place and we only need to do the
+		 * calculation once.
+		 */
+		ulong_t mem_needed = seg_pages(seg) * sizeof (struct vpage);
+		pgcnt_t npages = mem_needed >> PAGESHIFT;
+
+		if (page_limit == 0)
+			page_limit = (total_pages >> 1) + (total_pages >> 3);
+
+		if (npages > page_limit)
+			return;
+
 		svd->pageadvice = 1;
-		svd->vpage = kmem_zalloc(seg_pages(seg) * sizeof (struct vpage),
-		    KM_SLEEP);
+		svd->vpage = kmem_zalloc(mem_needed, KM_SLEEP);
 		evp = &svd->vpage[seg_page(seg, seg->s_base + seg->s_size)];
 		for (vp = svd->vpage; vp < evp; vp++) {
 			VPP_SETPROT(vp, svd->prot);
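
As for the limit itself: (total_pages >> 1) + (total_pages >> 3) is
exactly total_pages/2 + total_pages/8 = 5/8 of total_pages, with no
floating point, and the static page_limit means the sum is computed
once. A quick standalone check, with the total_pages value assumed
purely for illustration:

#include <stdio.h>

int
main(void)
{
	unsigned long total_pages = 0x100000;	/* assume 4 GB of 4K pages */
	unsigned long page_limit = (total_pages >> 1) + (total_pages >> 3);

	/* prints: 655360 == 655360 (5/8 of total_pages) */
	(void) printf("%lu == %lu\n", page_limit, total_pages * 5 / 8);
	return (0);
}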