/*	START NEW ARIX SCCS HEADER			*/
/*							*/
/*	@(#) region.c: version 25.6 created on 2/19/93 at 20:10:40	*/
/*							*/
/*	Copyright (c) 1990 by Arix Corporation		*/
/*	All Rights Reserved				*/
/*							*/
#ident	"@(#)region.c	25.6	2/19/93 Copyright (c) 1990 by Arix Corporation"
/*							*/
/*	END NEW ARIX SCCS HEADER			*/
/*							*/

/*	Copyright (c) 1984 AT&T	*/
/*	  All Rights Reserved  	*/

/*	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T	*/
/*	The copyright notice above does not evidence any   	*/
/*	actual or intended publication of such source code.	*/


#include "sys/types.h"
#include "sys/param.h"
#include "sys/immu.h"
#include "sys/systm.h"
#include "sys/sysmacros.h"
#include "sys/pfdat.h"
#include "sys/user.h"
#include "sys/errno.h"
#include "sys/mount.h"
#include "sys/inode.h"
#include "sys/fstyp.h"
#include "sys/var.h"
#include "sys/buf.h"
#include "sys/debug.h"
#include "sys/region.h"
#include "sys/proc.h"
#include "sys/ipc.h"
#include "sys/shm.h"
#include "sys/cmn_err.h"
#include "sys/tuneable.h"
#include "sys/sysinfo.h"
#include "sys/conf.h"
#include "sys/mfs.h"
#include "sys/spm_mem.h"
#include "sys/synch.h"
#include "sys/lio.h"

/***	reg_t	nullregion;	***/
/*
 * nullpregion has static storage and no initializer, so it is all zeroes;
 * detachreg() copies it over the pregion slot it vacates.
 */
static preg_t		nullpregion;
static reg_t		*rfree;		/* region free list pointer */
/*
 * rlist_lock is used by rlstlock and rlstunlock.  It protects both the
 * region freelist (singly linked rfree) and the region active list
 * (doubly linked ractive).
 */
static suspend_lock_t	rlist_lock = SUSPEND_INIT(PZERO);

/*	The following defines the minimum number of
**	regions for a process.  This includes one
**	region each for the text, data, and stack
**	plus one null region pointer to indicate
**	the end of the pregion list.
**	reginit() adds the shared memory and shared
**	library allowances to it to compute pregpp.
*/

#define	MINPREGPP	(3 + 1)

/*
 * reginit -- one-time set-up of the region tables.
 *
 * Computes pregpp (pregion slots per process), makes the active list an
 * empty circular list headed by ractive, and threads all v.v_region
 * entries of region[] onto the free list through r_forw.
 */
void
reginit()
{
	register reg_t	*rp;
	register int	idx;

	pregpp = MINPREGPP + shmseg() + 2*shlbinfo.shlbs;

	/* An empty active list points back at its own header. */
	ractive.r_back = &ractive;
	ractive.r_forw = &ractive;

	/*
	 * Push the entries highest index first, so that region[0] ends
	 * up at the head of the free list.
	 */
	idx = v.v_region;
	while (--idx >= 0) {
		rp = &region[idx];
		rp->r_forw = rfree;
		rfree = rp;
	}
}

/*
 * reglock -- lock a region, sleeping until it becomes available
 *
 * region_lock_sem protects the r_lock field (and r_lock_* if defined),
 * the dbds in the region's page tables, and synchronizes the RG_DONE and
 * RG_WAITING flags in r_flags (see regwait).
 *
 * It specifically does not protect the r_forw and r_back fields, which
 * are locked by rlstlock().
 *
 * Asking for a lock the caller already owns is fatal: a panic, preceded
 * by a trace report when REG_DEBUG is defined.
 */

reglock(rp)
register reg_t	*rp;
{
	spin_lock(&region_lock_sem);
	for (;;) {
		if ((rp->r_lock & REGION_LOCK) == 0)
			break;		/* nobody holds it */

		if (reg_mylock(rp)) {
#ifdef REG_DEBUG
			reg_t	my_reg;

			regcheckpoint(&my_reg);
			regbadmsg("reglock: already got lock", rp, &my_reg);
			break;
#else /* REG_DEBUG */
			cmn_err(CE_PANIC,
			  "reglock: proc=%x (pid=%d) already locked region=%x",
			  REG_PROC, u.u_procp->p_pid, rp);
#endif
		}

		/* Tell the holder that regrele() must issue a wakeup. */
		rp->r_lock |= REGION_WANTED;
		mfs_sleep((caddr_t)rp, PZERO, &region_lock_sem);
	}

	/* Record the owner; this also drops any REGION_WANTED bit. */
	rp->r_lock = REG_PROC;
#ifdef REG_DEBUG
	regcheckpoint(rp);
#endif
	spin_unlock(&region_lock_sem);
}

/*
 * regrele -- give up the lock on a region
 *
 * The caller must be the current holder; anything else is fatal (a
 * panic, preceded by a trace report when REG_DEBUG is defined).
 * Sleepers that flagged REGION_WANTED are woken before the lock word
 * is cleared, all under region_lock_sem.
 */
regrele(rp)
register reg_t	*rp;
{
	/* Ownership is checked before region_lock_sem is taken. */
	if (reg_mylock(rp) == 0) {
#ifdef REG_DEBUG
		reg_t	my_reg;

		regcheckpoint(&my_reg);
		regbadmsg("regrele: releasing lock", rp, &my_reg);
#else /* REG_DEBUG */
		cmn_err(CE_PANIC,
		  "regrele: proc=%x (pid=%d) released region=%x locked by %x",
		  REG_PROC, u.u_procp->p_pid, rp, (rp->r_lock & REGION_LOCK));
#endif
	}

	spin_lock(&region_lock_sem);
	ASSERT(rp->r_lock & REGION_LOCK);

	if (rp->r_lock & REGION_WANTED) {
		/* somebody is asleep in reglock() on this region */
		mfs_wakeup_all((caddr_t)rp);
	}

	/* Clears the owner and the wanted bit in one store. */
	rp->r_lock = 0;
#ifdef REG_DEBUG
	regcheckpoint(rp);
#endif
	spin_unlock(&region_lock_sem);
}

/*
 * reg_trylock -- take the region lock only if that needs no sleeping
 *
 * Returns 1 with the region locked by the caller, or 0 (nothing
 * changed) when the region is already locked.
 */

reg_trylock(rp)
register reg_t	*rp;
{
	register int	got;

	spin_lock(&region_lock_sem);

	got = ((rp->r_lock & REGION_LOCK) == 0);
	if (got) {
		rp->r_lock = REG_PROC;
#ifdef REG_DEBUG
		regcheckpoint(rp);
#endif
	}

	spin_unlock(&region_lock_sem);
	return (got);
}

#ifdef REG_DEBUG

#include "sys/kmem.h"
/*
 * regcheckpoint -- record which process changed this region lock, and a
 *			short stack trace
 *
 * Fills rrp->r_lock_func[] with up to REG_DEBUG return addresses taken
 * from the chain of saved frame pointers, starting at the caller's own
 * frame.  Slots that could not be filled are set to ~0.
 * The value of REG_DEBUG doubles as the number of trace slots.
 */
regcheckpoint(rrp)
reg_t	*rrp;
{
	register reg_t	*rp;
	register uint	*fp;
	register uint	nfp;
	register int	n;
	extern uint	sys_stk_end[];

	rp = rrp;

	/*
	 * Locate this function's frame from the address of its argument.
	 * This relies on the layout noted below.
	 */
	nfp = (uint)(&rrp - 2);	/* stack frame is fp, pc, arg, .... */
	/*
	 * Follow the saved frame pointers while they stay inside the
	 * range [ADDR_EXTRA_STK, sys_stk_end - USERSTACK_OFF).
	 */
	for (n = 0; n < REG_DEBUG && nfp >= ADDR_EXTRA_STK &&
	  nfp < (uint)sys_stk_end - USERSTACK_OFF; n++) {
		fp = (uint *)nfp;
		nfp = fp[0];			/* next (older) frame */
		rp->r_lock_func[n] = fp[1];	/* store return pc */
	}

	/* Pad the unused tail of the trace. */
	for ( ; n < REG_DEBUG; n++)
		rp->r_lock_func[n] = ~0;	/* invalid pc */
}

/*
 * regbadmsg -- report a region locking error, then panic
 *
 * msg is printed first.  Two lines follow: "was" shows the lock owner
 * and trace stored in the region itself (rp0), "now" shows the ones in
 * the scratch checkpoint region taken by the caller (rp1).
 */
regbadmsg(msg, rp0, rp1)
char	*msg;
reg_t	*rp0, *rp1;
{
	register int	pass, k;
	register reg_t	*cur;
	register char	*sep;
	reg_t		*regs[2];
	static char	*rpass[] = { "was", "now" };

	regs[0] = rp0;
	regs[1] = rp1;

	printf("%s\n", msg);
	for (pass = 0; pass < 2; pass++) {
		cur = regs[pass];
		printf("%s: proc=%x, trace: ", rpass[pass],
		  cur->r_lock & REGION_LOCK);
		for (k = 0; k < REG_DEBUG; k++) {
			/* comma-separate every entry but the first */
			sep = (k ? ", " : "");
			printf("%s%x", sep, cur->r_lock_func[k]);
		}
		printf("\n");
	}
	cmn_err(CE_PANIC, "region=%x", rp0);
}
#endif /* REG_DEBUG */

/*
 * rlstlock -- acquire rlist_lock, which guards the region free list
 * (rfree) and the region active list (ractive).
 */
rlstlock()
{
	suspend_lock(&rlist_lock);
}

/*
 * rlstunlock -- release rlist_lock.  The lock is asserted to be held
 * on entry.
 */
rlstunlock()
{
	ASSERT(suspend_islocked(&rlist_lock));
	suspend_unlock(&rlist_lock);
}

/*
 * rlst_mylock -- return what suspend_islocked() reports for rlist_lock.
 * NOTE(review): the name suggests "held by the caller", but whether
 * suspend_islocked() distinguishes the owner is not visible here.
 */
rlst_mylock()
{
	return(suspend_islocked(&rlist_lock));
}

/*
 * regwait -- sleep until the region has RG_DONE set
 *
 * loadreg() and mapreg() set RG_DONE and issue a wakeup on &r_flags
 * when they find RG_WAITING.  The flags are examined and changed under
 * region_lock_sem.
 */
regwait(rp)
register reg_t	*rp;
{
	spin_lock(&region_lock_sem);
	for (;;) {
		if (rp->r_flags & RG_DONE)
			break;
		/* ask for a wakeup, then sleep on the flag word */
		rp->r_flags |= RG_WAITING;
		mfs_sleep((caddr_t)&rp->r_flags, PZERO, &region_lock_sem);
	}
	spin_unlock(&region_lock_sem);
}


/*
 * allocreg -- take a region off the free list
 *
 * ip is the inode to associate with the region (may be NULL); when it
 * is non-NULL the caller must hold its lock and its i_count is bumped.
 * type goes into r_type.
 *
 * Returns the region locked and linked at the head of the active list,
 * or NULL with u.u_error = EAGAIN when the region table is exhausted.
 */

reg_t *
allocreg(ip, type)
register struct inode	*ip;
short type;
{
	register reg_t *newrp;

	rlstlock();

	newrp = rfree;
	if (newrp == NULL) {
		/* nothing left; report it with the list lock dropped */
		rlstunlock();
		cmn_err(CE_WARN, "Region table overflow");
		u.u_error = EAGAIN;
		return(NULL);
	}

	/* Unhook the entry from the singly linked free list. */
	rfree = newrp->r_forw;
	newrp->r_forw = NULL;

	/* Fill in the descriptor and lock it for the caller. */
	newrp->r_iptr = ip;
	newrp->r_type = type;
	reglock(newrp);

	if (ip != NULL) {
		ASSERT(inode_locked(ip));
		ip->i_count++;
	}

	/* Insert right behind the ractive list header. */
	newrp->r_back = &ractive;
	newrp->r_forw = ractive.r_forw;
	ractive.r_forw->r_back = newrp;
	ractive.r_forw = newrp;

	rlstunlock();
	return(newrp);
}

/*
 * freereg -- free an unused region table entry
 *
 * rp must be locked by the caller and have a zero reference count
 * (both are asserted).  If the region has an inode, the caller holds
 * the inode lock.
 *
 * If another process is waiting for the region lock (REGION_WANTED),
 * the region is not freed: it goes back on the active list, the inode
 * is released with prele(), the region is unlocked, and we return.
 *
 * Otherwise the inode reference is dropped, all pages, page tables and
 * the page table list are freed, availsmem (and availrmem for a region
 * with r_noswapcnt set) is credited, the descriptor is cleared, and
 * the entry is pushed on the free list.
 */
void
freereg(rp)
register reg_t *rp;	/* pointer to a locked region */
{
	register inode_t	*ip;
	register pde_t		*pt;
	register int		seg;
	register int		nsegs;
	register int		npages;
	register int		total;
	register int		wanted;

	ASSERT(reg_mylock(rp));
	ASSERT(rp->r_refcnt == 0);

	/* take off of active list first, to avoid race with xalloc() */
	rlstlock();
	rp->r_back->r_forw = rp->r_forw;
	rp->r_forw->r_back = rp->r_back;
	rlstunlock();

	ip = rp->r_iptr;

	/* Sample the wanted bit under the lock that guards r_lock. */
	spin_lock(&region_lock_sem);
	wanted = ((rp->r_lock & REGION_WANTED) != 0);
	spin_unlock(&region_lock_sem);

	if (wanted) {
		/* Still in demand: relink at the head of the active list. */
		rlstlock();
		rp->r_back = &ractive;
		rp->r_forw = ractive.r_forw;
		ractive.r_forw->r_back = rp;
		ractive.r_forw = rp;
		rlstunlock();

		if (ip)
			prele(ip);
		regrele(rp);
		return;
	}

	/* Drop the inode reference taken in allocreg(). */
	if (ip) {
		ASSERT(inode_locked(ip));
		iput(ip);
	}

	/*
	 * Release the pages of every segment, then the segment's pde
	 * table and dbd table, which are allocated together as a pair.
	 * Only the last segment can be partly used; for a stack region
	 * its pages sit at the end of the table instead of the start.
	 */
	total = rp->r_pgsz;
	nsegs = ctos(total);

	for (seg = 0;  seg < nsegs;  seg++) {
		pt = rp->r_list[seg];
		npages = total - stoc(seg);
		if (npages > NPGPT) {
			npages = NPGPT;
		} else if (rp->r_flags & RG_STACK) {
			pt += NPGPT - npages;
		}

		memlock();
		pfree(rp, pt, pt + NPGPT, npages);
		memunlock();

		/* free the 2 paired page tables: pde + dbd */
		uptfree(rp->r_list[seg], 2);
	}

	/* Gap pages were never charged, so they are not credited. */
	atom_add(&availsmem, total - rp->r_gapsz);

	ASSERT((rp->r_noswapcnt >= 0) &&
	       (rp->r_noswapcnt <= 1 || rp->r_type == RT_SHMEM));

	if (rp->r_noswapcnt)
		atom_add(&availrmem, total - rp->r_gapsz);

	/* The list of page table pointers goes last. */
	uptfree(rp->r_list, rp->r_listsz);

	/* Wipe the descriptor so that the next allocreg() starts clean. */
	ASSERT(rp->r_refcnt == 0);
	ASSERT(! (rp->r_lock & REGION_WANTED));
	rp->r_list = NULL;
	rp->r_listsz = 0;
	rp->r_pgsz = 0;
	rp->r_gapsz = 0;
	rp->r_nvalid = 0;
	rp->r_filesz = 0;
	rp->r_noswapcnt = 0;
	rp->r_type = 0;
	rp->r_flags = 0;
	rp->r_lock = 0;

	/* Push the entry on the free list. */
	rlstlock();
	rp->r_forw = rfree;
	rfree = rp;
	rlstunlock();
}

#define	SEOFFMASK	(stob(1) - USERSTACK_OFF)
/*
 * Attach a region to a process' address space
 */
preg_t *
attachreg(rp, up, vaddr, type, prot)
register reg_t	*rp;	/* pointer to region to be attached */
user_t		*up;	/* pointer to u-block (needed by fork) */
caddr_t		vaddr;	/* virtual address to attach at */
register int	type;	/* Type to make the pregion. */
int		prot;	/* permissions for segment table entries. */
{
	register preg_t *prp1, *first;
	register preg_t *prp;
	register int	ptype;
	preg_t 		tmp;

	ASSERT(reg_mylock(rp));

	/*	Check attach address.
	 *	It must be segment aligned.
	 */

	ptype = type & ~PF_A1000;
	if (ptype == PT_STACK) {
		if (((int)vaddr & SOFFMASK) != SEOFFMASK) {
			u.u_error = EINVAL;
			return(NULL);
		}
	}
	else {
		if ((int)vaddr & SOFFMASK) {
			u.u_error = EINVAL;
			return(NULL);
		}
	}

	/*	Allocate a pregion.  We should always find a
	 *	free pregion because of the way the system
	 *	is configured.
	 */

	prp = findpreg(up->u_procp, PT_UNUSED);
	if (prp == NULL) {
		u.u_error = EMFILE;
		return(NULL);
	}

	/*	init pregion
	 */

	prp1 = &tmp;
	prp1->p_reg = rp;
	prp1->p_regva = vaddr;
	prp1->p_type = ptype;
	prp1->p_flags = type & PF_A1000;
	if (prot == (SEG_RO))
		prp1->p_flags |= PF_RDONLY;

	/*	Check that region does not go beyond end of virtual
	 *	address space.
	 */

	if (chkgrowth(up, prp1, 0, rp->r_pgsz)) {
		u.u_error = EINVAL;
		return(NULL);
	}

	/*	Load the segment table.
	 */

	if (rp->r_pgsz > 0 && !loadstbl(up, prp1, 0)) {
		u.u_error = ENOMEM;
		return (NULL);
	}

	/* Insert tmp in the pregion list
	*/

	first = (up->u_procp)->p_region;
	for (prp1 = prp; prp1 > first; --prp1)
		if ((prp1-1)->p_regva > vaddr)
			*prp1 = *(prp1-1);
		else break;
	*prp1 = tmp;

	++rp->r_refcnt;
	up->u_procp->p_size += rp->r_pgsz - rp->r_gapsz;
	if (ptype == PT_STACK)
		rp->r_flags |= RG_STACK;

	return(prp1);
}

/*	Detach a region from a process' address space.
**/

void
detachreg(prp, up)
register preg_t *prp;
register user_t	*up;
{
	register reg_t	*rp;
	register int	i;
	inode_t		*ip;

	rp = prp->p_reg;

	ASSERT(rp);
	ASSERT(reg_mylock(rp));

	/*
	 *	If detaching A1000 shared memory "region"
	 *	just shrink the "common" region for the
	 *	size of shared memory.
	 */
	if ((prp->p_flags & PF_A1000) && prp->p_type == PT_SHMEM)
		(void) growreg(prp, -prp->p_pgsz, DBD_NONE); 

	/*
	 *	Invalidate segment table entries pointing
	 *	at the region.
	 */
	else if (rp->r_pgsz > 0) {
		loadstbl(up, prp, -rp->r_pgsz);
		/*
		 *	Decrement process size by size of region.
		 */
		up->u_procp->p_size -= rp->r_pgsz - rp->r_gapsz;
	}

	/*
	 * Decrement use count and free region if zero
	 * and RG_NOFREE is not set, otherwise unlock.
	 */
	if ((--rp->r_refcnt == 0) && !(rp->r_flags & RG_NOFREE)) {
		freereg(rp);
	} else {
		if (ip = rp->r_iptr)
			if (inode_locked(ip))
				prele(ip);
		regrele(rp);
	}

	/*	Clear the proc region we just detached.
	 */

	for (i = prp - up->u_procp->p_region; i < pregpp-1; i++, prp++) {
		*prp = *(prp+1);
	}
	*prp = nullpregion;
}

/*
 * dupreg -- duplicate a region
 *
 * rp must be locked by the caller.  slpflg is handed to ptalloc().
 * A region whose type is not RT_PRIVATE is returned as is unless force
 * is non-zero; a forced copy is always RT_PRIVATE.
 *
 * Otherwise a new region is allocated and the page tables are copied
 * entry by entry.  Every parent pde is first marked copy-on-write and
 * write protected, so parent and child share the pages; the use count
 * of each valid page and of each swap block is incremented.
 *
 * npgs (r_pgsz - r_gapsz) pages are reserved from availsmem up front.
 * On every failure the reservation is returned, partly directly and
 * partly through freereg(), which credits r_pgsz - r_gapsz of the
 * region it frees.
 *
 * Returns the new region (locked), rp itself when no copy is needed,
 * or NULL with u.u_error = EAGAIN.
 */

reg_t *
dupreg(rp, slpflg, force)
register	reg_t *rp;
int		slpflg, force;
{
	register pde_t	*ppte;
	register pde_t	*cpte;
	register reg_t	*rp2;
	register int	i;
	register int	j;
	register int	size, npgs;
	pde_t		*cpgtbl;
	extern pde_t	*ptalloc();

	ASSERT(reg_mylock(rp));

	/* If region is shared, and we're not forcing a duplicate,
	 * there is no work to do.
	 * Just return the passed region.  The region reference
	 * counts are incremented by attachreg
	 */

	if (rp->r_type != RT_PRIVATE && force == 0)
		return(rp);

	/*	Make sure we have enough space to duplicate
	**	this region without potential deadlock.
	*/

	npgs = rp->r_pgsz - rp->r_gapsz;
	atom_sub(&availsmem, npgs);
	if (availsmem < tune.t_minasmem) {
		atom_add(&availsmem, npgs);
		if (rp->r_iptr)
			prele(rp->r_iptr);
		nomemmsg("dupreg", npgs, 0, 0);
		u.u_error = EAGAIN;
		return(NULL);
	}

	/*
	 * Need to copy the region.
	 * Allocate a region descriptor
	 */

	rp2 = allocreg(rp->r_iptr, force ? RT_PRIVATE : rp->r_type);
	if (rp2 == NULL) {
		atom_add(&availsmem, npgs);
		if (rp->r_iptr)
			prele(rp->r_iptr);
		u.u_error = EAGAIN;
		return(NULL);
	}

	/*	Allocate a list for the new region.
	 */

	rp2->r_listsz = rp->r_listsz;
	rp2->r_list = (pde_t **)ptalloc(rp2->r_listsz, slpflg, rp, 0);
	rp2->r_flags = rp->r_flags;
	rp2->r_lock = rp->r_lock;	/* lock moved from r_flags to r_lock */
#ifdef REG_DEBUG
	regcheckpoint(rp2);
#endif
	if (rp2->r_list == NULL) {
		/* rp2 still has r_pgsz 0, so freereg() credits nothing */
		rp2->r_listsz = 0;
		freereg(rp2);
		atom_add(&availsmem, npgs);
		u.u_error = EAGAIN;
		return(NULL);
	}

	/*
	 * Copy pertinent data to new region
	 */

	rp2->r_pgsz = rp->r_pgsz;
	rp2->r_gapsz = rp->r_gapsz;
	rp2->r_filesz = rp->r_filesz;
	rp2->r_nvalid = rp->r_nvalid;
	rp2->r_noswapcnt = rp->r_noswapcnt;

	/* Scan the parents page table list and fix up each page table.
	 * Allocate a page table and map table for the child and
	 * copy it from the parent.
	 */

	for (i = 0;  i < ctos(rp->r_pgsz);  i++) {
		/* Allocate a page descriptor (table) and 
		 * map table for the child.
		 */

		/* allocate the 2 paired page tables: pde + dbd */
		if ((cpgtbl = cpte = ptalloc(2, slpflg, rp, 0)) == 0) {
			/*
			 * Cut the child back to the segments that do
			 * have tables; freereg() credits those pages,
			 * the remainder is credited here.
			 */
			npgs -= (rp2->r_pgsz = stoc(i));
			freereg(rp2);
			if (npgs > 0)
				atom_add(&availsmem, npgs);
			u.u_error = EAGAIN;
			return(NULL);
		}

		/* Set pointer to the newly allocated page descriptor (table)
		 * and dbd into the child's list.  Then get a
		 * pointer to the parents page descriptor (table) and dbd.
		 */

		rp2->r_list[i] = cpte;
		ppte = rp->r_list[i];

		/* Get the total number of unmapped pages remaining.
		 * This is the total size of the region minus the
		 * number of segments for which we have allocated
		 * page tables already.
		 */

		size = rp->r_pgsz - stoc(i);

		/* If this size is greater than a segment, then
		 * we will only process a segment.
		 */

		if (size > NPGPT)
			size = NPGPT;
		else {
			/* a stack's last table is filled from the end */
			if (rp->r_flags & RG_STACK) {
				ppte += NPGPT - size;
				cpte += NPGPT - size;
			}
		}

		/* Check each parents page and then copy it to
		 * the childs pte.  Also check the map table
		 * entries.
		 */

		for (j = 0;  j < size;  j++, ppte++, cpte++) {
			dbd_t	map;

			/* the dbd table sits NPGPT entries past the pdes */
			map = *(dbd_t *)(ppte + NPGPT);
#ifdef M68020
			/* 001
			 * Kludge for the 020 board:
			 * If the page has been modified and the type is
			 * DBD_SWAP, then we won't be able to detect the
			 * modify bit later (pg_ismod() is dependent on the
			 * write permission bit), so break the connection
			 * to the swap area, and change back to DBD_NONE.
			 */
			if (map.dbd_type == DBD_SWAP &&
			  pg_isvalid(ppte) && pg_ismod(ppte)) {
				if (swfree1(&map) == 0) {
					memlock();
					if (!pbremove(rp, &map))
						cmn_err(CE_PANIC,
						  "dupreg - pbremove");
					memunlock();
				}
				map.dbd_type = DBD_NONE;
				*(dbd_t *)(ppte + NPGPT) = map;
			}
#endif /* M68020 */

			/* Set copy-on-write bit */
			pg_setcw(ppte);
			pg_setwp(ppte);

			/* Copy parents page to child.  */
			*cpte = *ppte;

			/*	If the page is in core, then
			 *	increment the page use count.
			 */

			if (pg_isvalid(ppte)) {
				struct pfdat *pfd;

				pfd = pde_to_pfdat(*ppte);
				memlock();
				ASSERT(pfd->pf_use > 0); 
				pfd->pf_use++;
				memunlock();
			}

			/* Increment the swap use count for pages which
			 * are on swap.
			 */

			if (map.dbd_type == DBD_SWAP) {
				ASSERT(swpuse(&map) != 0);
				if (!swpinc(&map, "dupreg")) {

					/* The swap use count overflowed.
					 * Free the region and return
					 * an error.
					 *
					 * rp2 carries the full r_pgsz and
					 * r_gapsz by now, so freereg()
					 * gives back exactly the npgs
					 * reserved above; crediting
					 * availsmem again here would count
					 * those pages twice.
					 *
					 * NOTE(review): freereg() will also
					 * walk r_list slots past segment i,
					 * which this loop has not filled
					 * in yet -- confirm that is safe.
					 */

					((dbd_t *)(cpte + NPGPT))->dbd_type =
						DBD_NONE;
					freereg(rp2);
					u.u_error = EAGAIN;
					return(NULL);
				}
			}
			*(dbd_t *)(cpte + NPGPT) = map;
		}
#ifdef M68040
		/*
		 * We must set uninitialized pde's PG_UNINIT, or write_back
		 * doesn't work right.
		 */
		if (size < NPGPT)
			bset_long((rp->r_flags & RG_STACK) ? cpgtbl : cpte,
			  PG_UNINIT, NPGPT - size);
#endif
	}
	return(rp2);
}

/*
 * Change the size of a region
 *  change == 0  -> no-op
 *  change  < 0  -> shrink
 *  change  > 0  -> expand
 *
 * prp is the pregion whose region (locked by the caller) is resized,
 * change is the number of pages, and type is the dbd type given to new
 * pages.  Pages of type DBD_NONE are gap pages: they are counted in
 * r_gapsz and are not charged to availsmem/availrmem or to p_size.
 *
 * For expansion the new pdes are marked uninitialized and their dbds
 * get the given type.
 * For shrink, the caller must flush the ATB
 * Returns 0 on no-op, -1 on failure, and 1 on success.
 * On failure u.u_error is EAGAIN (memory limits) or ENOMEM (address
 * space, page tables or segment table) and the memory reservation has
 * been returned.
 */

growreg(prp, change, type)
register preg_t *prp;
register int	type;
{
	register pde_t	*pt;
	register reg_t	*rp;
	register int	i;
	register int	size;
	register int	osize;
	pde_t		pte;

	rp = prp->p_reg;
	ASSERT(reg_mylock(rp));

	if (change == 0)
		return(0);

	osize = rp->r_pgsz;

	ASSERT(rp->r_noswapcnt >=0);

	if (change < 0) {

		/*	The region is being shrunk.  Compute the new
		 *	size and free up the unneeded space.
		 */

		/* Note: this works because we currently ONLY shrink with
		 *	type DBD_NONE for whole gaps.  Shrinking a combination
		 *	of gaps and data pages will screw up the rmem and smem
		 *	counts!
		 */
		if (type != DBD_NONE) {
			atom_sub(&availsmem, change);	/* change < 0.	*/
			if (rp->r_noswapcnt)
				atom_sub(&availrmem, change);
		}
		i = ctost(osize + change);

		for (;  i < ctos(osize)  ;  i++) {
			/*	Free up the allocated pages for
			 *	this segment.
			 */

			pt = rp->r_list[i];
			size = osize - stoc(i);
			if (size > NPGPT)
				size = NPGPT;
			/* keep the pages below the new end of the region */
			if (osize + change > stoc(i)) {
				size -= osize + change - stoc(i);
				pt += osize + change - stoc(i);
			}
			memlock();
			pfree(rp, pt, pt + NPGPT, size);
			memunlock();
		}

		/*	Free up the page tables which we no
		 *	longer need.
		 */

		(void) ptexpand(rp, change);
	} else {
		/*	We are expanding the region.  Make sure that
		 *	the new size is legal and then allocate new
		 *	page tables if necessary.
		 */

		if (type != DBD_NONE) {
			atom_sub(&availsmem, change);
			if (availsmem < tune.t_minasmem) {
				atom_add(&availsmem, change);
				nomemmsg("growreg", change, 0, 0);
				u.u_error = EAGAIN;
				return(-1);
			}
			if (rp->r_noswapcnt) {
				atom_sub(&availrmem, change);
				if (availrmem < tune.t_minarmem) {
					atom_add(&availsmem, change);
					atom_add(&availrmem, change);
					nomemmsg("growreg", change, 0, 1);
					u.u_error = EAGAIN;
					return(-1);
				}
			}
		}
		if (chkgrowth(&u, prp, osize, osize + change) ||
		   ptexpand(rp, change)) {
			if (type != DBD_NONE) {
				atom_add(&availsmem, change);
				if (rp->r_noswapcnt)
					atom_add(&availrmem, change);
			}
			u.u_error = ENOMEM;
			return(-1);
		}

		/*	Initialize the new page tables and allocate
		 *	pages if required.
		 */

		pg_setuninit(&pte);
		pg_setrwx(&pte);	/* allow execution in data pages */
#if M68040
		/* must write protect gap pages or write_back() won't work */
		if (type == DBD_NONE)
			pte.pde.wp = 1;
#ifdef Copyback_M68040
		/* try stack and type 410 data regions in copyback mode */
		else if (prp->p_type == PT_STACK ||
			 (prp->p_type == PT_DATA &&
			  (type == DBD_DZERO || type == DBD_DFILL)))
			pte.pde.cache0 = 1;
#endif /* Copyback_M68040 */
#endif /* M68040 */

		i = ctost(osize);

		for (; i < ctos(osize + change) ; i++) {
			/*
			 * Find the run of new entries in segment i.  A
			 * stack's tables are filled from the end, so pt
			 * starts one past the table and is moved back.
			 */
			pt = rp->r_list[i];
			if (rp->r_flags & RG_STACK)
				pt += NPGPT;
			size = osize + change - stoc(i);
			if (size > NPGPT)
				size = NPGPT;
			if (osize > stoc(i)) {
				/* segment already holds part of the region */
				size -= osize - stoc(i);
				if (rp->r_flags & RG_STACK)
					pt -= osize - stoc(i) + size;
				else
					pt += osize - stoc(i);
			}
			else  {
				if (rp->r_flags & RG_STACK)
					pt -= size;
			}

			while (--size >= 0) {
				*pt = pte;
				((dbd_t *)(pt + NPGPT))->dbd_type = type;
				pt++;
			}
		}
	}

	/*
	 * A segment table failure only matters when growing.  Report
	 * it as a failure (-1, not the 0 that means "no-op") and give
	 * back the memory reserved above, as the other failure paths
	 * do; r_pgsz is left unchanged.
	 *
	 * NOTE(review): page tables added by ptexpand() stay in r_list
	 * here; they are not released because it is not visible in
	 * this file whether loadstbl() may already point segment table
	 * entries at them.
	 */
	if (!loadstbl(&u, prp, change) && change > 0) {
		if (type != DBD_NONE) {
			atom_add(&availsmem, change);
			if (rp->r_noswapcnt)
				atom_add(&availrmem, change);
		}
		u.u_error = ENOMEM;
		return(-1);
	}

	rp->r_pgsz += change;
	if (type != DBD_NONE)
		u.u_procp->p_size += change;
	else
		rp->r_gapsz += change;
	return(1);
}

/*
 * Check that grow of a pregion is legal
 * returns 0 if legal, -1 if illegal.
 *
 * Two cases, selected by osize:
 *   osize != 0  prp is already in up's pregion list; only its
 *		 neighbour in the direction of growth is examined.
 *   osize == 0  prp is a new pregion (not yet in the list); its range
 *		 is checked against every pregion in the list.
 * In both cases sharing a segment with another pregion is illegal,
 * not just overlapping its addresses.
 */

static
chkgrowth(up, prp, osize, nsize)
register user_t	*up;
register preg_t	*prp;
register int	osize;	/* Old size in pages. */
register int	nsize;	/* New size in pages. */
{
	register preg_t		*prp1;
	register caddr_t	start, end, vaddr;
	register		size;
	extern int		userstack[];

	if (osize != 0) {			/* prp is in pregion list */
		/* shrinking or staying the same is always legal */
		if (nsize <= osize)
			return(0);
		if (prp->p_type == PT_STACK) {
			/*
			 * The stack grows downward: compare its new
			 * lowest address with the last byte of the
			 * pregion below it.
			 */
			prp1 = prp - 1;		/* stack is the last pregion */
			ASSERT(prp1->p_type != PT_STACK); /* one per customer */
			end = prp1->p_regva + ctob(prp1->p_reg->r_pgsz) - 1;
			vaddr = prp->p_regva - ctob(nsize) + USERSTACK_OFF;
			if (vaddr <= end ||
			    ctost(btoct(vaddr)) <= ctost(btoct(end)))
				return(-1);
			return(0);
		}
		else {
			/*
			 * Growing upward: the limit is the start of the
			 * next pregion (the low end of a stack), or
			 * userstack when nothing follows.
			 */
			prp1 = prp + 1;

			if (prp1->p_reg != NULL) {
				if (prp1->p_type == PT_STACK) {
					end = prp1->p_regva - 
					  ctob(prp1->p_reg->r_pgsz)
						+ USERSTACK_OFF;
				}
				else {
					end = prp1->p_regva;
					ASSERT(end != 0);
				}
			}
			else
				end = (caddr_t)userstack;
			/* vaddr is the last byte of the grown pregion */
			vaddr = prp->p_regva + ctob(nsize) - 1;
			if (vaddr >= end ||
			    ctost(btoct(vaddr)) >= ctost(btoct(end)))
				return(-1);
			return(0);
		}
	}

	/* prp is a new region */
	/* compute its inclusive byte range [start, end] */
	prp1 = (up->u_procp)->p_region;
	size = ctob(nsize);
	if (prp->p_type == PT_STACK) {
		end = prp->p_regva + USERSTACK_OFF - 1;
		start = (size > 0) ? end - size + 1 : end;
	}
	else {
		start = prp->p_regva;
		end = (size > 0) ? start + size - 1 : start;
	}

	/* walk the pregion list up to its null terminator */
	for ( ; prp1->p_reg; ++prp1) {
		if (prp1 == prp)
			continue;
		/* vaddr becomes the lowest address of pregion prp1 */
		size = ctob((prp1->p_reg)->r_pgsz);
		if (prp1->p_type == PT_STACK) {
			vaddr = prp1->p_regva;
			if (size > 0)
				vaddr += USERSTACK_OFF - size;
		}
		else {
			/*
			 *	Mask is for A1000 shared memory compatibility
			 */
			vaddr = (caddr_t) ((uint)prp1->p_regva & ~SOFFMASK);
		}
		/* the new range may not end in a segment prp1 starts in */
		if (ctost(btoct(end)) == ctost(btoct(vaddr)))
			return(-1);
		/*
		 * The new range lies wholly below prp1; stop here.
		 * (attachreg() keeps the list ordered by p_regva.)
		 */
		if (end < vaddr)
			break;
		/* the new range starts at, or runs into, prp1 */
		if (start == vaddr || (start < vaddr && end >= vaddr))
			return(-1);
		/* the new range starts inside prp1 */
		if (start > vaddr) {
			if (size != 0 && start < vaddr+size)
				return(-1);
		}
	}

	return(0);
}


/*
 * ptexpand -- adjust the user page tables of a region for a size change
 *
 * rp is the region and change the signed number of pages by which it
 * is about to change; rp->r_pgsz must still hold the old size.
 *
 * Shrinking frees the pde/dbd table pairs of the segments that are no
 * longer needed and, when possible, moves the table pointers into a
 * smaller list.  Growing enlarges the list if necessary and allocates
 * a pde/dbd pair for every new segment.
 *
 * Returns 0 on success, -1 when an allocation failed while growing
 * (pairs allocated by this call are freed again).
 */
 
ptexpand(rp, change)
register reg_t *rp;
{
	register pde_t	**lp;
	register pde_t	**lp2;
	register int	osize;
	register int	nsize;
	register int	i;
	int		alloced;
	extern pde_t	*uptalloc(), *ptalloc();

	/* Calculate the new size in pages.
	 */

	osize = rp->r_pgsz;
	nsize = osize + change;

	/*	If we are shrinking the region, then free up
	 *	the page tables and map tables.  Use a smaller
	 *	list if possible.
	 */

	if (ctos(nsize) < ctos(osize)) {
		lp = &rp->r_list[ctos(nsize)];
		for (;  lp < &rp->r_list[ctos(osize)]  ;  lp++) {
			/* free the 2 paired page tables: pde + dbd */
			uptfree(*lp, 2);
			*lp = 0;
		}
		if (ctos(ctos(nsize)) < rp->r_listsz) {
			if (nsize > 0) {
				lp2 = (pde_t **)ptalloc(ctos(ctos(nsize)),
				  NOSLEEP, (reg_t *)NULL, 0);
				if (lp2 == NULL)
					/* we are shrinking and can't get space
					 * for a smaller list; just keep the
					 * bigger one.  Return OK
					 */
					return(0);
				/* one pointer per remaining segment */
				bcopy(rp->r_list, lp2,
				  (ctos(nsize)) * sizeof(pde_t *));
			}else{
				lp2 = 0;
			}
			uptfree(rp->r_list, rp->r_listsz);
			rp->r_list = lp2;
			rp->r_listsz = ctos(ctos(nsize));
		}
	}

	/*	If the region shrunk or didn't grow by enough to
	 *	require any more page tables, then we are done.
	 */

	if (change <= 0) {
		return(0);
	}

	/*	If the region grew into the next segment,  then we 
	 *	must allocate one or more new page and map tables.
	 *	See if we have enough space in the list for the
	 *	new segments.  If not, allocate a new list and
	 *	copy over the old data.
	 */

	i = ctos(ctos(nsize));
	if (i > rp->r_listsz) {
		lp2 = (pde_t **)uptalloc(i, rp);
		if (lp2 == NULL)
			return(-1);
		if (rp->r_list) {
			/* one pointer per existing segment */
			bcopy(rp->r_list, lp2,
			  (ctos(osize)) * sizeof(pde_t *));
			uptfree(rp->r_list, rp->r_listsz);
		}
		rp->r_list = lp2;
		rp->r_listsz = i;
	}

	/*
	 * Allocate a new set of page tables and disk maps.
	 */

	lp = &rp->r_list[ctos(osize)];
	lp2 = &rp->r_list[ctos(nsize)];

	for (alloced = 0; lp < lp2 ; lp++) {
		/* allocate the 2 paired page tables: pde & dbd */
		if ((*lp = uptalloc(2, rp)) == NULL) {
			/* undo the pairs obtained so far, newest first */
			while (--alloced >= 0) {
				/* free the 2 paired page tables: pde + dbd */
				uptfree(*--lp, 2);
				*lp = NULL;
			}
			return(-1);
		}
		++alloced;
#ifdef M68040
		/*
		 * We must use PG_UNINIT, or write_back doesn't work right.
		 */
		bset_long(*lp, PG_UNINIT, NPGPT);
#endif
	}

	return(0);
}

/*
 * loadreg -- read part of a file into a region
 *
 * prp is the pregion (its region locked by the caller), vaddr the
 * user address to load at, ip the inode to read, off the file offset
 * and count the number of bytes.
 *
 * The region is grown by gap pages up to vaddr and by demand-fill
 * pages for the data, the data is read through FS_READI() with the
 * region unlocked, and the rest of the last page is cleared.  On
 * success RG_DONE is set and any process in regwait() is woken.
 *
 * Returns 0 on success, -1 on failure.  The region is locked on return
 * from every path that got as far as the read.
 */
loadreg(prp, vaddr, ip, off, count)
register preg_t		*prp;
register caddr_t	vaddr;
register inode_t	*ip;
int			off, count;
{
	register reg_t	*rp;
	register int	gap, n;
	int		tail;

	rp = prp->p_reg;
	ASSERT(reg_mylock(rp));

	/* Bytes between the start of the pregion and the load address. */
	gap = vaddr - prp->p_regva;

	/* Whole gap pages first ... */
	n = btoct(gap);
	if (n < 0)
		return (-1);
	if (n > 0 && growreg(prp, n, DBD_NONE) < 0) 
		return (-1);

	/* ... then the pages that will hold the data. */
	n = btoc(count + gap) - n;
	if (n < 0)
		return (-1);
	if (n > 0 && growreg(prp, n, DBD_DFILL) < 0) 
		return (-1);

	flush_all_user_tlb();

	/* Describe the transfer in the u-block. */
	u.u_offset = off;
	u.u_count = count;
	u.u_base = vaddr;
	u.u_segflg = 0;

	/*
	 * The region has to be unlocked during the read, because the
	 * pages are faulted in as they are filled.  Nobody else will
	 * use the region meanwhile, since RG_DONE is not set yet.
	 */
	regrele(rp);

	FS_READI(ip);

	if (u.u_error) {
		reglock(rp);
		return(-1);
	}

	/* Zero from the end of the data to the end of its page. */
	vaddr += count;
	tail = ctob(1) - poff(vaddr);
	if (tail > 0  &&  tail < ctob(1))
		bzeroba(vaddr, tail);

	reglock(rp);

	/* Publish completion and wake anybody sleeping in regwait(). */
	spin_lock(&region_lock_sem);
	rp->r_flags |= RG_DONE;
	if (rp->r_flags & RG_WAITING) {
		rp->r_flags &= ~RG_WAITING;
		wakeup(&rp->r_flags);
	}
	spin_unlock(&region_lock_sem);

	/* A residual count means the read came up short. */
	return (u.u_count ? -1 : 0);
}

mapreg(prp, vaddr, ip, off, count)
preg_t	*prp;
caddr_t		vaddr;
struct inode	*ip;
int		off;
register int	count;
{
	register reg_t	*rp;
	register pde_t	*pt;
	register dbd_t	*dbd;
	register int	i;
	register int	j;
	register int	blkspp;
	register int	lim;
	int		gap;
	int		seglim;

	/*	Make sure that we are not trying to map
	 *	beyond the end of the file.  This can
	 *	happen for a bad a.out where the header
	 *	lies and says the file is bigger than
	 *	it actually is.
	 */

	if (off + count > ip->i_size)
		return(-1);

	/*
	 * Make sure the map is built.
	 */

	if (FS_ALLOCMAP(ip) == 0)
		return(-1);

	/*	Get region pointer and effective device number.  */

	rp = prp->p_reg;
	ASSERT(reg_mylock(rp));

	/*	Compute the number of file system blocks in a page.
	 *	This depends on the file system block size.
	 */

	blkspp = NBPP/FSBSIZE(ip);

	/*	Allocate invalid pages for the gap at the start of
	 *	the region and demand-fill pages for the actual
	 *	text.
	 */

	gap = vaddr - prp->p_regva;
	if (growreg(prp, btoct(gap), DBD_NONE) < 0)
		return(-1);
	if (growreg(prp, btoc(count+gap) - btoct(gap), DBD_DFILL) < 0)
		return(-1);

	rp->r_filesz = count + off;

	/*	Build block list pointing to map table.
	 */

	gap = btoct(gap);  /* Gap in pages. */
	off = btoct(off) * blkspp;  /* File offset in blocks. */
	i = ctost(gap);
	seglim = ctos(rp->r_pgsz);

	for (;  i < seglim;  i++) {

		if (gap > stoc(i))
			j = gap - stoc(i);
		else
			j = 0;

		lim = rp->r_pgsz - stoc(i);
		if (lim > NPGPT)
			lim = NPGPT;

		pt = (pde_t *)rp->r_list[i] + j;
		dbd = (dbd_t *)pt + NPGPT;

		for (;  j < lim;  j++, pt++, dbd++) {

			/*	If these are private pages, then make
			 *	them copy-on-write since they will
			 *	be put in the hash table.
			 */

			if (rp->r_type == RT_PRIVATE) {
				pg_setcw(pt);
				pg_setwp(pt);
			}
			dbd->dbd_type  = DBD_FILE;
			dbd->dbd_blkno = off;
			off += blkspp;
		}
	}

	/*	Mark the last page for special handling
	 */

	dbd[-1].dbd_type = DBD_LSTFILE;

	spin_lock(&region_lock_sem);
	rp->r_flags |= RG_DONE;
	if (rp->r_flags & RG_WAITING)  {
		rp->r_flags &= ~RG_WAITING;
		wakeup(&rp->r_flags);
	}
	spin_unlock(&region_lock_sem);
	return(0);

}

/*
 * findreg -- look up the region that maps vaddr in process p
 *
 * Returns the region locked, or NULL when vtopreg() finds no pregion
 * for the address.
 */

reg_t	*
findreg(p, vaddr)
proc_t	*p;
caddr_t	vaddr;
{
	register reg_t	*rp;
	preg_t		*prp;
	extern preg_t	*vtopreg();

	prp = vtopreg(p, vaddr);
	if (prp == NULL)
		return (NULL);

	rp = prp->p_reg;
	reglock(rp);
	return (rp);
}



/*	Find the first pregion of process pp with the given type.
 *
 *	Only pregions in use (p_reg set) are compared.  Asking for
 *	PT_UNUSED returns the first free slot instead, unless that
 *	slot is the last one of the process, which always stays
 *	unused as the end-of-list marker.
 *	Returns NULL when nothing suitable exists.
 */

preg_t *
findpreg(pp, type)
register proc_t	*pp;
register int	type;
{
	register preg_t	*prp;

	prp = pp->p_region;
	while (prp->p_reg) {
		if (prp->p_type == type)
			return(prp);
		prp++;
	}

	/* prp now points at the first unused slot. */
	if (type != PT_UNUSED)
		return(NULL);
	if (prp >= &pp->p_region[pregpp - 1])
		return(NULL);		/* only the marker slot is left */
	return(prp);
}

/*
 * chgprot -- change the protection of every pde of a pregion's region
 *
 * prot selects the change:
 *	SEG_TEXT  write protect and apply pg_setcp() to each page
 *	SEG_RO	  write protect each page
 *	SEG_RW	  remove write protection from each page
 * Any other value leaves the pdes alone.  PF_RDONLY in the pregion
 * flags is set for SEG_RO and cleared for everything else.  The user
 * TLB is flushed at the end.
 */
void
chgprot(prp, prot)
register preg_t	*prp;
{
	register reg_t *rp;
	register int seg, pg;
	register int nsegs;
	register int total;
	register int npages;
	pde_t *pt;

	if (prot == SEG_RO)
		prp->p_flags |= PF_RDONLY;
	else
		prp->p_flags &= ~PF_RDONLY;

	rp = prp->p_reg;
	total = rp->r_pgsz;
	nsegs = ctos(total);

	for (seg = 0;  seg < nsegs;  seg++) {
		/*
		 * Only the last segment can be partly used; a stack
		 * keeps its pages at the end of that table.
		 */
		pt = rp->r_list[seg];
		npages = total - stoc(seg);
		if (npages > NPGPT) {
			npages = NPGPT;
		} else if (rp->r_flags & RG_STACK) {
			pt += NPGPT - npages;
		}

		for (pg = 0; pg < npages; pg++, pt++) {
			if (prot == SEG_TEXT) {
				pg_setwp(pt);
				pg_setcp(pt);
			} else if (prot == SEG_RO) {
				pg_setwp(pt);
			} else if (prot == SEG_RW) {
				pg_clrwp(pt);
			}
		}
	}

	flush_all_user_tlb();
}

/*
 * vtopreg -- locate the pregion of process p that contains vaddr
 *
 * A stack pregion covers the r_pgsz pages ending at p_regva + 3; any
 * other pregion covers the r_pgsz pages starting at p_regva.  An
 * address inside the leading gap (r_gapsz pages) of a pregion only
 * matches when the pregion is flagged PF_A1000.
 * Returns NULL when no pregion matches.
 */

preg_t *
vtopreg(p, vaddr)
proc_t			*p;
register caddr_t	vaddr;
{
	register preg_t	*prp;
	register reg_t	*rp;
	register caddr_t base;

	for (prp = p->p_region; ; prp++) {
		rp = prp->p_reg;
		if (rp == NULL)
			break;			/* end of pregion list */

		base = prp->p_regva;
		if (prp->p_type == PT_STACK) {
			base += 3;		/* 7ffffffc to 7fffffff */
			if (vaddr <= base && vaddr > base - ctob(rp->r_pgsz))
				return(prp);
			continue;
		}

		/* not within this pregion at all: try the next one */
		if (vaddr < base || vaddr >= base + ctob(rp->r_pgsz))
			continue;

		/*
		 * Inside the pregion.  Paged A1000 code has a gap
		 * between the text and data in the same region, so the
		 * gap is ignored for it; otherwise an address in the
		 * gap is not mapped.
		 */
		if (vaddr >= base + ctob(rp->r_gapsz) ||
		    (prp->p_flags & PF_A1000))
			return(prp);
		return(NULL);	/* vaddr in gap */
	}

	return(NULL);
}


/*
 * A1000_loadreg -- read part of a file into a region that may already
 * hold pages
 *
 * Same arguments as loadreg().  The differences visible here are that
 * the gap is measured from the current end of the region
 * (btoct(gap) - r_pgsz, so vaddr may not lie inside the existing
 * pages) and that RG_DONE is not touched.
 *
 * Returns 0 on success, -1 on failure.  The region is locked on return
 * from every path that got as far as the read.
 */
A1000_loadreg(prp, vaddr, ip, off, count)
register preg_t		*prp;
register caddr_t	vaddr;
register inode_t	*ip;
int			off, count;
{
	register reg_t	*rp;
	register int	gap, n;
	int		tail;

	rp = prp->p_reg;
	ASSERT(reg_mylock(rp));

	/* Bytes between the start of the pregion and the load address. */
	gap = vaddr - prp->p_regva;

	/* Gap pages between the current end of the region and vaddr. */
	n = btoct(gap) - rp->r_pgsz;
	if (n < 0)
		return (-1);
	if (n > 0 && growreg(prp, n, DBD_NONE) < 0) 
		return (-1);

	/* Pages that will hold the data. */
	n = btoc(count + gap) - btoct(gap);
	if (n < 0)
		return (-1);
	if (n > 0 && growreg(prp, n, DBD_DFILL) < 0) 
		return (-1);

 	flush_all_user_tlb(); /* io will flush tlb */

	/* Describe the transfer in the u-block. */
	u.u_offset = off;
	u.u_count = count;
	u.u_base = vaddr;
	u.u_segflg = 0;

	/*
	 * The region has to be unlocked during the read, because the
	 * pages are faulted in as they are filled.
	 */
	regrele(rp);

	FS_READI(ip);

	if (u.u_error) {
		reglock(rp);
		return(-1);
	}

	/* Zero from the end of the data to the end of its page. */
	vaddr += count;
	tail = ctob(1) - poff(vaddr);
	if (tail > 0  &&  tail < ctob(1))
		bzeroba(vaddr, tail);

	reglock(rp);

	/* A residual count means the read came up short. */
	return (u.u_count ? -1 : 0);
}
