/*	START NEW ARIX SCCS HEADER			*/
/*							*/
/*	@(#) fault.c: version 25.1 created on 11/27/91 at 15:09:19	*/
/*							*/
/*	Copyright (c) 1990 by Arix Corporation		*/
/*	All Rights Reserved				*/
/*							*/
#ident	"@(#)fault.c	25.1	11/27/91 Copyright (c) 1990 by Arix Corporation"
/*							*/
/*	END NEW ARIX SCCS HEADER			*/
/*							*/
/*	Copyright (c) 1984 AT&T	*/
/*	  All Rights Reserved  	*/

/*	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T	*/
/*	The copyright notice above does not evidence any   	*/
/*	actual or intended publication of such source code.	*/

#ident	"@(#)uts/os:fault.c	23.2"

#include "sys/types.h"
#include "sys/param.h"
#include "sys/sysmacros.h"
#include "sys/immu.h"
#include "sys/systm.h"
#include "sys/fs/s5dir.h"
#include "sys/signal.h"
#include "sys/user.h"
#include "sys/errno.h"
#include "sys/inode.h"
#include "sys/mount.h"
#include "sys/fstyp.h"
#include "sys/var.h"
#include "sys/buf.h"
#include "sys/utsname.h"
#include "sys/sysinfo.h"
#include "sys/pfdat.h"
#include "sys/region.h"
#include "sys/proc.h"
#include "sys/map.h"
#include "sys/swap.h"
#include "sys/getpages.h"
#include "sys/debug.h"
#include "sys/cmn_err.h"
#include "sys/conf.h"
#include "sys/spm_mem.h"
#include "sys/own.h"
#include "sys/sbus.h"
#include "sys/mfs.h"
#include "sys/lio.h"
#include "sys/ints.h"

extern suspend_lock_t	mem_lock;


/*	Protection fault handler
 *
 *	Called for a protection fault at virtual address vaddr, whose
 *	page descriptor entry is pd.  A fault on a page marked copy on
 *	write is resolved here, either by copying the page or by taking
 *	over the existing one; any other protection fault is an error.
 *
 *	Returns 0 when the fault was resolved, or when the pde turned
 *	out to be no longer valid once the region had been obtained.
 *	Returns SIGSEGV when vaddr lies in no region of the current
 *	process or when the page is not marked copy on write.
 *
 *	vaddr has no declaration in the parameter list below, so it
 *	is an int.
 */

pfault(vaddr, pd)
register pde_t	*pd;	/* Virtual address of faulting pde.	*/
{
	register dbd_t	*dbd;
	register pfd_t	*pfd;
	register reg_t	*rp;
	pde_t		pttmp;	/* Scratch pde that receives the new page. */


	/*	Get a pointer to the region which the faulting
	 *	virtual address is in.  Every exit path below hands
	 *	the region back with regrele(), so findreg apparently
	 *	returns it locked.
	 */

	if ((rp = findreg(u.u_procp, vaddr))==NULL)
		return(SIGSEGV);

	/*	Check to see that the pde hasn't been modified
	 *	while waiting for the lock
	 */

	if (!pg_isvalid(pd)) {
		regrele(rp);
		return(0);
	}

	/*	Now check for a real protection error as opposed
	 *	to a copy on write.  The fault is counted first, so
	 *	real protection errors are included in minfo.pfault
	 *	but the early returns above are not.
	 */

	atom_inc(&minfo.pfault);
	if (!pg_iscw(pd)) {
		regrele(rp);
		return(SIGSEGV);
	}

	ASSERT(rp->r_type == RT_PRIVATE);

	/*	The dbd for this pde is found NPGPT entries past it,
	 *	treating the pde pointer as a pointer to dbd_t.
	 */

	dbd = (dbd_t *)pd + NPGPT;

	/*	The pfdat is examined and changed with the memory lock
	 *	held (presumably mem_lock -- confirm against memlock).
	 */

	memlock();

	pfd = pde_to_pfdat(*pd);

	/*	Copy on write
	 *	If use is 1, and page is not from a file,
	 *	steal it, otherwise copy it
	 */

	if (pfd->pf_use > 1  ||  dbd->dbd_type == DBD_FILE
	   ||  dbd->dbd_type == DBD_LSTFILE) {
		minfo.cw++;
		pg_setuninit(&pttmp);

		/*	We are locking the page we faulted on
		**	before calling ptmemall because
		**	ptmemall may unlock the region.  If
		**	he does, then the page could be stolen
		**	and we would be copying incorrect
		**	data into our new page.
		**
		**	pf_rawcnt is raised along with the lock; the
		**	lock is cleared below only when the count
		**	drops back to zero.
		*/

		pg_setlock(dbd);
		pfd->pf_rawcnt++;
		ptmemall(rp, &pttmp, 1, 1, 0);

		/*	It's O.K. to unlock the page now since
		**	ptmemall has locked the region again.
		*/

		ASSERT(reg_mylock(rp));
		ASSERT(pg_islocked(dbd));
		ASSERT(pfd->pf_rawcnt > 0);

		if (--pfd->pf_rawcnt == 0)
			pg_clrlock(dbd);

		/*	The memory lock is dropped for the duration of
		 *	the page copy and retaken before the old page
		 *	is handed to pfree.
		 */

		memunlock();
		bcopy(pde_to_km(*pd), pde_to_km(pttmp), NBPP);
		memlock();
		pfree(rp, pd, dbd, 1);

		/*	Install the new page: every bit of the scratch
		 *	pde is copied into the real one.
		 */

		pg_setall(pd, pg_all(&pttmp));
	} else {

		/*	We are modifying the page so
		 *	break the disk association to swap.
		 */
		
		if (pfd->pf_flags & P_HASH)
			premove(pfd);

		if (dbd->dbd_type == DBD_SWAP)
			swfree1(dbd);
		dbd->dbd_type = DBD_NONE;
		minfo.steal++;
	}

	memunlock();

	/*	Set the modify bit here before the region is unlocked
	 *	so that getpages will write the page to swap if necessary.
	 *	The page also stops being copy on write and becomes
	 *	read/write.
	 */

	pg_setmod(pd);
	pg_clrcw(pd);
	pg_setrw(pd);


	/*	Flush after the pde has been rewritten and before the
	 *	region is released.
	 *	NOTE(review): flush_all_own_tlb presumably affects only
	 *	the executing processor's TLB -- confirm.
	 */

	flush_all_own_tlb();
	regrele(rp);

	return(0);
}

/*	Translation fault handler
 *
 *	Called for a fault at virtual address vaddr whose page
 *	descriptor entry pd is not valid.  Depending on the type
 *	recorded in the page's dbd, the page is zeroed or left for
 *	demand fill, attached from the page cache, or read in from
 *	swap or from the region's file.
 *
 *	Returns 0 when the pde has been made valid (or was already
 *	valid by the time the region was obtained), SIGSEGV when vaddr
 *	lies in no region of the current process or the page is
 *	unassigned (DBD_NONE), and SIGKILL when the page could not be
 *	read.  An unrecognised dbd type causes a panic.
 *
 *	vaddr has no declaration in the parameter list below, so it
 *	is an int.
 */

vfault(vaddr, pd)
register pde_t	*pd;	/* Virtual address of faulting pde.	*/
{
	register dbd_t	*dbd;
	register pfd_t	*pfd;
	register reg_t	*rp;
	inode_t 	*ip;
	pde_t		pde_save;	/* Copy of *pd used to free the spare page. */

	ASSERT(u.u_procp->p_flag & SLOAD);

	/*	The dbd for this pde is found NPGPT entries past it,
	 *	treating the pde pointer as a pointer to dbd_t.
	 */

	dbd = (dbd_t *)pd + NPGPT;

	/*	Lock the region containing the page that faulted.
	 */

	if ((rp = findreg(u.u_procp, vaddr))==NULL) {
		return(SIGSEGV);
	}

/* 001 */
	/*	Check that the page has not been read in by
	 *	another process while we were waiting for
	 *	it on the reglock above.
	 */

	if (pg_isvalid(pd)) {
		regrele(rp);
		return(0);
	}

	/*	Check for an unassigned page.  This is a real
	 *	error.
	 */

	if (dbd->dbd_type == DBD_NONE) {
		regrele(rp);
		return(SIGSEGV);
	}

	/*	Allocate a page in case we need it.  We must
	 *	do it now because it is not convenient to
	 *	wait later if no memory is available.  If
	 *	ptmemall does a wait and some other process
	 *	allocates the page first, then we have
	 *	nothing to do.
	 *
	 *	A non-zero return from ptmemall is that case: the
	 *	pde is already valid, so we just unlock and return.
	 */
	
	memlock();
	minfo.vfault++;
	if (ptmemall(rp, pd, 1, 0, 0)) {
		ASSERT(pg_isvalid(pd));
		memunlock();
		regrele(rp);
		return(0);
	}

	/*	See what state the page is in.  The memory lock is
	 *	still held on entry to every case.
	 */

	switch (dbd->dbd_type) {
	case DBD_DFILL:
	case DBD_DZERO:{

		/* 	Demand zero or demand fill page.
		 *	Only a demand zero page is cleared; a demand
		 *	fill page is left with whatever the new page
		 *	contains.  Either way the dbd becomes DBD_NONE.
		 */

		minfo.demand++;
		memunlock();
		if (dbd->dbd_type == DBD_DZERO)
			bzero(pde_to_km(*pd), NBPP);
		dbd->dbd_type = DBD_NONE;
		break;
	}
	case DBD_SWAP:
	case DBD_FILE:
	case DBD_LSTFILE:{

		/*	Page is on swap or in a file.  See if a
		 *	copy is in the hash table.
		 */

		if (pfd = pfind(rp, dbd)) {

			/*	Page is in cache.
			 *	If it is also on the free list,
			 *	remove it.
			 */

			ASSERT(memlocked());

			minfo.cache++;
			if (pfd->pf_flags&P_QUEUE) {
				ASSERT(pfd->pf_use == 0);
				ASSERT(freemem > 0);

				/* Unlink pfd from the doubly linked
				 * free list and account for it.
				 */
				freemem--;
				pfd->pf_flags &= ~P_QUEUE;
				pfd->pf_prev->pf_next = pfd->pf_next;
				pfd->pf_next->pf_prev = pfd->pf_prev;
				pfd->pf_next = NULL;
				pfd->pf_prev = NULL;
			}

			/*	Free the page we allocated above
			 *	since we don't need it.  pfree is given
			 *	a copy of the pde marked valid, and no
			 *	dbd, so the real pde and dbd are not
			 *	touched by the call.  The region's valid
			 *	count is then bumped and the real pde is
			 *	pointed at the cached page, which gains
			 *	a user.
			 */

			pde_save = *pd;
			pg_setvalid(&pde_save);
			pfree(rp, &pde_save, NULL, 1);
			rp->r_nvalid++;
			pfd->pf_use++;
			pg_setaddr(pd, pfdattopfn(pfd));

			/*	If the page has not yet been read
			 *	in from swap or file, then wait for
			 *	the I/O to complete.
			 *
			 *	suspend_sleep is passed mem_lock;
			 *	presumably it releases the lock while
			 *	asleep and holds it again on return
			 *	(memunlock follows on both ways out of
			 *	the loop) -- confirm.
			 *
			 *	NOTE(review): on the P_BAD return below,
			 *	pd has already been pointed at pfd and
			 *	pf_use and r_nvalid have been raised;
			 *	nothing visible here undoes that before
			 *	SIGKILL is returned.  Confirm whether
			 *	the cleanup happens elsewhere.
			 */

			while (!(pfd->pf_flags & P_DONE)) {
				pfd->pf_waitcnt++;
				suspend_sleep(pfd, PZERO, &mem_lock);
				if (pfd->pf_flags & P_BAD) {
					memunlock();
cmn_err(CE_WARN, "vfault: IO err! pd=%x *pd=%x pfd=%x", pd, pg_all(pd), pfd);
					regrele(rp);
					return(SIGKILL);
				}
			}
			memunlock();
		} else {

			/*	Must read from swap or a file.
			 *	Get the pfdat for the newly allocated
			 *	page and insert it in the hash table.
			 *	Note that it cannot already be there
			 *	because the pfind above failed and
			 *	mem_lock is still locked.
			 */
			
			pfd = pde_to_pfdat(*pd);
			ASSERT((pfd->pf_flags & P_HASH) == 0);

			/*	Don't insert in hash table if this
			 *	block is from a swap file we are
			 *	trying to delete.
			 *
			 *	As written, the page is always inserted
			 *	before the read; when the swap file was
			 *	marked ST_INDEL the swap block is freed
			 *	after the read and, if swfree1 returns
			 *	0, the page is removed from the hash
			 *	table again.
			 */

			if (dbd->dbd_type == DBD_SWAP) {
				register int	swapdel;
				pglst_t		pglist;

				/* Sample the flag while mem_lock is
				 * still held.
				 */
				swapdel = swaptab[dbd->dbd_swpi].st_flags
						& ST_INDEL;
				pinsert(rp, dbd, pfd);
				minfo.swap++;
				memunlock();

				/*	Read from swap.
				 *	NOTE(review): the result of
				 *	swap() is not examined here --
				 *	confirm how a failed swap read
				 *	is reported.
				 */

				pglist.gp_ptptr = pd;
				swap(&pglist, 1, B_READ);
				if (swapdel) {
					if (swfree1(dbd) == 0) {
						memlock();
						premove(pfd);
						memunlock();
					}
					dbd->dbd_type = DBD_NONE;
				}
			} else {
				/*	Read from file
				 */
				int vaddr1;	/* Kernel address of the new page. */
				int offset;	/* Byte offset of the page in the file. */
				int retval;	/* Result of FS_READMAP. */
				int i;		/* Offset in page of end of file data. */

				minfo.file++;
				pinsert(rp, dbd, pfd);
				memunlock();
				vaddr1 = (int) pde_to_km(*pd);
				ip = rp->r_iptr;
				offset = dbd->dbd_blkno*FSBSIZE(ip);
				retval = FS_READMAP(ip, offset, NBPP, vaddr1,1);

				/* A zero or negative result is treated
				 * as a failed read: the page is thrown
				 * away by killpage and the process gets
				 * SIGKILL.
				 */
				if (retval <= 0) {
					killpage(rp, pd);
cmn_err(CE_WARN, "vfault: IO error! pd=%x *pd=%x pfd=%x", pd, pg_all(pd), pfd);
					regrele(rp);
					return(SIGKILL);
				}

				/*
				 * clear the last bytes of a partial page
				 * (everything past the page offset of
				 * r_filesz; nothing is cleared when that
				 * offset is zero)
				 */
				if (dbd->dbd_type == DBD_LSTFILE) {
					i = poff(rp->r_filesz);
					if (i > 0)
					       bzero(vaddr1+i, NBPP-i);
				}
			}

			/*	Mark the I/O done in the pfdat and
			 *	awaken anyone who is waiting for it.
			 */

			memlock();
			pfd->pf_flags |= P_DONE;
			if (pfd->pf_waitcnt) {
				pfd->pf_waitcnt = 0;
				suspend_wakeup(pfd);
			}
			memunlock();
		}
		break;
	}
	default:
		cmn_err(CE_PANIC,
		  "vfault - bad dbd_type %x, pt=%x *pt=%x *dbd=%x vaddr=%x",
		  dbd->dbd_type, pd, pg_all(pd), *dbd, vaddr);
	}

	/*	The page is now in place: it starts out unmodified
	 *	and the pde becomes valid before the region is
	 *	released.
	 */

	pg_clrmod(pd);
	pg_setvalid(pd);

	regrele(rp);

	return(0);
}


/*
 * killpage - back out of a page read that failed during vfault.
 *
 *	rp	region the page belongs to
 *	pd	pde of the page; expected to be invalid on entry
 *
 * The page that was allocated for the read is marked bad and done,
 * taken out of the hash table if it is there, and freed; any
 * waiters recorded in the pfdat are woken up.  On return the pde
 * holds exactly the bits it held on entry, and the dbd has not
 * been touched.
 */
killpage(rp, pd)
reg_t *rp;
register pde_t *pd;
{
	pde_t		orig_pde;
	register pfd_t	*page;

	/* Keep the incoming pde so it can be put back at the end. */
	orig_pde = *pd;

	/* The pde is handed to pfree below marked valid. */
	pg_setvalid(pd);

	memlock();

	page = pde_to_pfdat(*pd);
	page->pf_flags |= (P_BAD | P_DONE);

	if ((page->pf_flags & P_HASH) != 0) {
		premove(page);
	}

	/* Release anybody who was waiting for the read to finish. */
	if (page->pf_waitcnt != 0) {
		page->pf_waitcnt = 0;
		suspend_wakeup(page);
	}

	pfree(rp, pd, NULL, 1);

	memunlock();

	/* Undo whatever the calls above did to the pde. */
	pg_setall(pd, pg_all(&orig_pde));
}


/*
 * Interrupt descriptor passed to queue_level_one() by
 * flush_all_tlbs().  Filled in once by tlb_flush_init().
 */
static disp_int_t	tlb_flush_int_data;

/*
 * tlb_flush_init - set up the TLB flush interrupt descriptor:
 * vector DO_TLB_FLUSH, level MOT_LEVEL_ONE, non-directed.
 */
tlb_flush_init()
{
	register disp_int_t	*intr = &tlb_flush_int_data;

	intr->fields.directed = NON_DIRECTED;
	intr->fields.level = MOT_LEVEL_ONE;
	intr->fields.vector = DO_TLB_FLUSH;
}

/*
 * flush_all_tlbs - ask every PM to flush its TLB and wait for all
 * of them to do so.
 *
 * Each PM's o_flush_tlb flag is set and a level-one interrupt is
 * queued to it; the routine then polls each flag in turn, calling
 * delay(1) between looks, until it has been cleared.  The whole
 * sequence runs under a private suspend lock, so only one flush
 * request is in progress at a time.
 */

flush_all_tlbs()
{
	static suspend_lock_t	tlb_flush_lock = SUSPEND_INIT(PZERO);
	register own_t		**ownp;
	register int		left;

	suspend_lock(&tlb_flush_lock);

	/* First pass: post the request to every PM. */
	ownp = spm_mem.pm_own;
	left = spm_mem.num_pm;
	while (--left >= 0) {
		(*ownp)->o_flush_tlb = 1;
		queue_level_one(*ownp, tlb_flush_int_data);
		ownp++;
	}

	/* Second pass: wait until every PM has cleared its flag. */
	ownp = spm_mem.pm_own;
	left = spm_mem.num_pm;
	while (--left >= 0) {
		while ((*ownp)->o_flush_tlb != 0) {
			delay(1);
		}
		ownp++;
	}

	suspend_unlock(&tlb_flush_lock);
}

/*
 * do_tlb_flush - carry out a TLB flush request on this PM.
 *
 * Expects own.o_flush_tlb to be set on entry (asserted), flushes
 * through flush_all_own_tlb(), and then clears the flag.  The flag
 * must be cleared only after the flush has been done, because
 * flush_all_tlbs() polls each PM's o_flush_tlb and treats zero as
 * "flush complete".
 *
 * NOTE(review): presumably run as the handler for the DO_TLB_FLUSH
 * vector queued by flush_all_tlbs() -- confirm against the
 * interrupt dispatch code.
 */
void
do_tlb_flush()
{
	ASSERT(own.o_flush_tlb);
	flush_all_own_tlb();
	/* Acknowledge the request; the requester is polling this flag. */
	own.o_flush_tlb = 0;
}
