/* @(#)bio.c	1.7 */#include "sys/param.h"#include "sys/types.h"#include "sys/mmu.h"#include "sys/sysmacros.h"#include "sys/systm.h"#include "sys/sysinfo.h"#include "sys/dir.h"#include "sys/signal.h"#include "sys/user.h"#include "sys/errno.h"#include "sys/buf.h"#include "sys/iobuf.h"#include "sys/conf.h"#include "sys/proc.h"#include "sys/seg.h"#include "sys/var.h"#include "sys/scat.h"/* * swap IO headers. */struct	buf	swbuf[NSWB];/* * The following several routines allocate and free * buffers with various side effects.  In general the * arguments to an allocate routine are a device and * a block number, and the value is a pointer to * to the buffer header; the buffer is marked "busy" * so that no one else can touch it.  If the block was * already in core, no I/O need be done; if it is * already busy, the process waits until it becomes free. * The following routines allocate a buffer: *	getblk *	bread *	breada * Eventually the buffer must be released, possibly with the * side effect of writing it out, by using one of *	bwrite *	bdwrite *	bawrite *	brelse *//* * Unlink a buffer from the available list and mark it busy. * (internal interface) */#define	notavail(bp)	\{\	register s;\\	s = spl6();\	bp->av_back->av_forw = bp->av_forw;\	bp->av_forw->av_back = bp->av_back;\	bp->b_flags |= B_BUSY;\	bfreelist.b_bcount--;\	splx(s);\}/* * Pick up the device's error number and pass it to the user; * if there is an error but the number is 0 set a generalized * code.  Actually the latter is always true because devices * don't yet return specific errors. */#define	geterror(bp)	\{\	if (bp->b_flags&B_ERROR)\		if ((u.u_error = bp->b_error)==0)\			u.u_error = EIO;\}/* * Read in (if necessary) the block and return a buffer pointer. */struct buf *bread(dev, blkno)dev_t dev;daddr_t blkno;{	register struct buf *bp;	sysinfo.lread++;	bp = getblk(dev, blkno);	if (bp->b_flags&B_DONE)		return(bp);	bp->b_flags |= B_READ;	bp->b_bcount = FsBSIZE(dev);	(*bdevsw[bmajor(dev)].d_strategy)(bp);	u.u_ior++;	sysinfo.bread++;	iowait(bp);	return(bp);}/* * Read in the block, like bread, but also start I/O on the * read-ahead block (which is not allocated to the caller) */struct buf *breada(dev, blkno, rablkno)dev_t dev;daddr_t blkno, rablkno;{	register struct buf *bp, *rabp;	bp = NULL;	if (!incore(dev, blkno)) {		sysinfo.lread++;		bp = getblk(dev, blkno);		if ((bp->b_flags&B_DONE) == 0) {			bp->b_flags |= B_READ;			bp->b_bcount = FsBSIZE(dev);			(*bdevsw[bmajor(dev)].d_strategy)(bp);			u.u_ior++;			sysinfo.bread++;		}	}	if (rablkno && bfreelist.b_bcount>1 && !incore(dev, rablkno)) {		rabp = getblk(dev, rablkno);		if (rabp->b_flags & B_DONE)			brelse(rabp);		else {			rabp->b_flags |= B_READ|B_ASYNC;			rabp->b_bcount = FsBSIZE(dev);			(*bdevsw[bmajor(dev)].d_strategy)(rabp);			u.u_ior++;			sysinfo.bread++;		}	}	if (bp == NULL)		return(bread(dev, blkno));	iowait(bp);	return(bp);}/* * Write the buffer, waiting for completion. * Then release the buffer. */bwrite(bp)register struct buf *bp;{	register flag;	sysinfo.lwrite++;	flag = bp->b_flags;	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);	(*bdevsw[bmajor(bp->b_dev)].d_strategy)(bp);	u.u_iow++;	sysinfo.bwrite++;	if ((flag&B_ASYNC) == 0) {		iowait(bp);		brelse(bp);	} else if (flag & B_DELWRI)		bp->b_flags |= B_AGE;	else		geterror(bp);}/* * Release the buffer, marking it so that if it is grabbed * for another purpose it will be written out before being * given up (e.g. when writing a partial block where it is * assumed that another write for the same block will soon follow). * This can't be done for magtape, since writes must be done * in the same order as requested. */bdwrite(bp)register struct buf *bp;{	sysinfo.lwrite++;	bp->b_flags |= B_DELWRI | B_DONE;	bp->b_resid = 0;	brelse(bp);}/* * Release the buffer, start I/O on it, but don't wait for completion. */bawrite(bp)register struct buf *bp;{	if (bfreelist.b_bcount>4)		bp->b_flags |= B_ASYNC;	bwrite(bp);}/* * release the buffer, with no I/O implied. */brelse(bp)register struct buf *bp;{	register struct buf **backp;	register s;	if (bp->b_flags&B_WANTED)		wakeup((caddr_t)bp);	if (bfreelist.b_flags&B_WANTED) {		bfreelist.b_flags &= ~B_WANTED;		wakeup((caddr_t)&bfreelist);	}	if (bp->b_flags&B_ERROR) {		bp->b_flags |= B_STALE|B_AGE;		bp->b_flags &= ~(B_ERROR|B_DELWRI);		bp->b_error = 0;	}/* Put buffer on freelist, at the beginning if B_AGE, otherwise at the end. */	s = spl6();	if (bp->b_flags & B_AGE) {		backp = &bfreelist.av_forw;		(*backp)->av_back = bp;		bp->av_forw = *backp;		*backp = bp;		bp->av_back = &bfreelist;	} else {		backp = &bfreelist.av_back;		(*backp)->av_forw = bp;		bp->av_back = *backp;		*backp = bp;		bp->av_forw = &bfreelist;	}	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);	bfreelist.b_bcount++;	splx(s);}/* * See if the block is associated with some buffer * (mainly to avoid getting hung up on a wait in breada) */incore(dev, blkno)register dev_t dev;daddr_t blkno;{	register struct buf *bp;	register struct buf *dp;	blkno = FsLTOP(dev, blkno);	dp = bhash(dev, blkno);	for (bp=dp->b_forw; bp != dp; bp = bp->b_forw)		if (bp->b_blkno==blkno && bp->b_dev==dev && (bp->b_flags&B_STALE)==0)			return(1);	return(0);}/* * Assign a buffer for the given block.  If the appropriate * block is already associated, return it; otherwise search * for the oldest non-busy buffer and reassign it. */struct buf *getblk(dev, blkno)register dev_t dev;daddr_t blkno;{	register struct buf *bp;	register struct buf *dp;	blkno = FsLTOP(dev, blkno);    loop:	SPL0();	dp = bhash(dev, blkno);	if (dp == NULL)		panic("devtab");	for (bp=dp->b_forw; bp != dp; bp = bp->b_forw) {		if (bp->b_blkno!=blkno || bp->b_dev!=dev || bp->b_flags&B_STALE)			continue;		SPL6();		if (bp->b_flags&B_BUSY) {			bp->b_flags |= B_WANTED;			syswait.iowait++;			(void) sleep((caddr_t)bp, PRIBIO+2);			syswait.iowait--;			goto loop;		}		SPL0();		notavail(bp);		return(bp);	}	SPL6();	if (bfreelist.av_forw == &bfreelist) {		bfreelist.b_flags |= B_WANTED;		(void) sleep((caddr_t)&bfreelist, PRIBIO+1);		goto loop;	}	SPL0();	bp = bfreelist.av_forw;#ifdef OLD	notavail(bp);	if (bp->b_flags & B_DELWRI) {		bp->b_flags |= B_ASYNC;		bwrite(bp);		goto loop;	}#else	if (bp->b_flags & B_DELWRI) {		bflush(NODEV);		goto loop;	}	notavail(bp);#endif	bp->b_flags = B_BUSY;	bp->b_back->b_forw = bp->b_forw;	bp->b_forw->b_back = bp->b_back;	bp->b_forw = dp->b_forw;	bp->b_back = dp;	dp->b_forw->b_back = bp;	dp->b_forw = bp;	bp->b_dev = dev;	bp->b_blkno = blkno;	bp->b_bcount = FsBSIZE(dev);	return(bp);}/* * get an empty block, * not assigned to any particular device */struct buf *geteblk(){	register struct buf *bp;	register struct buf *dp;loop:	SPL6();	while (bfreelist.av_forw == &bfreelist) {		bfreelist.b_flags |= B_WANTED;		(void) sleep((caddr_t)&bfreelist, PRIBIO+1);	}	SPL0();	dp = &bfreelist;	bp = bfreelist.av_forw;	notavail(bp);	if (bp->b_flags & B_DELWRI) {		bp->b_flags |= B_ASYNC;		bwrite(bp);		goto loop;	}	bp->b_flags = B_BUSY|B_AGE;	bp->b_back->b_forw = bp->b_forw;	bp->b_forw->b_back = bp->b_back;	bp->b_forw = dp->b_forw;	bp->b_back = dp;	dp->b_forw->b_back = bp;	dp->b_forw = bp;	bp->b_dev = (dev_t)NODEV;	bp->b_bcount = SBUFSIZE;	return(bp);}/* * Wait for I/O completion on the buffer; return errors * to the user. */iowait(bp)register struct buf *bp;{	syswait.iowait++;	SPL6();	while ((bp->b_flags&B_DONE)==0)		(void) sleep((caddr_t)bp, PRIBIO);	SPL0();	syswait.iowait--;	geterror(bp);}/* * Mark I/O complete on a buffer, release it if I/O is asynchronous, * and wake up anyone waiting for it. */iodone(bp)register struct buf *bp;{	bp->b_flags |= B_DONE;	if (bp->b_flags&B_ASYNC)		brelse(bp);	else {		bp->b_flags &= ~B_WANTED;		wakeup((caddr_t)bp);	}}/* * Zero the core associated with a buffer. */clrbuf(bp)struct buf *bp;{	clear((caddr_t)bp->b_un.b_words, (int)bp->b_bcount);	bp->b_resid = 0;}/* * swap I/O */swap(blkno, coreaddr, count, rdflg)daddr_t blkno;register coreaddr, count;{	static struct buf *sbp;	register struct buf *bp;	register int c;#ifdef SWAPTRACE	printf("SWAP %s %d disk=0x%x core=0x%x\n",		(rdflg==B_READ)?"IN":"OUT", count, blkno, coreaddr);#endif SWAPTRACE	syswait.swap++;	if (sbp==NULL)		sbp = &swbuf[0];	bp = sbp++;	if (sbp > &swbuf[NSWB-1])		sbp = &swbuf[0];	SPL6();	while (bp->b_flags&B_BUSY) {		bp->b_flags |= B_WANTED;		(void) sleep((caddr_t)bp, PSWP+1);	}	bp->b_flags = B_BUSY | B_PHYS | rdflg;	SPL0();	bp->b_dev = swapdev;#ifdef NONSCATLOAD	bp->b_un.b_addr = (caddr_t)ctob(coreaddr);	while (count > 0) {		if (count <= btoc(MAXCOUNT))			c = count;		else			c = btoc(MAXCOUNT);		bp->b_bcount = ctob(c);		bp->b_blkno = swplo+blkno;		(*bdevsw[(short)bmajor(swapdev)].d_strategy)(bp);		u.u_iosw++;		if (rdflg) {			sysinfo.swapin++;			sysinfo.bswapin += ctod(c);		} else {			sysinfo.swapout++;			sysinfo.bswapout += ctod(c);		}		SPL6();		while((bp->b_flags&B_DONE)==0)			(void) sleep((caddr_t)bp, PSWP);		SPL0();		bp->b_un.b_addr += ctob(c);		bp->b_flags &= ~B_DONE;		if (bp->b_flags & B_ERROR)			panic("IO err in swap");		count -= c;		blkno += ctod(c);	}#else#define sindex coreaddr	while (count > 0) {		if (sindex == SCATEND) {			printf("swap error:swapping beyond process\n");			break;		}		c = memcontig(sindex, count);		bp->b_un.b_addr = (caddr_t)ctob(ixtoc(sindex));		bp->b_bcount = ctob(c);		bp->b_blkno = swplo+blkno;#ifdef SWAPTRACE		printf("    SWAP %s %d disk=0x%x sindex=0x%x\n",			(rdflg==B_READ)?"IN":"OUT", c, blkno, sindex);#endif SWAPTRACE		(*bdevsw[(short)bmajor(swapdev)].d_strategy)(bp);		u.u_iosw++;		if (rdflg) {			sysinfo.swapin++;			sysinfo.bswapin += ctod(c);		} else {			sysinfo.swapout++;			sysinfo.bswapout += ctod(c);		}		SPL6();		while((bp->b_flags&B_DONE)==0)			(void) sleep((caddr_t)bp, PSWP);		SPL0();		bp->b_flags &= ~B_DONE;		if (bp->b_flags & B_ERROR)			panic("IO err in swap");		count -= c;		blkno += ctod(c);		while (c-- > 0 && sindex != SCATEND)			sindex = scatmap[sindex].sc_index;	}#endif	if (bp->b_flags&B_WANTED)		wakeup((caddr_t)bp);	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);	syswait.swap--;#ifndef NONSCATLOAD	return(sindex);#endif}/* * make sure all write-behind blocks * on dev (or NODEV for all) * are flushed out. * (from umount and update) */bflush(dev)dev_t dev;{	register struct buf *bp;	SPL6();	for (bp = bfreelist.av_forw; bp != &bfreelist;) {		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {			bp->b_flags |= B_ASYNC;			notavail(bp);			bwrite(bp);			SPL6();			bp = bfreelist.av_forw;		} else {			if (bp->av_forw) bp = bp->av_forw;			else panic("bflush: bad free list\n");		}	}	SPL0();}/* * Raw I/O. The arguments are *	The strategy routine for the device *	A buffer header, sometimes of a special type owned by the *	device, and sometimes from the physio pool of headers. *	The device number *	Read/write flag * Essentially all the work is computing physical addresses and * validating them. */physio(strat, bp, dev, rw)register struct buf *bp;int (*strat)();{	register struct user *up;	register struct proc *p;	register unsigned base;	register unsigned limit;	register unsigned dsstart, dsend;	int	hpf;	up = &u;	p = up->u_procp;	base = (unsigned)up->u_base;	dsstart = v.v_ustart + ctob(stoc(ctos(up->u_tsize)));	dsend = dsstart + ctob(up->u_dsize);	limit = base + up->u_count - 1;	/*	 * Check that transfer is either entirely in the	 * virtual data space or in the virtual stack space	 */	if (limit < base)	/* wraparound, base < 0, count <= 0 */		goto bad;	if (base >= dsstart && limit < dsend)		goto cont;	if (base >= v.v_uend - ctob(up->u_ssize) && limit < v.v_uend)		goto cont;	if (rw != B_READ && base >= v.v_ustart &&	    limit < v.v_ustart + ctob(up->u_tsize))		goto cont;	if (chkphys((int)base, limit))		goto cont;bad:	up->u_error = EFAULT;	return;cont:	if (rw)		sysinfo.phread++;	else		sysinfo.phwrite++;	syswait.physio++;#ifndef NONSCATLOAD	if ((p->p_flag&SCONTIG)==0) {		p->p_flag |= SSWAPIT;		if (runout) {			runout = 0;			wakeup((caddr_t)&runout);		}		if (runin) {			runin = 0;			wakeup((caddr_t)&runin);		}		SPL6();		while ((p->p_flag&SCONTIG)==0)			(void) sleep((caddr_t)scatmap, PRIBIO);	}#endif	hpf = (bp == NULL);	SPL6();	if (hpf) {		while ((bp = pfreelist.av_forw) == NULL) {			pfreelist.b_flags |= B_WANTED;			(void) asleep((caddr_t)&pfreelist, PRIBIO+1);		}		pfreelist.av_forw = bp->av_forw;	} else while (bp->b_flags&B_BUSY) {		bp->b_flags |= B_WANTED;		(void) asleep((caddr_t)bp, PRIBIO+1);	}	bp->b_error = 0;	bp->b_un.b_addr = vtop((caddr_t)base);	bp->b_flags = B_BUSY | B_PHYS | rw;	bp->b_dev = dev;	bp->b_blkno = up->u_offset >> BSHIFT;	bp->b_bcount = up->u_count;	p->p_flag |= SLOCK;	(*strat)(bp);	SPL6();	while ((bp->b_flags&B_DONE) == 0)		(void) sleep((caddr_t)bp, PRIBIO);	p->p_flag &= ~SLOCK;	if (hpf) {		bp->av_forw = pfreelist.av_forw;		pfreelist.av_forw = bp;		if (pfreelist.b_flags&B_WANTED) {			pfreelist.b_flags &= ~B_WANTED;			wakeup((caddr_t)&pfreelist);		}	} else if (bp->b_flags&B_WANTED)		wakeup((caddr_t)bp);	if (runin) {		runin = 0;		wakeup(&runin);	}	SPL0();	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);	up->u_count = bp->b_resid;	geterror(bp);	syswait.physio--;}physck(nblocks, rw)daddr_t nblocks;{	register struct user *up;	register unsigned over;	off_t upper, limit;	struct a {		int	fdes;		char	*cbuf;		unsigned count;	} *uap;	up = &u;	limit = nblocks << BSHIFT;	if (up->u_offset >= limit) {		if (up->u_offset > limit || rw == B_WRITE)			up->u_error = ENXIO;		return(0);	}	upper = up->u_offset + up->u_count;	if (upper > limit) {		over = upper - limit;		up->u_count -= over;		uap = (struct a *)up->u_ap;		uap->count -= over;	}	return(1);}/* * Invalidate blocks for a dev after last close. */binval(dev)register dev;{	register struct buf *dp;	register struct buf *bp;	register i;	if (dev == swapdev)		return;	for (i=0; i<v.v_hbuf; i++) {		dp = (struct buf *)&hbuf[(short)i];		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)			if (bp->b_dev == dev)				bp->b_flags |= B_STALE|B_AGE;	}}/* * Get the major device number given a strategy routine address */getmajor(strat)register int (*strat)();{	register struct bdevsw *bdp;	register i;	bdp = &bdevsw[0]; 	for (i = 0; i < bdevcnt; i++, bdp++)		if (bdp->d_strategy == strat)			return(i);	panic("findmajor");	/* NOTREACHED */}