/*	START NEW ARIX SCCS HEADER			*/
/*							*/
/*	@(#) bio.c: version 25.1 created on 11/27/91 at 15:08:46	*/
/*							*/
/*	Copyright (c) 1990 by Arix Corporation		*/
/*	All Rights Reserved				*/
/*							*/
#ident	"@(#)bio.c	25.1	11/27/91 Copyright (c) 1990 by Arix Corporation"
/*							*/
/*	END NEW ARIX SCCS HEADER			*/
/*							*/
/* bio.c */


#include "sys/sysmacros.h"
#include "sys/fs/s5macros.h"
#include "sys/user.h"
#include "sys/systm.h"
#include "sys/sysinfo.h"
#include "sys/errno.h"
#include "sys/buf.h"
#include "sys/iobuf.h"
#include "sys/conf.h"
#include "sys/var.h"
#include "sys/tuneable.h"
#include "sys/mfs.h"
#include "sys/spl.h"
#include "sys/debug.h"
#include "sys/fs/s5inode.h"


/*
 * bio_lock:
 *	spin lock used to control access to the bfreelist, the bdirtylist,
 *	the hash chains, the buffer flags, and the syswait structure
 */

/*
 * basyncnt counts asynchronous writes that have been started but have
 * not yet completed (incremented by bawrite, bflush and the bdflush list
 * builders, decremented by iodone_brelse).  basynwait is set by a
 * sleeper in bdwait/bdwait_timeout and cleared by iodone_brelse when it
 * wakes that sleeper because the count reached zero.
 */
int	basyncnt;
int	basynwait;

/* bdirtylist high water mark for waking bdflush */
static uint	bdirty_high_water;

/*
 * state flags for bdflush behavior:
 *	bdflush_wait_iodone - set by get_flush_list before it sleeps because
 *		basyncnt >= tune.t_basynclo; cleared by iodone_brelse
 *	bdflush_wait_work   - set by get_flush_list before it sleeps for lack
 *		of work; cleared by bdwrite when it wakes bdflush
 */
static uint	bdflush_wait_iodone;
static uint	bdflush_wait_work;

/* forward declarations for routines defined later in this file */
static void	merge_partial();
buf_t		*partial_getblk();

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer
 * to the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */

/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 *
 * The buffer is removed from whichever av_forw/av_back list it is on.
 * If it is a delayed-write buffer (B_DELWRI) the bdirtylist count is
 * decremented as well.  The caller is expected to hold bio_lock.
 *
 * The body is wrapped in do { } while (0) so that "notavail(x);" is a
 * single statement (safe in an unbraced if/else), and the argument is
 * parenthesized so that any pointer expression may be passed.  The
 * argument is still evaluated more than once: do not pass an expression
 * with side effects.
 */
#define	notavail(bp)	\
do {\
	(bp)->av_back->av_forw = (bp)->av_forw;\
	(bp)->av_forw->av_back = (bp)->av_back;\
	(bp)->b_flags |= B_BUSY;\
	if ((bp)->b_flags & B_DELWRI)\
		bdirtylist.b_bcount--;\
} while (0)

/* defined elsewhere; binit() sets it to v.v_pbuf when it builds pfreelist */
extern int	pfreecnt;



/*
 * Return a busy buffer for block blkno of dev.  If getblk hands back a
 * buffer already marked B_DONE it is returned untouched; otherwise a
 * read is started through the driver's strategy routine and this
 * routine sleeps in iowait until it completes.
 */
struct buf *
bread(dev, blkno)
register dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	dev = notminored(dev);
	atom_inc(&sysinfo.lread);
	bp = getblk(dev, blkno);
	ASSERT(! bp->b_invalid);

	if ((bp->b_flags & B_DONE) == 0) {
		/* contents not valid yet: issue the read and wait for it */
		bp->b_flags |= B_READ;
		(*bdevsw[bmajor(dev)].d_strategy)(bp);
		u.u_ior++;
		atom_inc(&sysinfo.bread);
		iowait(bp);
	}
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 *
 * The read-ahead block number is taken from u.u_rablock.  If
 * u.u_next_rablock is non-zero it is moved into u.u_rablock (and
 * zeroed) and a further read-ahead pass is made for it.
 *
 * Returns the busy buffer for blkno once its B_DONE flag is set;
 * geterror(bp) is called on it before returning.
 */
struct buf *
breada(dev, blkno)
register dev_t dev;
register daddr_t blkno;
{
	register struct buf *bp, *rabp;
	register struct buf *dp;
        register struct buf *bfreep;

	/*
	   Not too excited about two entrances into freelist critical region
	   and two searches down the hashbucket, so collapsed the incore test
	   and the getblk test.
	*/
 	dev = notminored(dev);
	bfreep = &bfreelist;
	blkno = FsLTOP(BSIZE, blkno);
	dp = bhash(dev, blkno);
	spin_lock(&bio_lock);

read_loop:
	/* search the hash chain for a non-stale buffer holding (dev, blkno) */
	for (bp=dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno==blkno && bp->b_dev==dev &&
		    ((bp->b_flags & B_STALE) == 0)) {
			if (bp->b_flags & B_BUSY) {
				if ((bp->b_flags & B_READAHEAD) == 0) {
					/* busy for another user: sleep, then rescan */
					bp->b_want_flag = B_WANTED;
					atom_inc(&syswait.iowait);
					mfs_sleep(bp, PRIBIO+1, &bio_lock);
					atom_dec(&syswait.iowait);
					goto read_loop;
				}
				/*
				 * Busy only as a read-ahead buffer: take it
				 * over.  With B_ASYNC cleared, iodone() will
				 * wake sleepers on bp instead of calling
				 * iodone_brelse().
				 */
				bp->b_flags &= ~(B_ASYNC|B_READAHEAD);
			}
			else
				notavail(bp);
			atom_inc(&sysinfo.lread);
			goto exit_read;
		}
	}

	/* buffer not found in hash chain */
	if ((bp = bfreep->av_forw) == bfreep) {
		/* freelist empty: ask to be woken when a buffer is freed */
		bfreep->b_flags |= BFREELIST_WANTED;
		mfs_sleep(bfreep, PRIBIO+1, &bio_lock);
		goto read_loop;
	}

	/*
	 * Reassign the buffer at the head of the freelist: take it off the
	 * freelist, move it from its old hash chain to the front of dp,
	 * and start the read with bio_lock dropped.
	 */
	ASSERT(! bp->b_invalid);
	ASSERT(! (bp->b_flags & B_DELWRI));
	notavail(bp);
	bp->b_flags = B_BUSY|B_READ;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	bp->b_bcount = BSIZE;
	spin_unlock(&bio_lock);
	atom_inc(&sysinfo.lread);
	(*bdevsw[bmajor(dev)].d_strategy)(bp);
	u.u_ior++;
	atom_inc(&sysinfo.bread);
	spin_lock(&bio_lock);

exit_read:
	/* from here on blkno and dp describe the read-ahead block */
	blkno = u.u_rablock;
	blkno = FsLTOP(BSIZE, blkno);
	dp = bhash(dev, blkno);
readahead_loop:
	/*
	 * Any buffer on the chain with a matching dev/blkno means no
	 * read-ahead is started (B_BUSY and B_STALE are not examined here).
	 */
	for (rabp = dp->b_forw; rabp != dp; rabp = rabp->b_forw)
		if (rabp->b_blkno == blkno && rabp->b_dev == dev)
			goto exit_readahead;

	if ((rabp = bfreep->av_forw) == bfreep) {
		bfreep->b_flags |= BFREELIST_WANTED;
		mfs_sleep(bfreep, PRIBIO+1, &bio_lock);
		goto readahead_loop;
	}
	/*
	 * Claim a free buffer for the read-ahead block and start an
	 * asynchronous read on it; iodone() releases it via iodone_brelse()
	 * because B_ASYNC is set.
	 */
	ASSERT(! rabp->b_invalid);
	ASSERT(! (rabp->b_flags & B_DELWRI));
	notavail(rabp);
	rabp->b_flags = B_BUSY|B_READ|B_ASYNC|B_READAHEAD;
	rabp->b_back->b_forw = rabp->b_forw;
	rabp->b_forw->b_back = rabp->b_back;
	rabp->b_forw = dp->b_forw;
	rabp->b_back = dp;
	dp->b_forw->b_back = rabp;
	dp->b_forw = rabp;
	rabp->b_dev = dev;
	rabp->b_blkno = blkno;
	rabp->b_bcount = BSIZE;
	spin_unlock(&bio_lock);
	(*bdevsw[bmajor(dev)].d_strategy)(rabp);
	u.u_ior++;
	atom_inc(&sysinfo.bread);
	spin_lock(&bio_lock);

exit_readahead:
	/* a second read-ahead block may be queued in u.u_next_rablock */
	if (u.u_next_rablock) {
		u.u_rablock = u.u_next_rablock;
		u.u_next_rablock = 0;
		goto exit_read;
	}

	/* now wait for the caller's own block (same loop as iowait) */
	atom_inc(&syswait.iowait);
	while ((bp->b_flags & B_DONE) == 0)
		mfs_sleep(bp, PRIBIO, &bio_lock);
	atom_dec(&syswait.iowait);
	spin_unlock(&bio_lock);
	geterror(bp);
	/* fill in any part of the buffer still marked invalid */
	if (bp->b_invalid)
		merge_partial(bp, NULL);
	return(bp);
}

/*
 * Synchronous write: hand the buffer to the driver's strategy routine,
 * sleep in iowait until the I/O is done, then release the buffer with
 * brelse.
 */
bwrite(bp)
register struct buf *bp;
{
	/* fill in the invalid part of a partial buffer before writing it */
	if (bp->b_invalid) {
		merge_partial(bp, NULL);
	}

	bp->b_dev = notminored(bp->b_dev);
	atom_inc(&sysinfo.lwrite);

	/* drop state left over from earlier use of the buffer */
	bp->b_flags &= ~(B_DELWRI | B_ERROR | B_DONE | B_READ);

	(*bdevsw[bmajor(bp->b_dev)].d_strategy)(bp);
	u.u_iow++;
	atom_inc(&sysinfo.bwrite);

	iowait(bp);
	brelse(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 *
 * No I/O is started here.  The buffer is marked B_DELWRI|B_DONE,
 * appended to the tail of bdirtylist and un-busied.  b_start records
 * lbolt at the moment the buffer first became dirty (it is left alone
 * if B_DELWRI was already set); build_aging_flush_list compares it
 * against v.v_autoup_hz.  Anyone sleeping on the buffer is woken.
 */

bdwrite(bp)
register struct buf *bp;
{
	register struct buf	**backp;
        register struct buf	*bdirtyp;
	int			bdflush();

	bdirtyp = &bdirtylist;

	if (! (bp->b_flags & B_DELWRI))
		bp->b_start = lbolt;

	bp->b_flags |= B_DELWRI | B_DONE;

	bp->b_resid = 0;

	atom_inc(&sysinfo.lwrite);
	spin_lock(&bio_lock);

	/* add to end of dirtylist */
	backp = &bdirtyp->av_back;
	(*backp)->av_forw = bp;
	bp->av_back = *backp;
	*backp = bp;
	bp->av_forw = bdirtyp;
	bp->b_flags &= ~(B_BUSY|B_ASYNC|B_AGE);

	/*
	 * Wake bdflush if it is idle and the dirty count, including this
	 * buffer, is now above the high water mark.  The count is
	 * incremented before the comparison so that the test matches the
	 * one in get_flush_list (b_bcount > bdirty_high_water); comparing
	 * the old count delayed the wakeup by one buffer.
	 */
	if ((++bdirtyp->b_bcount > bdirty_high_water) && bdflush_wait_work) {
		bdflush_wait_work = 0;
		wakeup(bdflush);
	}
	ASSERT(bdirtylist.b_bcount >= 0);

	if (bp->b_want_flag) {
		bp->b_want_flag = 0;
		mfs_wakeup_all((caddr_t)bp);
	}

	spin_unlock(&bio_lock);
}

/*
 * Asynchronous write: start I/O on the buffer and return without
 * waiting.  B_ASYNC is set, so iodone() releases the buffer through
 * iodone_brelse() when the write completes.
 */
bawrite(bp)
register struct buf *bp;
{
	/* fill in the invalid part of a partial buffer before writing it */
	if (bp->b_invalid) {
		merge_partial(bp, NULL);
	}

	atom_inc(&sysinfo.lwrite);
	bp->b_dev = notminored(bp->b_dev);
	bp->b_flags &= ~(B_DELWRI | B_ERROR | B_DONE | B_READ);

	/* count the outstanding write and tag it async under bio_lock */
	spin_lock(&bio_lock);
	bp->b_flags |= B_ASYNC;
	basyncnt++;
	spin_unlock(&bio_lock);

	(*bdevsw[bmajor(bp->b_dev)].d_strategy)(bp);
	u.u_iow++;
	atom_inc(&sysinfo.bwrite);
}


/*
 * Return a busy buffer for (dev, blkno) with no part of it marked
 * invalid.  The lookup/allocation itself is done by partial_getblk;
 * this wrapper only resolves b_invalid:
 *	- b_invalid == BSIZE: just clear the count, no read is done
 *	- any other non-zero value: merge_partial reads in the rest
 */
struct buf *
getblk(dev, blkno)
dev_t	dev;
daddr_t	blkno;
{
	register struct buf *bp = partial_getblk(dev, blkno, 0);

	if (bp->b_invalid) {
		if (bp->b_invalid == BSIZE)
			bp->b_invalid = 0;
		else
			merge_partial(bp, NULL);
	}

	ASSERT(! bp->b_invalid);
	return(bp);
}

/*
 * return a bp with at least valid_count bytes valid
 *
 * bp->b_invalid is the number of bytes at the end of the buffer that do
 * not hold valid data, so BSIZE - b_invalid bytes are valid.  When that
 * is less than valid_count, merge_partial is called to read the block
 * and fill in the rest.
 *
 * The returned buffer is busy.  A buffer newly assigned to (dev, blkno)
 * comes back with b_invalid == BSIZE unless valid_count forced a merge.
 */
struct buf *
partial_getblk(dev, blkno, valid_count)
register dev_t		dev;
register daddr_t	blkno;
uint			valid_count;
{
	register struct buf *bp;
	register struct buf *dp;
        register struct buf *bfreep;

 	dev = notminored(dev);
	bfreep = &bfreelist;
	blkno = FsLTOP(BSIZE, blkno);
	dp = bhash(dev, blkno);

	spin_lock(&bio_lock);

    loop:
	/* look for a non-stale buffer for (dev, blkno) on its hash chain */
	for (bp=dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno!=blkno || bp->b_dev!=dev ||
		    (bp->b_flags & B_STALE))
			continue;
		if (bp->b_flags & B_BUSY) {
			/* in use: sleep until released, then rescan */
			bp->b_want_flag = B_WANTED;
			atom_inc(&syswait.iowait);
			mfs_sleep(bp, PRIBIO+1, &bio_lock);
			atom_dec(&syswait.iowait);
			goto loop;
		}
		/* found and idle: take it off its available list */
		notavail(bp);
		spin_unlock(&bio_lock);
		if (valid_count > BSIZE - bp->b_invalid)
			merge_partial(bp, NULL);
		return(bp);
	}

#ifdef RFSCACHE
	/* FIX THIS, NAW, DS: */
	ASSERT(0);
#endif /* RFSCACHE */

	/* not cached: need a free buffer; sleep if the freelist is empty */
	if ( (bp = bfreep->av_forw) == bfreep) {
		bfreep->b_flags |= BFREELIST_WANTED;
		mfs_sleep(bfreep, PRIBIO+1, &bio_lock);
		goto loop;
	}
	/*
	 * Reassign the buffer at the head of the freelist: unlink it from
	 * its old hash chain, insert it at the front of dp, and mark the
	 * whole buffer invalid.
	 */
	notavail(bp);
	ASSERT(! (bp->b_flags & B_DELWRI));
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_flags = B_BUSY;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	bp->b_bcount = BSIZE;
	bp->b_invalid = BSIZE;
	spin_unlock(&bio_lock);
	/* caller needs some valid data: read the block in */
	if (valid_count)
		merge_partial(bp, NULL);
	ASSERT((BSIZE - bp->b_invalid) >= valid_count);
	return(bp);
}

/*
 * Make a partially valid buffer fully valid.
 *
 * The block bp refers to is read into a scratch buffer (bp_provided,
 * or one obtained from geteblk when bp_provided is NULL) and the last
 * bp->b_invalid bytes of it are copied into the same position in bp.
 * If the read fails, bp is marked B_ERROR with the scratch buffer's
 * b_error instead.  Either way b_invalid is reset to 0.  A scratch
 * buffer obtained here is released before returning; a provided one is
 * left with the caller.
 */
static void
merge_partial(bp, bp_provided)
register buf_t	*bp;
buf_t		*bp_provided;
{
	register buf_t	*merge_bp;

	ASSERT(bp->b_invalid);

	if (bp_provided)
		merge_bp = bp_provided;
	else
		merge_bp = geteblk();

	ASSERT(merge_bp->b_bcount == BSIZE);
	ASSERT(! merge_bp->b_proc);

	/* aim the scratch buffer at the same block and set up a read */
	merge_bp->b_dev = bp->b_dev;
	merge_bp->b_blkno = bp->b_blkno;
	merge_bp->b_flags |= B_READ;
	merge_bp->b_flags &= ~(B_DONE | B_ERROR);
	merge_bp->b_error = 0;

	atom_inc(&sysinfo.lread);
	(*bdevsw[bmajor(bp->b_dev)].d_strategy)(merge_bp);
	u.u_ior++;
	atom_inc(&sysinfo.bread);
	iowait(merge_bp);

	if ((merge_bp->b_flags & B_ERROR) == 0) {
		/* copy only the tail that bp was missing */
		bcopy(merge_bp->b_un.b_addr + (BSIZE - bp->b_invalid),
		  bp->b_un.b_addr + (BSIZE - bp->b_invalid), bp->b_invalid);
	} else {
		bp->b_flags |= B_ERROR;
		bp->b_error = merge_bp->b_error;
	}

	bp->b_invalid = 0;

	if (! bp_provided)
		brelse(merge_bp);
}

/*
 * get an empty block,
 * not assigned to any particular device
 *
 * Sleeps until the freelist is non-empty, then takes the buffer at its
 * head.  The returned buffer is flagged B_BUSY|B_AGE, has b_bcount set
 * to SBUFSIZE and b_dev set to NODEV, and is linked onto the b_forw/
 * b_back chain of the bfreelist header rather than a device hash chain.
 */

struct buf *
geteblk()
{
	register struct buf *bp;
	register struct buf *dp;
        register struct buf *bfreep;

	bfreep =  &bfreelist;

	spin_lock(&bio_lock);
	/* dp is the chain the buffer is linked onto below */
	dp = bfreep;
#ifdef RFSCACHE
	/* Take buffer off RFS freelist, if available. */
	if ( rcacheinit && (rbfreelist.av_forw != &rbfreelist)
	   && ((bp = (struct buf *)chk_rlist(LGET)) != NULL))
		goto rfound;
	/* Else take buffer from local freelist. */
#endif /* RFSCACHE */

	/* wait for a free buffer; re-test after every wakeup */
	while ( (bp = bfreep->av_forw) == bfreep) {
		bfreep->b_flags |= BFREELIST_WANTED;
		mfs_sleep(bfreep, PRIBIO+1, &bio_lock);
	}

	/* off the freelist and off its previous hash chain */
	notavail(bp);
	ASSERT(! (bp->b_flags & B_DELWRI));
	ASSERT(! bp->b_invalid);
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
rfound:
	/* common tail: reached by fall-through or from the RFSCACHE path */
	bp->b_flags = B_BUSY|B_AGE;
	bp->b_bcount = SBUFSIZE;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;

	bp->b_dev = (dev_t)NODEV;
	spin_unlock(&bio_lock);
	return(bp);
}

/*
 * Sleep until the buffer is marked B_DONE, then call geterror(bp).
 * syswait.iowait is raised for the duration of the wait.
 */
iowait(bp)
register struct buf *bp;
{
	atom_inc(&syswait.iowait);

	spin_lock(&bio_lock);
	for (;;) {
		/* B_DONE is re-tested after every wakeup */
		if (bp->b_flags & B_DONE)
			break;
		mfs_sleep(bp, PRIBIO, &bio_lock);
	}
	spin_unlock(&bio_lock);

	atom_dec(&syswait.iowait);
	geterror(bp);
}

/*
 * I/O completion for a buffer.
 *
 * If the buffer carries its own b_iodone handler, that handler is
 * called and nothing else is done here.  Otherwise, under bio_lock, a
 * driver-reported error is moved from b_driver_flags into b_flags, the
 * buffer is marked B_DONE, and then either released (B_ASYNC) or its
 * sleepers are woken.
 */
iodone(bp)
register struct buf *bp;
{
	/* a buffer-specific completion routine takes over entirely */
	if (bp->b_iodone) {
		(*bp->b_iodone)(bp);
		return;
	}

	ASSERT(! bp->b_invalid);
	spin_lock(&bio_lock);

	if (bp->b_driver_flags & B_ERROR) {
		bp->b_driver_flags &= ~B_ERROR;
		bp->b_flags |= B_ERROR;
	}
	bp->b_flags |= B_DONE;

	if ((bp->b_flags & B_ASYNC) == 0) {
		/* someone is (or will be) waiting in iowait/breada */
		bp->b_want_flag = 0;
		mfs_wakeup_all((caddr_t)bp);
	} else {
		/* mcm if invoked via breada then an error will be ignored */
		iodone_brelse(bp);	/* release bp to 1k freelist */
	}

	spin_unlock(&bio_lock);
}

/*
 * Clear a buffer: zero SBUFSIZE bytes of its data area and reset
 * b_resid to 0.
 */
clrbuf(bp)
register struct buf *bp;
{
	ASSERT(bp->b_bcount != 0);

	bp->b_resid = 0;
	bzero(bp->b_un.b_words, SBUFSIZE);
}


/*
 * Sleep until basyncnt drops to zero, i.e. until every outstanding
 * asynchronous write has completed.  basynwait tells iodone_brelse that
 * a wakeup on &basyncnt is wanted.
 */
bdwait()
{
	spin_lock(&bio_lock);
	for (;;) {
		if (basyncnt == 0)
			break;
		basynwait = 1;
		mfs_sleep((caddr_t)&basyncnt, PRIBIO, &bio_lock);
	}
	spin_unlock(&bio_lock);
}

/*
 * Like bdwait, but give up once `ticks' clock ticks (measured with
 * lbolt) have passed.  Each sleep is paired with a timeout(setrun, ...)
 * on the current process, which is cancelled again after the sleep
 * returns.
 *
 * Returns the value of basyncnt at exit: zero when all asynchronous
 * writes have finished, non-zero when the wait timed out first.
 */
bdwait_timeout(ticks)
register int	ticks;
{
	register time_t	start = lbolt;
	int		id, setrun();

	spin_lock(&bio_lock);
	for (;;) {
		if (!basyncnt || lbolt - start >= ticks)
			break;

		basynwait = 1;
		/* arm a timer for the time remaining, plus one tick */
		id = timeout(setrun, (caddr_t)u.u_procp,
			ticks - (lbolt - start) + 1);
		mfs_sleep((caddr_t)&basyncnt, PRIBIO, &bio_lock);
		untimeout(id);
	}
	spin_unlock(&bio_lock);

	return (basyncnt);
}


/*
 * Invalidate blocks for a dev after last close.
 *
 * Every buffer on every hash chain whose b_dev matches is flagged
 * B_STALE|B_AGE and has its b_invalid count cleared.
 */
binval(dev)
register dev_t dev;
{
	register struct buf *dp;
	register struct buf *bp;
	register i;

	dev = notminored(dev);

	spin_lock(&bio_lock);
	for (i = 0; i < v.v_hbuf; i++) {
		dp = (struct buf *)&hbuf[i];
		bp = dp->b_forw;
		while (bp != dp) {
			if (bp->b_dev == dev) {
				bp->b_flags |= B_STALE|B_AGE;
				bp->b_invalid = 0;
			}
			bp = bp->b_forw;
		}
	}
	spin_unlock(&bio_lock);
}
/*
 * make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 *
 * Matching buffers are removed from bdirtylist under bio_lock and
 * chained into a private NULL-terminated list through av_forw.  The
 * writes are then started asynchronously with the lock dropped; this
 * routine does not wait for them to complete.
 */
bflush(dev)
register dev_t	dev;
{
	register buf_t	*bp;
	register buf_t	*head_bp;
	register buf_t	*next_bp;
	register buf_t	*bdirty;
	register 	count;
	buf_t		*m_bp;

	count = 0;
	bdirty = &bdirtylist;
 	dev = notminored(dev);
	/* scratch buffer handed to merge_partial for partial blocks */
	m_bp = geteblk();

	spin_lock(&bio_lock);

	if (dev == NODEV) {
		/* take the whole dirty list: mark every buffer busy+async */
		for (bp = bdirty->av_forw; bp != bdirty; bp = bp->av_forw) {
			bp->b_flags &=~(B_READ|B_DONE|B_ERROR|B_DELWRI);
			bp->b_flags |=  B_ASYNC | B_BUSY;
			count++;
		}
		/* cut the ring open so it becomes a NULL-terminated list */
		if ((head_bp = bdirty->av_forw) != bdirty)
			bdirty->av_back->av_forw = NULL;
		else
			head_bp = NULL;

		/* bdirtylist is now empty */
		bdirty->av_forw = &bdirtylist;
		bdirty->av_back = &bdirtylist;
	}
	else {
		head_bp = NULL;
		for (bp = bdirty->av_forw; bp != bdirty;) {
			/* save the successor: bp->av_forw is reused below */
			next_bp = bp->av_forw;
			if (dev == bp->b_dev) {
				bp->b_flags |= B_ASYNC | B_BUSY;
				bp->b_flags &=~(B_READ|B_DONE|B_ERROR|B_DELWRI);
				/* unlink from bdirtylist */
				bp->av_back->av_forw = next_bp;
				next_bp->av_back = bp->av_back;
				count++;
				/* push on the private list (reverses order) */
				bp->av_forw = head_bp;
				head_bp = bp;
			}
			bp = next_bp;
		}
	}
	bdirty->b_bcount -= count;
	ASSERT(bdirtylist.b_bcount >= 0);
	/* all of these are now outstanding async writes */
	basyncnt += count;
	spin_unlock(&bio_lock);

	for (bp = head_bp; bp;) {
		/* fetch the link before bp is handed to the strategy routine */
		next_bp = bp->av_forw;
		if (bp->b_invalid)
			merge_partial(bp, m_bp);
		bp->b_dev = notminored(bp->b_dev);
		(*bdevsw[bmajor(bp->b_dev)].d_strategy)(bp);
		bp = next_bp;
	}
	brelse(m_bp);
	atom_add(&sysinfo.lwrite, count);
	atom_add(&sysinfo.bwrite, count);
	s54kbflush(dev);		/* 4K-byte bflush routine */
}

/* scratch buffer bdflush passes to merge_partial; obtained once at startup */
static buf_t	*bdflush_buf;

/*
 * Buffer flushing loop; never returns.  Each pass asks get_flush_list
 * for a NULL-terminated av_forw list of buffers (it sleeps until there
 * is one) and starts a write on every buffer in it.
 */
bdflush()
{
	register buf_t	*bp;
	register buf_t	*next_bp;
	buf_t		*get_flush_list();

	/* reserve a buffer for bdflush partial block resolution */
	bdflush_buf = geteblk();

	for (;;) {
		bp = get_flush_list();
		ASSERT(bp);
		for (; bp; bp = next_bp) {
			/* fetch the link before bp is handed to the driver */
			next_bp = bp->av_forw;
			if (bp->b_invalid)
				merge_partial(bp, bdflush_buf);
			bp->b_dev = notminored(bp->b_dev);
			(*bdevsw[bmajor(bp->b_dev)].d_strategy)(bp);
		}
	}
}


/*
 * Choose the next batch of dirty buffers for bdflush to write, sleeping
 * on the bdflush channel until there is one.  Returns a non-empty,
 * NULL-terminated list linked through av_forw.
 *
 * bdflush_wait_work is set here before sleeping for lack of work and
 * is cleared by bdwrite when it wakes bdflush.
 *
 * bdflush_wait_iodone is set here before sleeping because too many
 * async writes are outstanding, and is cleared by iodone_brelse.
 */

buf_t	*
get_flush_list()
{
	register buf_t	*ret_list;
	/* lbolt at the last aging scan that found nothing old enough */
	static uint	last_aging_flush_lbolt = 0;
	buf_t		*build_bdirty_flush_list();
	buf_t		*build_aging_flush_list();

	ret_list = NULL;
	spin_lock(&bio_lock);

	while (1) {
		/* throttle: wait until outstanding writes drop below t_basynclo */
		if (basyncnt >= tune.t_basynclo) {
			bdflush_wait_iodone = 1;
			mfs_sleep(bdflush, PRIBIO, &bio_lock);
			continue;
		}
		/* aging scan, at most once per t_bdflushr ticks when idle */
		if ((lbolt - last_aging_flush_lbolt) >= tune.t_bdflushr) {
			if (ret_list = build_aging_flush_list())
				break;
			/* nothing aged out: restart the interval */
			last_aging_flush_lbolt = lbolt;
		}
		if (bdirtylist.b_bcount > bdirty_high_water) {
			/* get at most tune.t_basynchi items */
			ret_list =  build_bdirty_flush_list();
			ASSERT(ret_list);
			break;
		}
		ASSERT(bdirtylist.b_bcount <= bdirty_high_water);

		/* nothing to do: sleep until woken on the bdflush channel */
		bdflush_wait_work = 1;
		mfs_sleep(bdflush, PRIBIO, &bio_lock);
	}
	spin_unlock(&bio_lock);
	return(ret_list);
}

/*
 * Detach up to tune.t_basynchi buffers from the front of bdirtylist and
 * return them as a NULL-terminated av_forw list, each one marked
 * B_ASYNC|B_BUSY with B_READ, B_DONE, B_ERROR and B_DELWRI cleared.
 * basyncnt and the write statistics are adjusted by the number taken.
 * Contains no locking of its own; its caller, get_flush_list, holds
 * bio_lock.
 *
 * NOTE(review): with tune.t_basynchi == 0 the count is 0 and the head
 * of the untouched dirty list is returned - confirm the tunable is
 * constrained to be non-zero.
 */
buf_t	*
build_bdirty_flush_list()
{
	register uint	count;
	register buf_t	*bp;
	buf_t		*bp_list;

	ASSERT(bdirtylist.b_bcount >= 0);
	ASSERT(bdirtylist.av_forw != &bdirtylist);

	bp = bdirtylist.av_forw;
	count = min(tune.t_basynchi, bdirtylist.b_bcount);
	bdirtylist.b_bcount -= count;
	basyncnt += count;
	atom_add(&sysinfo.lwrite, count);
	atom_add(&sysinfo.bwrite, count);

	/* mark the first `count' buffers as busy asynchronous writes */
	for (; count; bp = bp->av_forw, count--) {
		ASSERT(bp->b_flags & B_DELWRI);
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
		bp->b_flags |= (B_ASYNC|B_BUSY);
	}

	/* bp is the first item to be left behind */
	bp_list = bdirtylist.av_forw;
	/* terminate the detached part after its last buffer */
	bp->av_back->av_forw = NULL;

	/* what remains starts at bp (the header itself if all were taken) */
	bdirtylist.av_forw = bp;
	bp->av_back = &bdirtylist;

	return(bp_list);
}

/*
 * Collect dirty buffers that have been dirty for at least v.v_autoup_hz
 * ticks (lbolt - b_start), at most tune.t_basynchi of them.  Each one
 * is unlinked from bdirtylist with notavail (which also marks it busy
 * and decrements the dirty count), prepared as an asynchronous write,
 * and pushed on the head of the returned NULL-terminated av_forw list.
 * Returns NULL if no buffer is old enough.  Contains no locking of its
 * own; its caller, get_flush_list, holds bio_lock.
 */
buf_t *
build_aging_flush_list()
{
	register buf_t	*bp;
	register buf_t	*head_bp;
	register buf_t	*next_bp;
	register	autoup;
	register	count;

	autoup = v.v_autoup_hz;
	head_bp = NULL;
	count = 0;

	for (bp = bdirtylist.av_forw; bp != &bdirtylist; bp = next_bp) {
		/* saved first: bp->av_forw is overwritten when bp is taken */
		next_bp = bp->av_forw;

		ASSERT(bp->b_flags & B_DELWRI);

		/* not dirty for long enough yet */
		if ((lbolt - bp->b_start) < autoup)
			continue;

		notavail(bp);
		ASSERT(bdirtylist.b_bcount >= 0);
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
		bp->b_flags |= B_ASYNC;
		bp->av_forw = head_bp;
		head_bp = bp;
		if (++count >= tune.t_basynchi)
			break;
	}
	basyncnt += count;
	atom_add(&sysinfo.lwrite, count);
	atom_add(&sysinfo.bwrite, count);
	return(head_bp);
}

/*
 * Release a buffer whose asynchronous I/O has completed.
 *
 * Called from iodone() for B_ASYNC buffers, with bio_lock already held
 * (no locking is done here).  The buffer always goes to bfreelist.
 * For a completed write (B_READ clear) basyncnt is decremented and
 * bdflush and/or bdwait sleepers are woken if their conditions are met.
 */
iodone_brelse(bp)
register struct buf *bp;
{
	register struct buf **backp;
        register struct buf *bfreep;

	ASSERT(! bp->b_invalid);
	ASSERT(! (bp->b_flags & B_DELWRI));
	bfreep = &bfreelist;
	/* Put buffer on freelist, at the beginning if B_AGE,
	   otherwise at the end.
	 */
	if (bp->b_flags & B_ERROR) {
		/* failed I/O: disown the contents so they are never reused */
		bp->b_flags |= B_STALE|B_AGE;
		bp->b_flags &= ~(B_ERROR|B_DELWRI);
		bp->b_dev = NODEV;
		bp->b_error = 0;
	}

	if (bp->b_flags & B_AGE) {
		/* head of the freelist: reused first */
		backp = &bfreep->av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = bfreep;
	} else {
		/* tail of the freelist */
		backp = &bfreep->av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = bfreep;
	}

	bp->b_reltime = lbolt;

	/*
	 * Wake whoever wants this particular buffer; only if nobody does
	 * are sleepers waiting for any free buffer woken.
	 */
	if (bp->b_want_flag) {
		bp->b_want_flag = 0;
		bp->b_flags &= ~(B_BUSY|B_ASYNC|B_AGE|B_READAHEAD);
		mfs_wakeup_all((caddr_t)bp);
	}
	else {
		bp->b_flags &= ~(B_BUSY|B_ASYNC|B_AGE|B_READAHEAD);
		if (bfreep->b_flags & BFREELIST_WANTED) {
			bfreep->b_flags &= ~BFREELIST_WANTED;
			mfs_wakeup_all((caddr_t)bfreep);
		}
	}

	/* B_READ is still set for reads; only writes count in basyncnt */
	if ((bp->b_flags & B_READ) == 0) {
		basyncnt--;
		/* bdflush was throttled waiting for writes to drain */
		if (bdflush_wait_iodone && (basyncnt < tune.t_basynclo)) {
			bdflush_wait_iodone = 0;
			mfs_wakeup_all(bdflush);
		}
		/* bdwait/bdwait_timeout sleeper waiting for zero */
		if ((basyncnt == 0) && basynwait) {
			basynwait = 0;
			mfs_wakeup_all(&basyncnt);
		}
	}
}

/*
 * release the buffer, with no I/O implied.
 *
 * The buffer must be busy and not B_ASYNC.  A buffer flagged B_ERROR is
 * disowned first (B_STALE|B_AGE, b_dev = NODEV, B_DELWRI dropped).  A
 * buffer still flagged B_DELWRI is put on bdirtylist (and counted);
 * anything else goes on bfreelist.  Takes bio_lock itself.
 */
brelse(bp)
register struct buf *bp;
{
	register struct buf **backp;
        register struct buf *bfreep;

	/* Put buffer on freelist, at beginning if B_AGE, otherwise at end. */

	ASSERT(bp->b_flags & B_BUSY);
	ASSERT(! (bp->b_flags & B_ASYNC));

	spin_lock(&bio_lock);
	if (bp->b_flags & B_ERROR) {
		bp->b_flags |= B_STALE|B_AGE;
		bp->b_flags &= ~(B_ERROR|B_DELWRI);
		bp->b_dev = NODEV;
		bp->b_error = 0;
	}

	/* despite its name, bfreep may point at the dirty list header */
	if (bp->b_flags & B_DELWRI) {
		bfreep = &bdirtylist;
		bfreep->b_bcount++;
	}
	else
		bfreep = &bfreelist;

	if (bp->b_flags & B_AGE) {
		/* insert at the head of the chosen list */
		backp = &bfreep->av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = bfreep;
	} else {
		/* insert at the tail of the chosen list */
		backp = &bfreep->av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = bfreep;
	}

	/*
	 * Wake whoever wants this particular buffer; otherwise, if it
	 * went to the freelist, wake sleepers waiting for any free buffer.
	 */
	if (bp->b_want_flag) {
		bp->b_want_flag = 0;
		mfs_wakeup_all((caddr_t)bp);
	}
	else  if (! (bp->b_flags & B_DELWRI)) {
		if (bfreep->b_flags & BFREELIST_WANTED) {
			bfreep->b_flags &= ~BFREELIST_WANTED;
			mfs_wakeup_all((caddr_t)bfreep);
		}
	}
	bp->b_flags &= ~(B_BUSY|B_ASYNC|B_AGE|B_READAHEAD);
	bp->b_reltime = lbolt;
	spin_unlock(&bio_lock);
}

/* array of buffer headers, defined elsewhere; binit walks v.v_buf of them */
extern struct buf bufhdrs[1];

/*
 * Initialize the buffer I/O system by freeing
 * all buffers and setting all device hash buffer lists to empty.
 *
 * Also computes bdirty_high_water from tune.t_bdirtypct, builds the
 * pfreelist of pbuf headers, and calls upkern_strategy_init().
 */
binit()
{
	register struct buf	*bp;
	register struct buf	*dp;
#ifdef RFSCACHE
	register struct rbuf	*rdp;
#endif /* RFSCACHE */
	register unsigned	i;
	register char		*pbuffer;

	/* empty dirty list: header points at itself on both chains */
	dp = &bdirtylist;
	dp->b_forw = dp->b_back =
	    dp->av_forw = dp->av_back = dp;

	/*
	 * High water mark: t_bdirtypct percent of the buffers, never less
	 * than 1; a percentage outside 0..100 selects v.v_buf.
	 */
	if (tune.t_bdirtypct == 0)
		bdirty_high_water = 1;
	else if ((tune.t_bdirtypct > 0) && (tune.t_bdirtypct <= 100)) {
		bdirty_high_water = (v.v_buf * tune.t_bdirtypct) / 100;
		if (bdirty_high_water == 0)
			bdirty_high_water = 1;
	}
	else
		bdirty_high_water = v.v_buf;

	/* empty free list; dp stays pointing at it for the loop below */
	dp = &bfreelist;
	dp->b_forw = dp->b_back =
	    dp->av_forw = dp->av_back = dp;

	bp = bufhdrs;

	pbuffer = buffers;

	/*
	 * Give each header SBUFSIZE bytes of the buffers area, link it onto
	 * the bfreelist header's b_forw/b_back chain, and let brelse put it
	 * on the freelist (brelse requires B_BUSY).
	 */
	for (i = 0; i < v.v_buf; i++, bp++, pbuffer += SBUFSIZE) {

		bp->b_dev = NODEV;
		bp->b_un.b_addr = pbuffer;
		bp->b_back = dp;
		bp->b_forw = dp->b_forw;
		dp->b_forw->b_back = bp;
		dp->b_forw = bp;
		bp->b_flags = B_BUSY;
		bp->b_bcount = SBUFSIZE;
		brelse(bp);
	}

	/*
	 * Chain the pbuf headers into a singly linked, NULL-terminated
	 * pfreelist.  NOTE(review): this loop assumes v.v_pbuf >= 1.
	 */
	pfreecnt = v.v_pbuf;
	pfreelist.av_forw = bp = pbuf;
	for (; bp < &pbuf[v.v_pbuf-1]; bp++)
		bp->av_forw = bp+1;
	bp->av_forw = NULL;
	/* every hash chain starts out empty */
	for (i = 0; i < v.v_hbuf; i++)
		hbuf[i].b_forw = hbuf[i].b_back = (struct buf *)&hbuf[i];

#ifdef RFSCACHE
	/*
 	*  Initialization for network cache
 	*/
	rdp = &rbfreelist;
	rdp->b_forw = rdp->b_back =
	    rdp->av_forw = rdp->av_back = rdp->f_forw = rdp->f_back = rdp;
	lbuf_ct = v.v_buf;
	rbuf_ct = 0;
#endif /* RFSCACHE */

	upkern_strategy_init();
}

/*
 * Reverse, in place, a NULL-terminated list of buffers linked through
 * av_forw.  Only av_forw is rewritten; av_back is not touched.
 *
 * headp: first buffer of the list, or NULL for an empty list.
 * Returns the new head (the former tail), or NULL if the list was empty.
 */
buf_t *
reverse_bp_list(headp)
buf_t *headp;
{
	register buf_t  *bp;
	register buf_t  *lastp = 0;
	register buf_t  *savforw;

	/*
	 * The pointer is tested before it is first dereferenced so that an
	 * empty list is returned unchanged instead of faulting on NULL.
	 */
	for (bp = headp; bp; bp = savforw) {
		savforw = bp->av_forw;
		bp->av_forw = lastp;
		lastp = bp;
	}

	return (lastp);
}
