/*	START NEW ARIX SCCS HEADER			*/
/*							*/
/*	@(#) s54kbio.c: version 25.1 created on 11/27/91 at 14:56:43	*/
/*							*/
/*	Copyright (c) 1990 by Arix Corporation		*/
/*	All Rights Reserved				*/
/*							*/
#ident	"@(#)s54kbio.c	25.1	11/27/91 Copyright (c) 1990 by Arix Corporation"
/*							*/
/*	END NEW ARIX SCCS HEADER			*/
/*							*/
/* fs/s54k/s54kbio.c */


#define  FsTYPE 4
#include "sys/sysmacros.h"
#include "sys/fs/s5macros.h"
#include "sys/user.h"
#include "sys/systm.h"
#include "sys/sysinfo.h"
#include "sys/errno.h"
#include "sys/buf.h"
#include "sys/iobuf.h"
#include "sys/conf.h"
#include "sys/var.h"
#include "sys/tuneable.h"
#include "sys/mfs.h"
#include "sys/spl.h"
#include "sys/debug.h"
#include "sys/fs/s5inode.h"

/* forward declaration: s54kbread() calls s54kgetblk() before its definition */
struct buf *s54kgetblk();
/*
 * s54kbio_lock:
 *	spin lock used to control access to the free list (b4kfreelist),
 *	the dirty list (b4kdirtylist), the hash chains, the buffer flags,
 *	and the syswait structure.  The async write count below is also
 *	only modified with this lock held.  The lock itself is declared
 *	outside this file.
 */

/*
 * count and flag for outstanding async writes:
 * s54kbasyncnt is raised when an async write is queued and lowered by
 * s54kiodone_brelse() when a non-read buffer completes; s54kbasynwait
 * is set by s54kbdwait()/s54kbdwait_timeout() while they sleep for the
 * count to reach zero.
 */
extern int s54kbasyncnt;
extern int s54kbasynwait;

/*
 * b4kdirtylist high water mark for waking s54kbdflush
 * (computed in s54kbinit() from tune.t_s54kbdirtypct)
 */
static uint	s54kbdirty_high_water;
/*
 * state flags for bdflush behavior; both are set by
 * s54kget_flush_list() just before it sleeps:
 *	s54kbdflush_wait_iodone	- cleared by s54kiodone_brelse()
 *	s54kbdflush_wait_work	- cleared by s54kbdwrite()
 */
static uint	s54kbdflush_wait_iodone;
static uint	s54kbdflush_wait_work;

/* forward declarations for routines used before they are defined */
static void	s54kmerge_partial();
buf_t		*s54kpartial_getblk();

/* size in bytes of one buffer's data area in this cache */
#define S54KSIZE 	4096

extern struct	buf	b4kfreelist;	/* head of the free list of buffers */
extern struct	buf	b4kdirtylist;	/* head of the list of dirty buffers; */
					/* its b_bcount holds the dirty count */
extern struct	buf	bufhdrs4k[];	/* buffer headers */
extern int		s54knbuf;	/* number of entries in bufhdrs4k[] */
extern struct	s5inode	s54kinode[];	/* s5inode table */
extern int		n4khbuf;	/* number of 4k hash buffers */
extern buf_t	s54khbuf[];		/* hash buffers	*/
extern int		s54khmask;	/* mask applied to the hash sum in s54kbhash() */

/*
 * Map a (device, block number) pair to the head of its hash chain in
 * s54khbuf[].  The two values are cast to int, summed and masked with
 * s54khmask.  The arguments are parenthesized so that the casts apply
 * to the whole argument even when an expression is passed.
 */
#define	s54kbhash(d,b)	((struct buf *)&s54khbuf[((int)(d)+(int)(b))&s54khmask])

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	s54kgetblk
 *	s54kbread
 *	s54kbreada
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	s54kbwrite
 *	s54kbdwrite
 *	s54kbawrite
 *	s54kbrelse
 */

/*
 * Unlink a buffer from the av_forw/av_back list it is currently on
 * and mark it busy.  If the buffer is a delayed write (B_DELWRI) the
 * dirty count kept in b4kdirtylist.b_bcount is decremented as well.
 * (internal interface)
 *
 * Every use in this file is made with s54kbio_lock held.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement (safe in an unbraced if/else), and the argument is
 * parenthesized.  Note that bp is evaluated more than once, so it must
 * not have side effects.
 */
#define	s54knotavail(bp)	\
do {\
	(bp)->av_back->av_forw = (bp)->av_forw;\
	(bp)->av_forw->av_back = (bp)->av_back;\
	(bp)->b_flags |= B_BUSY;\
	if ((bp)->b_flags & B_DELWRI)\
		b4kdirtylist.b_bcount--;\
} while (0)

/*
 * Return a busy buffer for block blkno of device dev.  The buffer is
 * obtained from s54kgetblk(); the device is only read when that
 * buffer is not already marked B_DONE, in which case the caller waits
 * for the read to complete.
 */
struct buf *
s54kbread(dev, blkno)
register dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	dev = notminored(dev);
	atom_inc(&sysinfo.lread);

	bp = s54kgetblk(dev, blkno);
	ASSERT(bp->b_flags & B_S54K);
	ASSERT(! bp->b_invalid);

	if ((bp->b_flags & B_DONE) == 0) {
		/* contents not valid yet: issue the read and wait for it */
		bp->b_flags |= B_READ;
		(*bdevsw[bmajor(dev)].d_strategy)(bp);
		u.u_ior++;
		atom_inc(&sysinfo.bread);
		s54kiowait(bp);
		ASSERT(bp->b_flags & B_S54K);
	}

	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller)
 */
struct buf *
s54kbreada(dev, blkno)
register dev_t dev;
register daddr_t blkno;
{
	register struct buf *bp, *rabp;
	register struct buf *dp;
        register struct buf *bfreep;

	/*
	   Not too excited about two entrances into freelist critical region
	   and two searches down the hashbucket, so collapsed the incore test
	   and the getblk test.
	*/
 	dev = notminored(dev);
	bfreep = &b4kfreelist;
	blkno = FsLTOP(BSIZE, blkno);
	dp = s54kbhash(dev, blkno);
	spin_lock(&s54kbio_lock);

read_loop:
	for (bp=dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno==blkno && bp->b_dev==dev &&
		    ((bp->b_flags & B_STALE) == 0)) {
			if (bp->b_flags & B_BUSY) {
				if ((bp->b_flags & B_READAHEAD) == 0) {
					bp->b_want_flag = B_WANTED;
					atom_inc(&syswait.iowait);
					mfs_sleep(bp, PRIBIO+1, &s54kbio_lock);
					atom_dec(&syswait.iowait);
					goto read_loop;
				}
				bp->b_flags &= ~(B_ASYNC|B_READAHEAD);
			}
			else
				s54knotavail(bp);
			atom_inc(&sysinfo.lread);
			goto exit_read;
		}
	}

	/* buffer not found in hash chain */
	if ((bp = bfreep->av_forw) == bfreep) {
		bfreep->b_flags |= BFREELIST_WANTED;
		mfs_sleep(bfreep, PRIBIO+1, &s54kbio_lock);
		goto read_loop;
	}

	ASSERT(! bp->b_invalid);
	ASSERT(! (bp->b_flags & B_DELWRI));
	s54knotavail(bp);
	bp->b_flags = B_S54K|B_BUSY|B_READ;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	bp->b_bcount = BSIZE;
	spin_unlock(&s54kbio_lock);
	atom_inc(&sysinfo.lread);
	(*bdevsw[bmajor(dev)].d_strategy)(bp);
	u.u_ior++;
	atom_inc(&sysinfo.bread);
	spin_lock(&s54kbio_lock);

exit_read:
	blkno = u.u_rablock;
	blkno = FsLTOP(BSIZE, blkno);
	dp = s54kbhash(dev, blkno);
readahead_loop:
	for (rabp = dp->b_forw; rabp != dp; rabp = rabp->b_forw)
		if (rabp->b_blkno == blkno && rabp->b_dev == dev)
			goto exit_readahead;

	if ((rabp = bfreep->av_forw) == bfreep) {
		bfreep->b_flags |= BFREELIST_WANTED;
		mfs_sleep(bfreep, PRIBIO+1, &s54kbio_lock);
		goto readahead_loop;
	}
	ASSERT(! rabp->b_invalid);
	ASSERT(! (rabp->b_flags & B_DELWRI));
	s54knotavail(rabp);
	rabp->b_flags = B_S54K|B_BUSY|B_READ|B_ASYNC|B_READAHEAD;
	rabp->b_back->b_forw = rabp->b_forw;
	rabp->b_forw->b_back = rabp->b_back;
	rabp->b_forw = dp->b_forw;
	rabp->b_back = dp;
	dp->b_forw->b_back = rabp;
	dp->b_forw = rabp;
	rabp->b_dev = dev;
	rabp->b_blkno = blkno;
	rabp->b_bcount = BSIZE;
	spin_unlock(&s54kbio_lock);
	(*bdevsw[bmajor(dev)].d_strategy)(rabp);
	u.u_ior++;
	atom_inc(&sysinfo.bread);
	spin_lock(&s54kbio_lock);

exit_readahead:
	if (u.u_next_rablock) {
		u.u_rablock = u.u_next_rablock;
		u.u_next_rablock = 0;
		goto exit_read;
	}

	atom_inc(&syswait.iowait);
	while ((bp->b_flags & B_DONE) == 0)
		mfs_sleep(bp, PRIBIO, &s54kbio_lock);
	atom_dec(&syswait.iowait);
	spin_unlock(&s54kbio_lock);
	geterror(bp);
	ASSERT(bp->b_flags & B_S54K);
	if (bp->b_invalid)
		s54kmerge_partial(bp, NULL);

	return(bp);
}

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
s54kbwrite(bp)
register struct buf *bp;
{
	if (bp->b_invalid)
		s54kmerge_partial(bp, NULL);
	bp->b_dev = notminored(bp->b_dev);
	atom_inc(&sysinfo.lwrite);
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	(*bdevsw[bmajor(bp->b_dev)].d_strategy)(bp);
	u.u_iow++;
	atom_inc(&sysinfo.bwrite);
	s54kiowait(bp);
	s54kbrelse(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 *
 * The buffer is marked B_DELWRI|B_DONE, appended to the tail of
 * b4kdirtylist and un-busied.  b_start records lbolt the first time
 * the buffer becomes dirty, for the aging flush.  If the dirty count
 * now exceeds s54kbdirty_high_water and s54kbdflush is sleeping for
 * work, it is woken.
 */

s54kbdwrite(bp)
register struct buf *bp;
{
	register struct buf	**backp;
        register struct buf	*bdirtyp;
	int			s54kbdflush();

	bdirtyp = &b4kdirtylist;

	/* keep the original dirty time if the buffer was already dirty */
	if (! (bp->b_flags & B_DELWRI))
		bp->b_start = lbolt;

	bp->b_flags |= B_DELWRI | B_DONE;

	bp->b_resid = 0;

	atom_inc(&sysinfo.lwrite);
	spin_lock(&s54kbio_lock);

	/* add to end of dirtylist */
	backp = &bdirtyp->av_back;
	(*backp)->av_forw = bp;
	bp->av_back = *backp;
	*backp = bp;
	bp->av_forw = bdirtyp;
	bp->b_flags &= ~(B_BUSY|B_ASYNC|B_AGE);

	/*
	 * Count this buffer first and then test the new total, so the
	 * wakeup condition is the same one s54kget_flush_list() uses
	 * (b_bcount > s54kbdirty_high_water).  Testing the count from
	 * before the increment woke the flusher one buffer late.
	 */
	bdirtyp->b_bcount++;
	if ((bdirtyp->b_bcount > s54kbdirty_high_water) &&
	  s54kbdflush_wait_work) {
		s54kbdflush_wait_work = 0;
		/*
		 * NOTE(review): every other wakeup in this file uses
		 * mfs_wakeup_all(); confirm wakeup() reaches a process
		 * sleeping in mfs_sleep(s54kbdflush, ...).
		 */
		wakeup(s54kbdflush);
	}
	ASSERT(b4kdirtylist.b_bcount >= 0);

	/* someone slept waiting for this buffer while it was busy */
	if (bp->b_want_flag) {
		bp->b_want_flag = 0;
		mfs_wakeup_all((caddr_t)bp);
	}

	spin_unlock(&s54kbio_lock);
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
s54kbawrite(bp)
register struct buf *bp;
{
	if (bp->b_invalid)
		s54kmerge_partial(bp, NULL);
	atom_inc(&sysinfo.lwrite);
 	bp->b_dev = notminored(bp->b_dev);
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	spin_lock(&s54kbio_lock);
	s54kbasyncnt++;
	bp->b_flags |= B_ASYNC;
	spin_unlock(&s54kbio_lock);
	(*bdevsw[bmajor(bp->b_dev)].d_strategy)(bp);
	u.u_iow++;
	atom_inc(&sysinfo.bwrite);
}


/*
 * Assign a buffer for the given block and return it busy, with no
 * invalid region.  The buffer comes from s54kpartial_getblk() with no
 * validity requirement:
 *	- a wholly invalid buffer (b_invalid == BSIZE) simply has the
 *	  mark cleared; nothing is read here
 *	- a partly invalid buffer has its invalid part read in by
 *	  s54kmerge_partial()
 */
struct buf *
s54kgetblk(dev, blkno)
dev_t	dev;
daddr_t	blkno;
{
	register struct buf *bp;

	bp = s54kpartial_getblk(dev, blkno, 0);

	if (bp->b_invalid) {
		if (bp->b_invalid == BSIZE)
			bp->b_invalid = 0;
		else
			s54kmerge_partial(bp, NULL);
	}

	ASSERT(! bp->b_invalid);
	return(bp);
}

/*
 * return a bp with at least valid_count bytes valid
 *
 * The hash chain for (dev, blkno) is searched under s54kbio_lock for
 * a matching buffer that is not B_STALE.  A busy match is slept on
 * and the search restarted.  An available match is taken off its
 * list; if fewer than valid_count bytes of it are valid
 * (BSIZE - b_invalid), s54kmerge_partial() reads in the rest.
 *
 * When there is no match, the first buffer on the free list is
 * reassigned (sleeping on the free list head when it is empty),
 * rehashed under (dev, blkno) and marked wholly invalid
 * (b_invalid = BSIZE).  It is filled by s54kmerge_partial() only when
 * valid_count is non-zero.
 *
 * The returned buffer is marked B_BUSY.
 */
struct buf *
s54kpartial_getblk(dev, blkno, valid_count)
register dev_t		dev;
register daddr_t	blkno;
uint			valid_count;
{
	register struct buf *bp;
	register struct buf *dp;
        register struct buf *bfreep;

 	dev = notminored(dev);
	bfreep =  &b4kfreelist;
	/* NOTE(review): FsLTOP presumably maps logical to physical block; defined elsewhere */
	blkno = FsLTOP(BSIZE, blkno);
	dp = s54kbhash(dev, blkno);

	spin_lock(&s54kbio_lock);

    loop:
	for (bp=dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno!=blkno || bp->b_dev!=dev ||
		    (bp->b_flags & B_STALE))
			continue;
		if (bp->b_flags & B_BUSY) {
			/* in use: ask to be woken on release, then rescan */
			bp->b_want_flag = B_WANTED;
			atom_inc(&syswait.iowait);
			mfs_sleep(bp, PRIBIO+1, &s54kbio_lock);
			atom_dec(&syswait.iowait);
			goto loop;
		}
		s54knotavail(bp);
		spin_unlock(&s54kbio_lock);
		/* caller needs more valid bytes than the buffer holds */
		if (valid_count > BSIZE - bp->b_invalid)
			s54kmerge_partial(bp, NULL);

		ASSERT(bp->b_flags & B_S54K);
		return(bp);
	}

	/* not cached: take the buffer at the head of the free list */
	if ( (bp = bfreep->av_forw) == bfreep) {
		bfreep->b_flags |= BFREELIST_WANTED;
		mfs_sleep(bfreep, PRIBIO+1, &s54kbio_lock);
		goto loop;
	}
	s54knotavail(bp);
	ASSERT(! (bp->b_flags & B_DELWRI));
	/* move it from its old hash chain to the front of the new one */
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_flags = B_BUSY | B_S54K;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	bp->b_bcount = BSIZE;
	/* nothing in the reassigned buffer is valid for this block yet */
	bp->b_invalid = BSIZE;
	spin_unlock(&s54kbio_lock);
	if (valid_count)
		s54kmerge_partial(bp, NULL);
	ASSERT((BSIZE - bp->b_invalid) >= valid_count);
	return(bp);
}

static void
s54kmerge_partial(bp, bp_provided)
register buf_t	*bp;
buf_t		*bp_provided;
{
	register buf_t	*merge_bp;
	buf_t		*s54kgeteblk();

	ASSERT(bp->b_invalid);
	merge_bp = (bp_provided ? bp_provided : s54kgeteblk());
	ASSERT(merge_bp->b_bcount == BSIZE);
	ASSERT(! merge_bp->b_proc);
	merge_bp->b_dev = bp->b_dev;
	merge_bp->b_blkno = bp->b_blkno;
	merge_bp->b_flags |= B_READ;
	merge_bp->b_flags &= ~(B_DONE | B_ERROR);
	merge_bp->b_error = 0;
	atom_inc(&sysinfo.lread);
	(*bdevsw[bmajor(bp->b_dev)].d_strategy)(merge_bp);
	u.u_ior++;
	atom_inc(&sysinfo.bread);
	s54kiowait(merge_bp);
	if (merge_bp->b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = merge_bp->b_error;
	}
	else
		bcopy(merge_bp->b_un.b_addr + (BSIZE - bp->b_invalid),
		  bp->b_un.b_addr + (BSIZE - bp->b_invalid), bp->b_invalid);
	bp->b_invalid = 0;
	if (! bp_provided)
		s54kbrelse(merge_bp);
}

/*
 * Get an empty buffer that is not assigned to any particular device.
 *
 * The first buffer on the free list is taken (sleeping on the free
 * list head while the list is empty), removed from its hash chain and
 * linked onto the b_forw/b_back chain of the free list head instead.
 * It is returned marked B_BUSY|B_AGE|B_S54K with b_dev set to NODEV
 * and b_bcount set to S54KSIZE.
 */

struct buf *
s54kgeteblk()
{
	register struct buf *bp;
	register struct buf *freehd;

	freehd = &b4kfreelist;
	spin_lock(&s54kbio_lock);

	/* wait until there is something on the free list */
	for (;;) {
		bp = freehd->av_forw;
		if (bp != freehd)
			break;
		freehd->b_flags |= BFREELIST_WANTED;
		mfs_sleep(freehd, PRIBIO+1, &s54kbio_lock);
	}

	s54knotavail(bp);
	ASSERT(! (bp->b_flags & B_DELWRI));
	ASSERT(! bp->b_invalid);

	/* leave the old hash chain ... */
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;

	/* ... and join the chain headed by the free list itself */
	bp->b_forw = freehd->b_forw;
	bp->b_back = freehd;
	freehd->b_forw->b_back = bp;
	freehd->b_forw = bp;

	bp->b_flags = B_BUSY|B_AGE|B_S54K;
	bp->b_bcount = S54KSIZE;
	bp->b_dev = (dev_t)NODEV;

	spin_unlock(&s54kbio_lock);
	return(bp);
}

/*
 * Sleep until the buffer is marked B_DONE, then call geterror() on
 * it.  syswait.iowait is raised for the duration of the wait.
 */
s54kiowait(bp)
register struct buf *bp;
{
	atom_inc(&syswait.iowait);

	spin_lock(&s54kbio_lock);
	for (;;) {
		if (bp->b_flags & B_DONE)
			break;
		mfs_sleep(bp, PRIBIO, &s54kbio_lock);
	}
	spin_unlock(&s54kbio_lock);

	atom_dec(&syswait.iowait);
	geterror(bp);
}

/*
 * Clear a buffer: zero BSIZE bytes of its data area and reset the
 * residual count.
 */
s54kclrbuf(bp)
register struct buf *bp;
{
	ASSERT(bp->b_bcount != 0);

	bp->b_resid = 0;
	bzero(bp->b_un.b_words, BSIZE);
}

/*
 * Invalidate the cached blocks of a device (after last close).
 * Every buffer on any hash chain whose b_dev matches is flagged
 * B_STALE|B_AGE and has its invalid-byte count reset, all under
 * s54kbio_lock.
 */
s54kbinval(dev)
register dev_t dev;
{
	register struct buf *dp;
	register struct buf *bp;
	register int slot;

	dev = notminored(dev);

	spin_lock(&s54kbio_lock);
	for (slot = 0; slot < n4khbuf; slot++) {
		dp = (struct buf *)&s54khbuf[slot];
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_dev != dev)
				continue;
			bp->b_invalid = 0;
			bp->b_flags |= B_STALE|B_AGE;
		}
	}
	spin_unlock(&s54kbio_lock);
}

/*
 * make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 *
 * Under s54kbio_lock the selected buffers are removed from
 * b4kdirtylist, marked B_ASYNC|B_BUSY with B_DELWRI cleared, and
 * strung on a private, NULL-terminated av_forw list.  The lock is
 * then dropped and each buffer is handed to its driver's strategy
 * routine.  The writes are started asynchronously; this routine does
 * not wait for them to finish.
 */
s54kbflush(dev)
register dev_t	dev;
{
	register buf_t	*bp;
	register buf_t	*head_bp;
	register buf_t	*next_bp;
	register buf_t	*bdirty;
	register 	count;
	buf_t		*m_bp;

	count = 0;
	bdirty = &b4kdirtylist;
 	dev = notminored(dev);
	/* scratch buffer for completing partly valid blocks before writing */
	m_bp = s54kgeteblk();

	spin_lock(&s54kbio_lock);

	if (dev == NODEV) {
		/* take the whole dirty list in one piece */
		for (bp = bdirty->av_forw; bp != bdirty; bp = bp->av_forw) {
			bp->b_flags &=~(B_READ|B_DONE|B_ERROR|B_DELWRI);
			bp->b_flags |=  B_ASYNC | B_BUSY;
			count++;
		}
		/* terminate the detached list at its last element */
		if ((head_bp = bdirty->av_forw) != bdirty)
			bdirty->av_back->av_forw = NULL;
		else
			head_bp = NULL;

		/* the dirty list is now empty */
		bdirty->av_forw = &b4kdirtylist;
		bdirty->av_back = &b4kdirtylist;
	}
	else {
		head_bp = NULL;
		for (bp = bdirty->av_forw; bp != bdirty;) {
			/* remember the successor before bp is unlinked */
			next_bp = bp->av_forw;
			if (dev == bp->b_dev) {
				bp->b_flags |= B_ASYNC | B_BUSY;
				bp->b_flags &=~(B_READ|B_DONE|B_ERROR|B_DELWRI);
				bp->av_back->av_forw = next_bp;
				next_bp->av_back = bp->av_back;
				count++;
				/* push onto the front of the private list */
				bp->av_forw = head_bp;
				head_bp = bp;
			}
			bp = next_bp;
		}
	}
	/* account for the buffers moved from "dirty" to "async write" */
	bdirty->b_bcount -= count;
	ASSERT(b4kdirtylist.b_bcount >= 0);
	s54kbasyncnt += count;
	spin_unlock(&s54kbio_lock);

	for (bp = head_bp; bp;) {
		/* bp is B_ASYNC: fetch the link before the I/O is started */
		next_bp = bp->av_forw;
		if (bp->b_invalid)
			s54kmerge_partial(bp, m_bp);
		bp->b_dev = notminored(bp->b_dev);
		(*bdevsw[bmajor(bp->b_dev)].d_strategy)(bp);
		bp = next_bp;
	}
	s54kbrelse(m_bp);
	atom_add(&sysinfo.lwrite, count);
	atom_add(&sysinfo.bwrite, count);
}

static buf_t	*s54kbdflush_buf;

s54kbdflush()
{
	register buf_t	*bp;
	register buf_t	*next_bp;
	buf_t		*s54kget_flush_list();

	/* reserve a buffer for bdflush partial block resolution */
	s54kbdflush_buf = s54kgeteblk();

	while (1) {
		bp = s54kget_flush_list();
		ASSERT(bp);
		while (bp) {
			next_bp = bp->av_forw;
			if (bp->b_invalid)
				s54kmerge_partial(bp, s54kbdflush_buf);
			bp->b_dev = notminored(bp->b_dev);
			(*bdevsw[bmajor(bp->b_dev)].d_strategy)(bp);
			bp = next_bp;
		}
	}
}


/*
 * s54kbdflush_wait_work is set here, just before sleeping for more
 * dirty buffers, and cleared by s54kbdwrite when it issues the wakeup.
 *
 * s54kbdflush_wait_iodone is set here, just before sleeping for
 * outstanding async writes to drain, and cleared by s54kiodone_brelse.
 *
 * Returns a non-empty, NULL-terminated list (linked through av_forw)
 * of buffers already marked for asynchronous write.  Sleeps on
 * s54kbdflush until there is something to return:
 *	- while s54kbasyncnt is at or above tune.t_s54kbasynclo, wait
 *	  for completions
 *	- once tune.t_bdflushr ticks have passed since the last aging
 *	  scan found nothing, return any aged buffers
 *	- otherwise, if the dirty count exceeds s54kbdirty_high_water,
 *	  return buffers from the front of the dirty list
 */

buf_t	*
s54kget_flush_list()
{
	register buf_t	*ret_list;
	static uint	s54klast_aging_flush_lbolt = 0;
	buf_t		*s54kbuild_bdirty_flush_list();
	buf_t		*s54kbuild_aging_flush_list();

	ret_list = NULL;
	spin_lock(&s54kbio_lock);

	while (1) {
		/* too many async writes outstanding: wait for some to finish */
		if (s54kbasyncnt >= tune.t_s54kbasynclo) {
			s54kbdflush_wait_iodone = 1;
			mfs_sleep(s54kbdflush, PRIBIO, &s54kbio_lock);
			continue;
		}
		if ((lbolt - s54klast_aging_flush_lbolt) >= tune.t_bdflushr) {
			if (ret_list = s54kbuild_aging_flush_list())
				break;
			/* the timestamp only advances when the scan finds nothing */
			s54klast_aging_flush_lbolt = lbolt;
		}
		if (b4kdirtylist.b_bcount > s54kbdirty_high_water) {
			/* get at most tune.t_s54kbasynchi items */
			ret_list =  s54kbuild_bdirty_flush_list();
			ASSERT(ret_list);
			break;
		}
		ASSERT(b4kdirtylist.b_bcount <= s54kbdirty_high_water);

		/* nothing to do: sleep until woken on s54kbdflush */
		s54kbdflush_wait_work = 1;
		mfs_sleep(s54kbdflush, PRIBIO, &s54kbio_lock);
	}
	spin_unlock(&s54kbio_lock);
	return(ret_list);
}

/*
 * Detach up to tune.t_s54kbasynchi buffers from the front of
 * b4kdirtylist and return them as a NULL-terminated av_forw list.
 * Each detached buffer has B_DELWRI cleared and B_ASYNC|B_BUSY set;
 * the dirty count, s54kbasyncnt and the sysinfo write counters are
 * adjusted by the number taken.
 *
 * The dirty list must not be empty.  The only caller in this file,
 * s54kget_flush_list(), calls this with s54kbio_lock held.
 */
buf_t	*
s54kbuild_bdirty_flush_list()
{
	register uint	count;
	register buf_t	*bp;
	buf_t		*bp_list;

	ASSERT(b4kdirtylist.b_bcount >= 0);
	ASSERT(b4kdirtylist.av_forw != &b4kdirtylist);

	bp = b4kdirtylist.av_forw;
	count = min(tune.t_s54kbasynchi, b4kdirtylist.b_bcount);
	b4kdirtylist.b_bcount -= count;
	s54kbasyncnt += count;
	atom_add(&sysinfo.lwrite, count);
	atom_add(&sysinfo.bwrite, count);

	/* mark the first count buffers for asynchronous write */
	for (; count; bp = bp->av_forw, count--) {
		ASSERT(bp->b_flags & B_DELWRI);
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
		bp->b_flags |= (B_ASYNC|B_BUSY);
	}

	/*
	 * bp is the first item to be left behind
	 * (the list head itself when every buffer was taken)
	 */
	bp_list = b4kdirtylist.av_forw;
	/* cut the list just before bp */
	bp->av_back->av_forw = NULL;

	/* what remains starts at bp */
	b4kdirtylist.av_forw = bp;
	bp->av_back = &b4kdirtylist;

	return(bp_list);
}

/*
 * Collect dirty buffers that have been dirty for at least
 * v.v_autoup_hz ticks (measured from b_start).  At most
 * tune.t_s54kbasynchi buffers are taken.  Each one is unlinked from
 * b4kdirtylist with s54knotavail(), has B_DELWRI cleared and B_ASYNC
 * set, and is pushed onto the front of a NULL-terminated av_forw
 * list, which is returned (NULL if nothing qualified).
 *
 * The only caller in this file, s54kget_flush_list(), calls this with
 * s54kbio_lock held.
 */
buf_t *
s54kbuild_aging_flush_list()
{
	register buf_t	*bp;
	register buf_t	*head_bp;
	register buf_t	*next_bp;
	register	autoup;
	register	count;

	head_bp = NULL;
	count = 0;
	autoup = v.v_autoup_hz;

	bp = b4kdirtylist.av_forw;
	while (bp != &b4kdirtylist) {
		/* the link is overwritten below, so fetch it first */
		next_bp = bp->av_forw;

		ASSERT(bp->b_flags & B_DELWRI);

		if ((lbolt - bp->b_start) >= autoup) {
			s54knotavail(bp);
			ASSERT(b4kdirtylist.b_bcount >= 0);
			bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
			bp->b_flags |= B_ASYNC;
			bp->av_forw = head_bp;
			head_bp = bp;
			if (++count >= tune.t_s54kbasynchi)
				break;
		}
		bp = next_bp;
	}

	s54kbasyncnt += count;
	atom_add(&sysinfo.lwrite, count);
	atom_add(&sysinfo.bwrite, count);
	return(head_bp);
}

/*
 * I/O completion for a buffer.  Under s54kbio_lock a driver-reported
 * error is moved from b_driver_flags into b_flags and the buffer is
 * marked B_DONE.  An asynchronous buffer is then released through
 * s54kiodone_brelse(); otherwise everyone sleeping on the buffer is
 * woken.
 */
void
s54kiodone(bp)
register struct buf *bp;
{
	ASSERT(bp->b_flags & B_S54K);

	ASSERT(! bp->b_invalid);
	spin_lock(&s54kbio_lock);

	if (bp->b_driver_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_driver_flags &= ~B_ERROR;
	}
	bp->b_flags |= B_DONE;

	if ((bp->b_flags & B_ASYNC) == 0) {
		bp->b_want_flag = 0;
		mfs_wakeup_all((caddr_t)bp);
	}
	else {
		/* mcm if this io invoked via breada then an error will be ignored */
		s54kiodone_brelse(bp);
	}

	spin_unlock(&s54kbio_lock);
}

/*
 * Sleep until no asynchronous writes are outstanding, i.e. until
 * s54kbasyncnt has dropped to zero.  s54kbasynwait is set before each
 * sleep so that s54kiodone_brelse() knows to issue the wakeup.
 */
s54kbdwait()
{
	spin_lock(&s54kbio_lock);
	for (;;) {
		if (s54kbasyncnt == 0)
			break;
		s54kbasynwait = 1;
		mfs_sleep((caddr_t)&s54kbasyncnt, PRIBIO, &s54kbio_lock);
	}
	spin_unlock(&s54kbio_lock);
}

/*
 * Like s54kbdwait(), but give up once ticks clock ticks (measured
 * with lbolt) have passed.  A timeout calling setrun() on the current
 * process is armed around each sleep and cancelled afterwards.
 * Returns the value of s54kbasyncnt read after the lock is dropped;
 * zero means all asynchronous writes had finished.
 */
s54kbdwait_timeout(ticks)
register int	ticks;
{
	register time_t	start = lbolt;
	int		id, setrun();

	spin_lock(&s54kbio_lock);
	for (;;) {
		if (s54kbasyncnt == 0)
			break;
		if (lbolt - start >= ticks)
			break;
		s54kbasynwait = 1;
		/* arm a timeout for the time remaining, plus one tick */
		id = timeout(setrun, (caddr_t)u.u_procp,
			ticks - (lbolt - start) + 1);
		mfs_sleep((caddr_t)&s54kbasyncnt, PRIBIO, &s54kbio_lock);
		untimeout(id);
	}
	spin_unlock(&s54kbio_lock);

	return (s54kbasyncnt);
}

/*
 * Release a buffer whose asynchronous I/O has just completed.
 * Called from s54kiodone(), which already holds s54kbio_lock, so no
 * locking is done here.
 *
 * The buffer always goes onto the free list (it must not be
 * B_DELWRI).  A buffer that finished with an error is marked stale
 * and detached from its device.  For a completed write the
 * outstanding async write count is lowered and anyone waiting on that
 * count is woken.
 */

s54kiodone_brelse(bp)
register struct buf *bp;
{
	register struct buf **backp;
        register struct buf *bfreep;

	ASSERT(! (bp->b_flags & B_DELWRI));
	ASSERT(! bp->b_invalid);
	bfreep = &b4kfreelist;
	/* Put buffer on freelist, at the beginning if B_AGE,
	   otherwise at the end.
	 */
	if (bp->b_flags & B_ERROR) {
		/* contents are unusable: make the buffer the first to be reused */
		bp->b_flags |= B_STALE|B_AGE;
		bp->b_flags &= ~(B_ERROR|B_DELWRI);
		bp->b_dev = NODEV;
		bp->b_error = 0;
	}

	if (bp->b_flags & B_AGE) {
		/* insert at the head of the free list */
		backp = &bfreep->av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = bfreep;
	} else {
		/* insert at the tail of the free list */
		backp = &bfreep->av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = bfreep;
	}

	bp->b_reltime = lbolt;

	if (bp->b_want_flag) {
		/* someone is waiting for this particular buffer */
		bp->b_want_flag = 0;
		bp->b_flags &= ~(B_BUSY|B_ASYNC|B_AGE|B_READAHEAD);
		mfs_wakeup_all((caddr_t)bp);
	}
	else {
		/* otherwise wake anyone waiting for any free buffer */
		bp->b_flags &= ~(B_BUSY|B_ASYNC|B_AGE|B_READAHEAD);
		if (bfreep->b_flags & BFREELIST_WANTED) {
			bfreep->b_flags &= ~BFREELIST_WANTED;
			mfs_wakeup_all((caddr_t)bfreep);
		}
	}

	/* only writes are counted in s54kbasyncnt (read-ahead is B_READ) */
	if ((bp->b_flags & B_READ) == 0) {
		s54kbasyncnt--;
		/* bdflush is waiting for the async count to drop */
		if (s54kbdflush_wait_iodone &&
		  (s54kbasyncnt < tune.t_s54kbasynclo)) {
			s54kbdflush_wait_iodone = 0;
			mfs_wakeup_all(s54kbdflush);
		}
		/* s54kbdwait()/s54kbdwait_timeout() wait for zero */
		if ((s54kbasyncnt == 0) && s54kbasynwait) {
			s54kbasynwait = 0;
			mfs_wakeup_all(&s54kbasyncnt);
		}
	}
}

/*
 * release the buffer, with no I/O implied.
 *
 * The buffer must be busy and not B_ASYNC.  Under s54kbio_lock it is
 * put on the dirty list if it is B_DELWRI (and the dirty count is
 * raised), otherwise on the free list.  A buffer carrying B_ERROR is
 * first marked stale, detached from its device and stripped of
 * B_DELWRI, so it always ends up on the free list.  Waiters on the
 * buffer, or failing that on the free list, are woken.
 */

s54kbrelse(bp)
register struct buf *bp;
{
	register struct buf **backp;
        register struct buf *bfreep;

	/* Put buffer on freelist, at beginning if B_AGE, otherwise at end. */

	ASSERT(bp->b_flags & B_BUSY);
	ASSERT(! (bp->b_flags & B_ASYNC));

	spin_lock(&s54kbio_lock);
	if (bp->b_flags & B_ERROR) {
		/* contents are unusable: make the buffer the first to be reused */
		bp->b_flags |= B_STALE|B_AGE;
		bp->b_flags &= ~(B_ERROR|B_DELWRI);
		bp->b_dev = NODEV;
		bp->b_error = 0;
	}

	/* from here on bfreep is whichever list the buffer is going onto */
	if (bp->b_flags & B_DELWRI) {
		bfreep = &b4kdirtylist;
		bfreep->b_bcount++;
	}
	else
		bfreep = &b4kfreelist;

	if (bp->b_flags & B_AGE) {
		/* insert at the head of the list */
		backp = &bfreep->av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = bfreep;
	} else {
		/* insert at the tail of the list */
		backp = &bfreep->av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = bfreep;
	}

	if (bp->b_want_flag) {
		/* someone is waiting for this particular buffer */
		bp->b_want_flag = 0;
		mfs_wakeup_all((caddr_t)bp);
	}
	else  if (! (bp->b_flags & B_DELWRI)) {
		/* the buffer went onto the free list: wake free-list waiters */
		if (bfreep->b_flags & BFREELIST_WANTED) {
			bfreep->b_flags &= ~BFREELIST_WANTED;
			mfs_wakeup_all((caddr_t)bfreep);
		}
	}
	bp->b_flags &= ~(B_BUSY|B_ASYNC|B_AGE|B_READAHEAD);
	bp->b_reltime = lbolt;
	spin_unlock(&s54kbio_lock);
}

extern struct buf bufhdrs[1];

/*
 * Initialize the buffer I/O system: empty the dirty and free lists,
 * derive the dirty high-water mark from tune.t_s54kbdirtypct, give
 * every header in bufhdrs4k[] its S54KSIZE slice of buffers4k and
 * release it onto the free list, and set all hash chain heads to
 * empty.
 *
 * High-water mark: a percentage outside 0..100 selects s54knbuf;
 * otherwise it is that percentage of s54knbuf, but never less than 1.
 */
s54kbinit()
{
	register struct buf	*bp;
	register struct buf	*dp;
	register unsigned	i;
	register char		*pbuffer;

	/* the dirty list starts out empty */
	dp = &b4kdirtylist;
	dp->av_forw = dp->av_back = dp;
	dp->b_forw = dp->b_back = dp;

	if ((tune.t_s54kbdirtypct < 0) || (tune.t_s54kbdirtypct > 100))
		s54kbdirty_high_water = s54knbuf;
	else {
		s54kbdirty_high_water = (s54knbuf * tune.t_s54kbdirtypct) / 100;
		if (s54kbdirty_high_water == 0)
			s54kbdirty_high_water = 1;
	}

	/* so does the free list; s54kbrelse() fills it in below */
	dp = &b4kfreelist;
	dp->av_forw = dp->av_back = dp;
	dp->b_forw = dp->b_back = dp;

	bp = bufhdrs4k;
	pbuffer = buffers4k;
	i = 0;
	while (i < s54knbuf) {
		bp->b_dev = NODEV;
		bp->b_un.b_addr = pbuffer;
		bp->b_iodone = s54kiodone;
		bp->b_bcount = S54KSIZE;
		bp->b_flags = B_BUSY|B_S54K;

		/* hang the header off the free list head's b_forw/b_back chain */
		bp->b_back = dp;
		bp->b_forw = dp->b_forw;
		dp->b_forw->b_back = bp;
		dp->b_forw = bp;

		/* the buffer is marked busy above so s54kbrelse() will accept it */
		s54kbrelse(bp);

		i++;
		bp++;
		pbuffer += S54KSIZE;
	}

	/* every hash chain starts out pointing at itself */
	for (i = 0; i < n4khbuf; i++) {
		dp = (struct buf *)&s54khbuf[i];
		s54khbuf[i].b_back = dp;
		s54khbuf[i].b_forw = dp;
	}
}
