/*	START NEW ARIX SCCS HEADER			*/
/*							*/
/*	@(#) mirror.c: version 25.1 created on 11/27/91 at 14:58:46	*/
/*							*/
/*	Copyright (c) 1990 by Arix Corporation		*/
/*	All Rights Reserved				*/
/*							*/
#ident	"@(#)mirror.c	25.1	11/27/91 Copyright (c) 1990 by Arix Corporation"
/*							*/
/*	END NEW ARIX SCCS HEADER			*/
/*							*/
/*
 * mirror.c -- mirror slice driver
 */

/*
 * JPC	4/90	Major rewrite to avoid system crash and to fix long known bugs
 */

#include "sys/types.h"
#include "sys/sysmacros.h"
#include "sys/user.h"
#include "sys/errno.h"
#include "sys/conf.h"
#include "sys/var.h"
#include "sys/buf.h"
#include "sys/vd.h"
#include "sys/mfs.h"
#include "sys/iobuf.h"
#include "sys/ioctl.h"
#include "sys/debug.h"
#include "sys/synch.h"
#include "sys/open.h"
#include "sys/file.h"
#include "sys/erec.h"
#include "sys/cmn_err.h"

#ifdef SECON
#define MR_PERM_CK	auth_md
#else
#define MR_PERM_CK	suser
#endif

#define N_CP_BLKS	256
#define N_CP_PAGES	dtop(N_CP_BLKS)

#define b_first		b_resid		/* be careful only to set b_resid
					 * after being finished with b_first
					 */
#define b_second	av_back
#define b_kern_bp	b_back		/* this field should only be accessed
					 * in the mirrorbuf pool, where there
					 * is no other use for b_back
					 */

#define io_waitbuf	io_s1		/* # of buffer requests that waited */
#define io_total	io_s2		/* total # of buffer requests */

#define B_FIRST(bp)	((buf_t *)((bp)->b_first))	/* do b_first cast */

static struct iobuf	mrtab;		/* freelist and request queue	*/
static spin_lock_t	mrfreelist_sem;	/* free list lock		*/
static spin_lock_t	mr_iodone_sem;	/* iodone lock			*/
static spin_lock_t	mr_update_lock;	/* error update lock		*/
static suspend_lock_t	mr_open_lock = SUSPEND_INIT(PZERO + 6);
static uint		mr_conflict;	/* copy vs. i/o conflict count	*/
static mirror_t		*update_head, *update_tail;
static buf_t		*last_mr_buf;	/* pointer to last mrbufhdr	*/
static mirror_t		*last_mr_data;	/* pointer to last mr_data	*/

/*
 * Forward declarations.
 *
 * Every routine that is defined `static' further down in this file is
 * declared `static' here as well.  Declaring (or implicitly declaring) a
 * function with external linkage and later defining it `static' is a
 * linkage conflict that ANSI compilers reject, so the storage class must
 * be visible before the first use.
 */
static void		mriodone();
static buf_t		*getmrbuf();
dev_t 			full_dev();
static mirror_t		*find_mir(), *new_mirror();
static int		mrsend(), mrqueue(), mrrelease();
static int		mr_flush_conflict(), mirror_disk();
static int		mr_read_error(), mr_write_error();
static int		update_mirrortab();

extern dev_t		rootdev;
extern struct mr_devsw	mr_devsw[];
extern buf_t		mrbufhdrs[];
extern uint		num_mr_bufs;

extern uint		add_chain(), remove_chain();
extern buf_t		*reverse_bp_list();
extern buf_t		*get_physbuf();
extern void		rel_physbuf();
extern ulong		atom_inc_nz();

/*****************************************************************************
When a device driver has its mirrorable option turned on in its master file,
all of its entries in bdevsw and cdevsw are replaced by calls to mirror
functions.  (Actually, there may be a layer of upkern stubs between the
[bc]devsw tables and the mirror stub functions.)

Mropen, mrclose, mrread, mrwrite, and mrioctl are called from the stubs
automatically generated by the config utility.  The stub functions were needed
to provide the major device information, because open, close, read, write and
ioctl functions are only called with a minor device number.  The full_dev
function calculates the full device number, which is checked to see if
mirroring is turned on.  If mirroring is off, we look up the original function
in mr_devsw and call it with the appropriate arguments.  So, all mirrorable
devices have the added overhead of checking mirror status, as well as the cost
of the mirror stub call to mrxxxxx and the call to the actual driver.

Mrstrategy receives a buffer pointer containing a full device number, so it
does not need a stub function.
*****************************************************************************/

/*
 * mropen -- open a possibly mirrored device
 *
 * Arguments:
 *  firstmajor	-- the first major number in the device's sequence
 *  minordev	-- the minor device number
 *  flag	-- file access flags
 *  type	-- open type (see sys/open.h)
 */

mropen(firstmajor, minordev, flag, type)
dev_t	minordev;
int	flag;
int	type;
{
	register mirror_t	*m;
	register dev_t		fdev;

	if (!(fdev = full_dev(firstmajor, minordev))) {
		u.u_error = ENXIO;
		return;
	}

	suspend_lock(&mr_open_lock);
	if (mr_shine(fdev)) { /* shine through to regular driver */
		suspend_unlock(&mr_open_lock);
		(*mr_devsw[major(fdev)].m_open)(minor(fdev), flag, type);
		return;
	}

	if (mr_dev_closed(fdev)) {
		if (!(m = find_mir(fdev))) {
			suspend_unlock(&mr_open_lock);
			(*mr_devsw[major(fdev)].m_open)(minor(fdev), flag,type);
			return;
		}
	} else {
		m = mr_addr(fdev);
	}
	ASSERT(m >= mr_data && m <= last_mr_data);

	suspend_lock(&m->m_sem);
	suspend_unlock(&mr_open_lock);
	(void)ot_openchk(&m->m_opentyp, flag, type);
	suspend_unlock(&m->m_sem);
}


/*
 * mrclose -- close a possibly mirrored device
 *
 *	See the comment above mropen for details.
 *
 * Arguments:
 *  firstmajor	-- the first major number in the device's sequence
 *  minordev	-- the minor device number
 *  flag	-- file access flags
 *  type	-- open type (see sys/open.h)
 */

mrclose(firstmajor, minordev, flag, type)
dev_t	minordev;
int	flag, type;
{
	register mirror_t	*m;
	register dev_t		fdev;

	if (!(fdev = full_dev(firstmajor, minordev))) {
		if (u.u_error == 0)
			u.u_error = ENXIO;
		return;
	}

	suspend_lock(&mr_open_lock);
	if (mr_shine(fdev)) {
		suspend_unlock(&mr_open_lock);
		(*mr_devsw[major(fdev)].m_close)(minor(fdev), flag , type);
		return;
	}

	if (mr_dev_closed(fdev)) {
		if (!(m = find_mir(fdev))) {
			suspend_unlock(&mr_open_lock);
			(*mr_devsw[major(fdev)].m_close)(minor(fdev), flag,
			  type);
			return;
		}
	}
	else
		m = mr_addr(fdev);

	ASSERT(m >= mr_data && m <= last_mr_data);

	suspend_lock(&m->m_sem);
	suspend_unlock(&mr_open_lock);
	ot_closechk(&m->m_opentyp, type);
	suspend_unlock(&m->m_sem);
}


/*
 * mrprint -- print an error message for a possibly mirrored device
 *
 *	See the comment above mropen for details.
 *
 * Arguments:
 *  maj		-- the first major number in the device's sequence
 *  dev		-- a possibly full, possibly minor device number
 *  str		-- message string
 */

mrprint(maj, dev, str)
int	maj;
dev_t	dev;
char	*str;
{
	int	(*prfunc)();
	dev_t	orig_dev = dev;		/* caller's value, kept for the warning */

	/*
	 * A device number without UNMINOR_BIT is treated as a minor number
	 * and expanded with full_dev().  If that lookup fails, dev has been
	 * overwritten with 0, so the warning reports the number the caller
	 * actually passed in rather than the failed result.
	 */
	if (!(dev & UNMINOR_BIT) && (dev = full_dev(maj, dev)) == 0)
		cmn_err(CE_WARN,
		  "mrprint: device not mirrored! maj=%d, dev=0x%x, str=%s",
		  maj, orig_dev, str);
	/* shine-through device with a print routine: let the driver do it */
	else if (mr_shine(dev) && (prfunc = mr_devsw[major(dev)].m_print))
		(*prfunc)(dev, str);
	/* otherwise report the message against the mirrored device */
	else
		cmn_err(CE_WARN, "%s on mirrored device 0x%x",
		  str, denotminored(dev));
}

/*
 * mr_name -- returns the name of a given device
 */

static char *
mr_name(m, dev)
mirror_t	*m;
dev_t		dev;
{
	/* label used when dev is neither side of the mirror */
	char	*side = "?????";

	if (dev == m->m_pri)
		side = "primary";
	else if (dev == m->m_sec)
		side = "secondary";

	return (side);
}

/*
 * mrstrategy -- start I/O to a possibly mirrored device
 *
 *	Mrstrategy tries to start a read to one side of a mirror and to
 *	start a write to both sides of a mirror.  If not enough mirror
 *	buffers are available, the kern_bp will be queued until some are
 *	freed.  The mirror I/O count is incremented when a bp enters the
 *	mirror code; it will be decremented in mriodone when the request
 *	is completely done.  If the mirror is in the process of being
 *	deleted, the increment will fail, and the bp will be sent to the
 *	primary side strategy routine.
 *
 * Arguments:
 *  kern_bp	-- the kernel buffer pointer
 */

void
mrstrategy(kern_bp)
register buf_t	*kern_bp;
{
	register buf_t		*driver_bp;
	register dev_t		fdev = kern_bp->b_dev;
	register mirror_t	*m;

	ASSERT(fdev & UNMINOR_BIT);	/* use only as directed */

	/*
	 * Start from a clean completion state.  Note that b_resid doubles
	 * as b_first (see the #define at the top of the file), so this also
	 * clears the "first mirror buffer" slot.
	 */
	kern_bp->b_resid = 0;
	kern_bp->b_error = kern_bp->b_driver_flags = 0;

	/* not mirrored: hand the request straight to the real driver */
	if (mr_shine(fdev)) {
		(*mr_devsw[major(fdev)].m_strategy)(kern_bp);
		return;			/* no mirror	*/
	}

	/* mirrored but closed: fail the request with EBUSY */
	if (mr_dev_closed(fdev)) {
		kern_bp->b_driver_flags |= B_ERROR;
		kern_bp->b_error = EBUSY;
		kern_bp->b_resid = kern_bp->b_bcount;
		iodone(kern_bp);
		return;			/* mirror shut down	*/
	}

	m = mr_addr(kern_bp->b_dev);
	ASSERT(m >= mr_data && m <= last_mr_data);

	/*
	 * Request starts at or beyond the end of the mirror.  A read that
	 * starts exactly at m_size completes with nothing transferred and
	 * no error; anything else (a write, or a start past m_size) gets
	 * ENXIO.
	 */
	if (kern_bp->b_blkno >= m->m_size) {
		if (kern_bp->b_blkno > m->m_size ||
		    !(kern_bp->b_flags & B_READ)) {
			kern_bp->b_error = ENXIO;
			kern_bp->b_driver_flags |= B_ERROR;
		}
		kern_bp->b_resid = kern_bp->b_bcount;
		iodone(kern_bp);
		return;			/* end of slice */
	}

	/*
	 * Count this request against the mirror.  If the increment fails
	 * the request is passed to the real driver for this device instead
	 * (the header comment above describes this as the mirror being
	 * deleted).
	 */
	if (!atom_inc_nz(&m->m_iocnt)) {
		(*mr_devsw[major(fdev)].m_strategy)(kern_bp);
		return;			/* no mirror	*/
	}

	kern_bp->b_second = NULL;

	/* no mirror buffer available: queue until mrrelease() supplies one */
	if ((driver_bp = getmrbuf()) == NULL) {
		mrqueue(kern_bp);
		return;
	}
	driver_bp->b_kern_bp = kern_bp;
	kern_bp->b_first = (uint)driver_bp;

					/* writes always get two mirror bufs */
	if (! (kern_bp->b_flags & B_READ)) {
		/*
		 * Second buffer unavailable: queue with b_first already
		 * set; mrrelease() fills in b_second later.
		 */
		if ((driver_bp = getmrbuf()) == NULL) {
			mrqueue(kern_bp);
			return;
		}
		kern_bp->b_second = driver_bp;
		driver_bp->b_kern_bp = kern_bp;
	}
	mrsend(kern_bp);
}

/*
 * mrsend -- do mirror I/O
 *		used by mrstrategy, mrrelease, and mr_flush_conflict
 *
 *	Note the extra code for I/O during a copy operation and the
 *	monitored write counts.
 */

static
mrsend(kern_bp)
register buf_t	*kern_bp;
{
	register mirror_t	*m;
	register buf_t		*driver_bp;
	register ulong		dev;
	register int		count;		/* bcount clipped to m_size */
	buf_t			*bp;

	ASSERT(kern_bp->b_dev & UNMINOR_BIT);	/* use only full dev numbers */

	m = mr_addr(kern_bp->b_dev);
	ASSERT(m >= mr_data && m <= last_mr_data);

	if (! mr_open(m)) {
		kern_bp->b_driver_flags |= B_ERROR;
		kern_bp->b_error = ENXIO;
		mrrelease(B_FIRST(kern_bp));
		kern_bp->b_first = 0;
		if (! (kern_bp->b_flags & B_READ)) {
			mrrelease(kern_bp->b_second);
			kern_bp->b_second = 0;
		}
		kern_bp->b_resid = kern_bp->b_bcount;
		iodone(kern_bp);
		return;
	}

	/*
	 * b_blkno was checked against m_size in mrstrategy, so we only
	 * need to check the total transfer against the end of mirror here
	 */
	count = kern_bp->b_blkno + btodt(kern_bp->b_bcount) - m->m_size;
	if (count > 0)
		count = dtob(count);		/* clip transfer */
	else
		count = kern_bp->b_bcount;	/* fits in mirror */

	driver_bp = B_FIRST(kern_bp);
	ASSERT(driver_bp >= mrbufhdrs && driver_bp <= last_mr_buf);

	/* common initializations */

	driver_bp->b_blkno = kern_bp->b_blkno;
	driver_bp->b_un.b_addr = kern_bp->b_un.b_addr;
	driver_bp->b_proc = kern_bp->b_proc;
	driver_bp->b_bcount = count;
	driver_bp->b_error = driver_bp->b_driver_flags = 0;
	driver_bp->b_flags = B_BUSY | (kern_bp->b_flags & B_PHYS);

	/* READ
	 *
	 *  1) copying
	 *	read from source side
	 *  2) valid
	 *	read from side selected by m_ppercent
	 *  3) invalid
	 *	read from good side
	 */

	if (kern_bp->b_flags & B_READ) {

		/* Figure out which side to send the operation to. */

		if (mr_copying(m) || ! mr_valid(m)) {
			dev = m->m_valid_dev;
		}
		else {
			switch ( m->m_ppercent ) {
			case 0:
				dev = m->m_sec;
				break;
			case 100:
				dev = m->m_pri;
				break;
			default:
				dev = mr_side(m, kern_bp->b_blkno);
			}
		}
		driver_bp->b_dev = dev;
		driver_bp->b_flags |= B_READ;

		(*mr_devsw[major(dev)].m_strategy)(driver_bp);
		return;
	}

	/* WRITE:
	 *
	 *  1) copying
	 *	A) below copy zone	-- write to both sides
	 *	B) in copy zone		-- wait until copy done
	 *	C) above copy zone	-- write to source side + copy monitors
	 *  2) valid
	 *	write to both sides
	 *  3) invalid
	 *	write to good side
	 *
	 * If writing only to one side, mrrelease the other mirror buf.
	 */

	if (mr_copying(m)) {
		if (kern_bp->b_blkno + btodt(count) > m->m_cp_blk) {
			if (kern_bp->b_blkno < m->m_cp_blk + N_CP_BLKS) {
				(void)add_chain(&m->m_conf_head, kern_bp,
				  &kern_bp->av_forw);
				atom_inc(&mr_conflict);
				return;				/* case B */
			}
			/* else, case C */
			dev = m->m_flags & MR_MON_IDX;
			if (!atom_inc_nz(&m->m_mon[dev])) {
				cmn_err(CE_PANIC,
				  "mrsend: atom_inc_nz failed on mon %d!",
				  dev);
			}
			kern_bp->b_driver_flags = MKF_MON_FLAG | dev;
			/* copy accounting done */
			driver_bp->b_dev = dev = m->m_valid_dev;
			bp = kern_bp->b_second;
			kern_bp->b_second = NULL;
			(*mr_devsw[major(dev)].m_strategy)(driver_bp);
			mrrelease(bp);
			return;
		}
		/* else, case 1A */
	}

	if (mr_valid(m)) {
		driver_bp->b_dev = dev = m->m_pri;	/* cases 1A, or 2 */
		(*mr_devsw[major(dev)].m_strategy)(driver_bp);

		/* send second request */

		driver_bp = kern_bp->b_second;
		ASSERT(driver_bp >= mrbufhdrs && driver_bp <= last_mr_buf);
		driver_bp->b_dev = dev = m->m_sec;
		driver_bp->b_blkno = kern_bp->b_blkno;
		driver_bp->b_flags = B_WRITE | B_BUSY |
		  (kern_bp->b_flags & B_PHYS);
		driver_bp->b_un.b_addr = kern_bp->b_un.b_addr;
		driver_bp->b_proc = kern_bp->b_proc;
		driver_bp->b_bcount = count;
		driver_bp->b_error = driver_bp->b_driver_flags = 0;
		(*mr_devsw[major(dev)].m_strategy)(driver_bp);
	}
	else {
		driver_bp->b_dev = dev = m->m_valid_dev;	/* case 3 */
		bp = kern_bp->b_second;
		kern_bp->b_second = NULL;
		(*mr_devsw[major(dev)].m_strategy)(driver_bp);
		mrrelease(bp);
	}
}


/*
 * mrread -- read from a possibly mirrored device
 *
 *	See the comment above mropen for details.
 *
 * Arguments:
 *  firstmajor	-- the first major number in the device's sequence
 *  minordev	-- the minor device number
 */

mrread(firstmajor, minordev)
dev_t	minordev;
{
	register mirror_t	*m;
	register dev_t		dev;

	dev = full_dev(firstmajor, minordev);

	if (dev == 0) {
		/* no such device in this major sequence */
		u.u_error = ENXIO;
	} else if (mr_shine(dev)) {
		/* not mirrored: the real driver does the read */
		(*mr_devsw[major(dev)].m_read)(minor(dev));
	} else if (mr_dev_closed(dev)) {
		u.u_error = EBUSY;
	} else {
		m = mr_addr(dev);
		ASSERT(m >= mr_data && m <= last_mr_data);

		if (! mr_open(m))
			u.u_error = ENXIO;
		else if (physck(m->m_size, B_READ))
			/* raw read, routed through mrstrategy */
			physio(mrstrategy, (buf_t *)NULL, dev, B_READ);
	}
}

/*
 * mrwrite -- write to a possibly mirrored device
 *
 *	See the comment above mropen for details.
 *
 * Arguments:
 *  firstmajor	-- the first major number in the device's sequence
 *  minordev	-- the minor device number
 */

mrwrite(firstmajor, minordev)
int	firstmajor;
dev_t	minordev;
{
	register mirror_t	*m;
	register dev_t		dev;

	dev = full_dev(firstmajor, minordev);

	if (dev == 0) {
		/* no such device in this major sequence */
		u.u_error = ENXIO;
	} else if (mr_shine(dev)) {
		/* not mirrored: the real driver does the write */
		(*mr_devsw[major(dev)].m_write)(minor(dev));
	} else if (mr_dev_closed(dev)) {
		u.u_error = EBUSY;
	} else {
		m = mr_addr(dev);
		ASSERT(m >= mr_data && m <= last_mr_data);

		if (! mr_open(m))
			u.u_error = ENXIO;
		else if (physck(m->m_size, B_WRITE))
			/* raw write, routed through mrstrategy */
			physio(mrstrategy, (buf_t *)NULL, dev, B_WRITE);
	}
}

/*
 * mrioctl -- do ioctl for a possibly mirrored device
 *
 *	See the comment above mropen for details.
 *
 * Arguments:
 *  firstmajor	-- the first major number in the device's sequence
 *  minordev	-- the minor device number
 *  cmd		-- ioctl command
 *  arg		-- ioctl argument
 *  mode	-- modes file was opened with
 */

mrioctl(firstmajor, minordev, cmd, arg, mode)
register dev_t	minordev;
int		cmd, arg, mode;
{
	register mirror_t	*m = NULL;
	register dev_t		fdev;

	if (!(fdev = full_dev(firstmajor, minordev))) {
		u.u_error = ENXIO;
		return;
	}

	/*
	 * Stage 1: classify the device and decide whether the command is
	 * handled here or by the underlying driver.  On leaving this
	 * if-chain, m is non-NULL unless cmd is WAIT_MIRROR_ERROR (which
	 * picks its own mirror below) or the device shines through and cmd
	 * is CREATE_MIRROR_DISK / INIT_MIRROR_DISK.
	 */
	if (cmd == WAIT_MIRROR_ERROR) {
		/* Doesn't care about the state of its device at all. */
		/* Yuck!  An 'if' without 'then' code is a Bad Thing, */
		/* but we can't afford to indent an extra level here. */
	}
	else if (mr_dev_closed(fdev)) {
		switch (cmd) {
		/* mirror management commands: need the mirror by primary dev */
		case LOCK_MIRROR_DISK:
		case DELETE_MIRROR_DISK:
		case GET_MIRROR_DISK:
		case RE_MIRROR_DISK:
			if ((m = find_mir(fdev)) == NULL) {
				u.u_error = ENODEV;
				return;
			}
			break;
		/* informational commands go to the underlying driver */
		case GET_VIRT_DISK:	/* allow stripe status when closed */
		case GET_DEV_TYPE:	/* might as well allow these too */
		case GET_LOG_TYPE:
		case GET_DISK_SIZE:
		case GET_CYLINDER_SIZE:
		case GET_TRACK_SIZE:
		case GET_INTERLACE:
		case GET_OPEN_TYPES:
			(*mr_devsw[major(fdev)].m_ioctl)(minor(fdev), cmd, arg,
			  mode);
			return;
		default:
			u.u_error = EBUSY;
			return;
		}
	}
	else if (mr_shine(fdev)) {
		/* commands outside the mirror range belong to the driver */
		if (cmd < FIRST_MR_IOCTL || cmd > LAST_MR_IOCTL) {
			(*mr_devsw[major(fdev)].m_ioctl)(minor(fdev), cmd, arg,
			  mode);
			return;
		}
		/* only mirror creation makes sense on an unmirrored device */
		if (cmd != CREATE_MIRROR_DISK && cmd != INIT_MIRROR_DISK) {
			u.u_error = ENXIO;
			return;
		}
	}
	else {
		m = mr_addr(fdev);
		ASSERT(m >= mr_data && m <= last_mr_data);
	}

	/* Stage 2: carry out the command. */
	switch (cmd) {
	case GET_DEV_TYPE:
		u.u_rval1 = MIRROR_TYPE;
		break;

	case GET_LOG_TYPE:
		u.u_rval1 = LDT_UNIX;	/* unix file system type	*/
		break;

	case GET_DISK_SIZE:
		u.u_rval1 = m->m_size;
		break;

	case GET_CYLINDER_SIZE:
		u.u_rval1 = m->m_cyl_size;
		break;

	case GET_TRACK_SIZE:
		/* return smallest component track size */
		/* (arg is reused as scratch for the primary's answer) */
		fdev = m->m_pri;
		(*mr_devsw[major(fdev)].m_ioctl)(minor(fdev), GET_TRACK_SIZE,
		  0, 0);
		if (u.u_error)
			break;
		arg = u.u_rval1;
		fdev = m->m_sec;
		(*mr_devsw[major(fdev)].m_ioctl)(minor(fdev), GET_TRACK_SIZE,
		  0, 0);
		if (u.u_error)
			break;
		if (arg < u.u_rval1)
			u.u_rval1 = arg;
		break;

	case GET_INTERLACE:
		/* ask the primary unless m_ppercent is 0, then the secondary */
		fdev = (m->m_ppercent ? m->m_pri : m->m_sec);
		(*mr_devsw[major(fdev)].m_ioctl)(minor(fdev), GET_INTERLACE,
		  0, 0);
		break;

	case GET_OPEN_TYPES:
		u.u_rval1 = m->m_opentyp.ot_all_opens;	/* mirror opens	*/
		break;

		/* this ioctl is used by the mirror util to reinit the	*/
		/* kernel from /etc/mirrortab after a reboot.		*/
	case INIT_MIRROR_DISK:
		start_mirror(fdev, (caddr_t)arg, 1);
		break;

	case CREATE_MIRROR_DISK:
		start_mirror(fdev, (caddr_t)arg, 0);
		break;

	case DELETE_MIRROR_DISK:
		delete_mirror(m, fdev);
		break;

	case WAIT_MIRROR_ERROR:
		/*
		 * Sleep until a mirror appears on the update list, unlink
		 * the head entry, then report it exactly like
		 * GET_MIRROR_DISK.
		 */
		spin_lock(&mr_update_lock);
		while ((m = update_head) == NULL)
			mfs_sleep_with_sig_check((caddr_t)&update_head,
			  PSLEP - 1, &mr_update_lock);
		ASSERT(m >= mr_data && m <= last_mr_data);
		if ((update_head = m->m_nxupd) == NULL)
			update_tail = NULL;
		else
			m->m_nxupd = NULL;
		spin_unlock(&mr_update_lock);
		/* fall through! */
	case GET_MIRROR_DISK:
		get_mirror(m, (caddr_t)arg);
		break;

	case LOCK_MIRROR_DISK:
		lock_mirror(m, arg);
		break;

	case RE_MIRROR_DISK:
		mirror_disk(m, arg);
		break;

	case CAUSE_MIRROR_ERROR:
		/* privileged: arm simulated errors (consumed in mriodone) */
		if (!MR_PERM_CK())
			break;
		spin_lock(&m->m_lock);
		m->m_flags |= (arg & MR_DO_ALL_ERRS);
		spin_unlock(&m->m_lock);
		break;

	case READ_MIRROR_DISK:
		read_disk(m, (mr_read_t *)arg);
		break;

	default:
		/* anything else is forwarded to the primary's driver */
		(*mr_devsw[major(m->m_pri)].m_ioctl)
			(minor(m->m_pri),cmd, arg, mode);
	}
}


/*
 * mirror_disk -- copy one half of a mirror to the other
 *	The flag from_pri determines which half is copied to which.
 */

static
mirror_disk(m, from_pri)
register mirror_t	*m;
int			from_pri;
{
	register buf_t	*bp;
	register ulong 	from_dev, to_dev;
	register uint	mon;
	int		blockcnt;
	int		blocknum;
	caddr_t		blkbuffer;

	/* privileged operation */
	if (! MR_PERM_CK())
		return;

	/* only one copy at a time per mirror */
	spin_lock(&m->m_lock);
	if (mr_copying(m)) {
		u.u_error = EBUSY;			/* copy in progress */
		spin_unlock(&m->m_lock);
		return;
	}

	if (from_pri) {
		from_dev = m->m_pri;
		to_dev =  m->m_sec;
	}
	else {
		from_dev = m->m_sec;
		to_dev =  m->m_pri;
	}

	/*
	 * Enter copy state.  m_cp_blk starts at -N_CP_BLKS, so
	 * m_cp_blk + N_CP_BLKS is 0 and mrsend() classifies every write as
	 * "above the copy zone" until the copy actually begins.
	 */
	m->m_valid_dev = from_dev;
	m->m_cp_blk = -N_CP_BLKS;
	m->m_mon[0] = MR_ACTIVE;
	m->m_mon[1] = 0;
	m->m_flags &= ~(MR_MON_IDX | MR_ABORT_COPY | MR_DELAYED_ERR);
	m->m_flags |= (MR_VALID | MR_IN_COPY);
	spin_unlock(&m->m_lock);

	/* staging buffer for one copy chunk (N_CP_BLKS blocks) */
	blkbuffer = (caddr_t)sptalloc(N_CP_PAGES, (PG_P|PG_CB|PG_R|PG_W), 0, 0);

	if (blkbuffer == NULL) {
		/* back out of copy state; the mirror is left not valid */
		cmn_err(CE_WARN, "mirror_disk: sptalloc failed");
		u.u_error = ENOMEM;
		spin_lock(&m->m_lock);
		m->m_valid_dev = from_dev;
		m->m_flags &= ~(MR_VALID | MR_IN_COPY);
		spin_unlock(&m->m_lock);
		update_mirrortab(m);
		return;
	}

	/* NOTE(review): bp is used below without a NULL check -- presumably
	 * get_physbuf() cannot fail; confirm. */
	bp = get_physbuf();
	log_mirror_err(m, (daddr_t)from_pri, MIRROR_COPY);

	delay(MIRROR_COPY_DELAY);	/* wait for previous I/O to complete */

	m->m_cp_blk = 0;		/* can do this 'cuz I set copying */
	blocknum = 0;
	mon = 0;
	/* copy the mirror in chunks of at most N_CP_BLKS blocks */
	while (blocknum < m->m_size && !(m->m_flags & MR_ABORT_COPY)) {
		if ((blockcnt = m->m_size - blocknum) > N_CP_BLKS)
			blockcnt = N_CP_BLKS;

		bp->b_flags = B_BUSY | B_PHYS | B_READ;
		bp->b_error = bp->b_driver_flags = 0;
		bp->b_dev = from_dev;
		bp->b_blkno = blocknum;
		bp->b_bcount = dtob(blockcnt);
		bp->b_proc = NULL;
		bp->b_un.b_addr = blkbuffer;

		/*
		 * wait for previous I/O to complete before doing read
		 *
		 * Two monitor counters are used alternately: the other one
		 * is activated and selected for new writes, then we sleep
		 * until the writes counted on the current one have drained
		 * (mriodone() wakes us when the count drops).
		 */
		spin_lock(&m->m_lock);
		ASSERT(m->m_mon[!mon] == 0);
		m->m_mon[!mon] = MR_ACTIVE;	/* get ready....	  */
		m->m_flags ^= MR_MON_IDX;	/* switch to next monitor */
		atom_and(&m->m_mon[mon], ~MR_ACTIVE); /* stop current one */
		while (m->m_mon[mon])
			mfs_sleep((caddr_t)m->m_mon, PRIBIO, &m->m_lock);
		mon = m->m_flags & MR_MON_IDX;
		spin_unlock(&m->m_lock);

		/* read the chunk from the source side */
		(*mr_devsw[major(from_dev)].m_strategy)(bp);
		iowait(bp);
		if (u.u_error) {
			cmn_err(CE_WARN,
			  "remirror: read err from %s dev 0x%x, err %d\n",
			  mr_name(m, from_dev), denotminored(from_dev),
			  u.u_error);
			/* flag the source side and leave copy state */
			spin_lock(&m->m_lock);
			m->m_flags |= (from_pri ? MR_PRI_ERROR : MR_SEC_ERROR);
			m->m_valid_dev = from_dev;	/* best we can do... */
			m->m_flags &= ~(MR_VALID | MR_IN_COPY | MR_ABORT_COPY |
			  MR_DELAYED_ERR);
			spin_unlock(&m->m_lock);
			goto bp_relse;
		}

		/* write the same chunk to the destination side */
		bp->b_flags = B_BUSY | B_PHYS | B_WRITE;
		bp->b_error = bp->b_driver_flags = 0;
		bp->b_dev = to_dev;
		bp->b_blkno = blocknum;
		bp->b_bcount = dtob(blockcnt);
		bp->b_un.b_addr = blkbuffer;

		(*mr_devsw[major(to_dev)].m_strategy)(bp);
		iowait(bp);
		/* NOTE(review): the test uses u.u_error but the message
		 * prints bp->b_error, unlike the read path above. */
		if (u.u_error) {
			cmn_err(CE_WARN,
			  "remirror: write err to %s, dev 0x%x, err %d\n",
			  mr_name(m, to_dev), denotminored(to_dev),bp->b_error);
			/* flag the destination side and leave copy state */
			spin_lock(&m->m_lock);
			m->m_flags |= (from_pri ? MR_SEC_ERROR : MR_PRI_ERROR );
			m->m_valid_dev = from_dev;
			m->m_flags &= ~(MR_VALID | MR_IN_COPY | MR_ABORT_COPY |
			  MR_DELAYED_ERR);
			spin_unlock(&m->m_lock);
			goto bp_relse;
		}
			/* I can modify m_cp_blk because I'm doing the copy */
		m->m_cp_blk = (blocknum += blockcnt);
		/* resend writes that were parked while this chunk was copied */
		mr_flush_conflict(m);
	}

	/*
	 * Loop ended: either the whole mirror was copied or an abort was
	 * requested.  Settle the final state under the mirror lock.
	 */
	spin_lock(&m->m_lock);
	/* MR_ABORT_COPY overrides MR_DELAYED_ERR */
	if (!(m->m_flags & MR_ABORT_COPY)) {
		if (m->m_flags & MR_DELAYED_ERR) {
			/* delayed error recorded during the copy: the
			 * destination becomes the valid side */
			m->m_valid_dev = to_dev;
			m->m_flags &= ~MR_VALID;
		}
		else
			/* clean copy: clear the destination's error flag */
			m->m_flags &= from_pri ? ~MR_SEC_ERROR : ~MR_PRI_ERROR;
	}
	m->m_flags &= ~(MR_ABORT_COPY | MR_DELAYED_ERR | MR_IN_COPY);
	spin_unlock(&m->m_lock);

bp_relse:
	/* common exit: resend parked writes, free resources, queue the
	 * mirror for an update notification */
	mr_flush_conflict(m);
	if (blkbuffer)
		sptfree(blkbuffer, N_CP_PAGES, 1);
	if (bp)
		rel_physbuf(bp);
	update_mirrortab(m);
}

/*
 * mr_flush_conflict -- flush the list of writes that conflicted with a copy
 */

static
mr_flush_conflict(m)
register mirror_t	*m;
{
	register buf_t	*bp, *bp_list;

	bp_list = (buf_t *)remove_chain(&m->m_conf_head);

	if (bp_list && bp_list->av_forw)
		bp_list = reverse_bp_list(bp_list);
	while (bp = bp_list) {
		bp_list = bp->av_forw;
		mrsend(bp);
	}
}

/*
 * mrinit -- link mr free list on mrtab.b_forw and init suspend locks
 */

mrinit()
{
	register buf_t		*hdr;
	register mirror_t	*mir;
	register int		left;

	/* push every mirror buffer header onto the free list */
	last_mr_buf = &mrbufhdrs[num_mr_bufs - 1];
	hdr = mrbufhdrs;
	left = num_mr_bufs;
	while (--left >= 0) {
		hdr->b_dev = NODEV;
		hdr->b_iodone = mriodone;
		hdr->b_forw = mrtab.b_forw;
		mrtab.b_forw = hdr;
		hdr++;
	}

	/* set the priority of each mirror's suspend lock */
	last_mr_data = &mr_data[max_mr_data - 1];
	mir = mr_data;
	left = max_mr_data;
	while (--left >= 0) {
		mir->m_sem.s_priority = PZERO;
		mir++;
	}
}

/*
 * mriodone
 *
 *	Iodone that gets called first for all mirror buffer header pointers.
 *
 * Parameter:
 *
 *	Buffer header pointer (from the mirror driver pool)
 *
 * No return value.
 *
 */

static void
mriodone(driver_bp)
register buf_t	*driver_bp;
{
	register mirror_t	*m;
	register buf_t		*kern_bp, *bp;
	register uint		n;
	register uint		mon_flags;	/* kern_bp flags saved before iodone */

	ASSERT(driver_bp >= mrbufhdrs && driver_bp <= last_mr_buf);

	kern_bp = driver_bp->b_kern_bp;
	m = mr_addr(kern_bp->b_dev);
	ASSERT(m >= mr_data && m <= last_mr_data);

	/*
	 * if simulating an error, and the right kind of bp is done, then
	 * set the error flags, and set errno to EDOM, an error that should
	 * never happen in a system call
	 */
	if (m->m_flags & MR_DO_ALL_ERRS) {
		n = (driver_bp->b_dev == m->m_pri);
		n = (driver_bp->b_flags & B_READ) ?
		  (n ? MR_DO_RP_ERR : MR_DO_RS_ERR) :
		  (n ? MR_DO_WP_ERR : MR_DO_WS_ERR);
		/* cheap unlocked test first, repeated under the lock */
		if (m->m_flags & n) {
			spin_lock(&m->m_lock);
			if (m->m_flags & n) {
				m->m_flags &= ~n;
				driver_bp->b_driver_flags |= B_ERROR;
				if (driver_bp->b_error == 0)
					driver_bp->b_error = EDOM;
			}
			spin_unlock(&m->m_lock);
		}
	}

	if (driver_bp->b_error) {
		/* which side of the mirror was this error on? */
		n = (driver_bp->b_dev == m->m_pri);

		spin_lock(&m->m_lock);
		m->m_flags |= (n ? MR_PRI_ERROR : MR_SEC_ERROR);
		if (mr_valid(m)) {
			if (mr_copying(m)) {
				/*
				 * Error while copying.  Note that valid_dev is
				 * source device.
				 *
				 * side | blk vs cp_blk | do
				 * ------------------------------------------
				 * src	|	>	| abort copy, invalid
				 * src	|	<	| delayed err, valid
				 * dest |   don't care  | abort copy, invalid
				 */
				if (!n ||
				  (driver_bp->b_blkno +
				   btodt(driver_bp->b_bcount) > m->m_cp_blk)) {
					m->m_flags &= ~MR_VALID;
					m->m_flags |= MR_ABORT_COPY;
				}
				else
					m->m_flags |= MR_DELAYED_ERR;
			}
			else {
				m->m_valid_dev = m->m_devs[n];
				m->m_flags &= ~MR_VALID;
			}
			log_mirror_err(m, driver_bp->b_blkno, MIRROR_INVALID);
		}
		spin_unlock(&m->m_lock);
		update_mirrortab(m);
	}

	/* READ */

	if (driver_bp->b_flags & B_READ) {
		if (driver_bp->b_error) {
			if (mr_read_error(kern_bp, driver_bp, m))
				return;		/* reissued read */
		}
		kern_bp->b_error = driver_bp->b_error;
		kern_bp->b_driver_flags = (driver_bp->b_driver_flags & B_ERROR);
		/* b_first aliases b_resid: clear it before setting b_resid */
		kern_bp->b_first = 0;
		kern_bp->b_second = 0;
		kern_bp->b_resid = kern_bp->b_bcount -
		  driver_bp->b_bcount + driver_bp->b_resid;
		mrrelease(driver_bp);
		iodone(kern_bp);
		goto done;
	}

	/* WRITE, don't call iodone until both driver_bp's are done */

	bp = (driver_bp == B_FIRST(kern_bp) ? kern_bp->b_second :
	  B_FIRST(kern_bp));

	spin_lock(&mr_iodone_sem);

	if (bp && ! (bp->b_flags & B_DONE)) {
		driver_bp->b_flags |= B_DONE;
		spin_unlock(&mr_iodone_sem);
		return;				/* wait for other write */
	}

	spin_unlock(&mr_iodone_sem);

	if (driver_bp->b_error || (bp && bp->b_error))
		mr_write_error(driver_bp, bp, m);

	n = kern_bp->b_bcount - driver_bp->b_bcount + driver_bp->b_resid;
	kern_bp->b_error = driver_bp->b_error;
	kern_bp->b_driver_flags |= (driver_bp->b_driver_flags & B_ERROR);
	/*
	 * Snapshot the flags now: once iodone() has been called the owner
	 * of kern_bp may reuse it (mrstrategy clears b_driver_flags), so
	 * the monitor bookkeeping below must not look at kern_bp again.
	 */
	mon_flags = kern_bp->b_driver_flags;
	mrrelease(B_FIRST(kern_bp));
	mrrelease(kern_bp->b_second);
	kern_bp->b_first = 0;
	kern_bp->b_second = 0;
	kern_bp->b_resid = n;
	iodone(kern_bp);

	/*
	 * if this was a monitored write, decrement the appropriate counter
	 */
	if ((mon_flags & MKF_MON_FLAG) &&
	  atom_dec(&m->m_mon[mon_flags & MR_MON_IDX]) == 1) {
		spin_lock(&m->m_lock);	/* wakeup when all I/O done */
		mfs_wakeup((caddr_t)m->m_mon);
		spin_unlock(&m->m_lock);
	}

done:
	/*
	 * decrement the I/O count, and wake if waiting for all I/O to finish
	 */
	if (atom_dec(&m->m_iocnt) == 1) {
		spin_lock(&m->m_lock);
		mfs_wakeup((caddr_t)&m->m_iocnt);
		spin_unlock(&m->m_lock);
	}
}

/*
 * mr_read_error -- process a read error, returns non-zero if read reissued
 */

static
mr_read_error(kern_bp, driver_bp, m)
register buf_t		*kern_bp;
register buf_t		*driver_bp;
register mirror_t	*m;
{
	uint	is_pri = (driver_bp->b_dev == m->m_pri);
	uint	is_first = (driver_bp == B_FIRST(kern_bp));
	uint	is_phys = (kern_bp->b_flags & B_PHYS);

	log_mirror_err(m, kern_bp->b_blkno,
	  (is_pri ? READ_PRI_ERROR : READ_SEC_ERROR) |
	  (is_phys ? RAW_IO_ERROR : BLOCK_IO_ERROR));

	cmn_err(CE_WARN,
	  "mirror: %s %s read error: %s dev 0x%x, errno %d,",
	  (is_first ? "single" : "double"), (is_phys ? "raw" : "block"),
	  mr_name(m, driver_bp->b_dev), denotminored(driver_bp->b_dev),
	  driver_bp->b_error);
	if (driver_bp->b_bcount > dtob(1))
		cmn_err(CE_CONT, "         block %d to %d\n",
		  driver_bp->b_blkno,
		  driver_bp->b_blkno + btodt(driver_bp->b_bcount) - 1);
	else
		cmn_err(CE_CONT, "         block %d\n",
		  driver_bp->b_blkno);

	/* READ ERROR:
	 *
	 * 1) first error
	 *	if dev != valid_dev then
	 *		A) reissue read to good side
	 *	else if copying and blk <= cp_blk
	 *		B) reissue to dest side
	 *	else
	 *		C) pass error back
	 * 2) second error
	 *	pass error back
	 */

	if (driver_bp == B_FIRST(kern_bp)) {
		if (driver_bp->b_dev != m->m_valid_dev) {
			/* send read to other side -- case 1A */
			kern_bp->b_first = 0;
			kern_bp->b_second = driver_bp;
			driver_bp->b_dev = m->m_valid_dev;
			driver_bp->b_error = driver_bp->b_driver_flags = 0;
			driver_bp->b_resid = 0;	/* FIX JPC */

			(*mr_devsw[major(driver_bp->b_dev)].m_strategy)
			  (driver_bp);
			return(1);				/* reissued */
		}
		else if (mr_copying(m) &&
		  (driver_bp->b_blkno + btodt(driver_bp->b_bcount) <=
		  m->m_cp_blk)) {
			/* send read to dest side -- case 1B */
			kern_bp->b_first = 0;
			kern_bp->b_second = driver_bp;
			driver_bp->b_dev = (is_pri ? m->m_sec : m->m_pri);
			driver_bp->b_error = driver_bp->b_driver_flags = 0;
			driver_bp->b_resid = 0;	/* FIX JPC */

			(*mr_devsw[major(driver_bp->b_dev)].m_strategy)
			  (driver_bp);
			return(1);				/* reissued */
		}
		/* else 1C */
	}
	/* else, case 2 */

	return(0);						/* finished */
}

/*
 * mr_write_error -- process one or more write errors
 */

/*
 * Arguments:
 *  driver_bp	-- the mirror buffer whose completion is being processed
 *  bp		-- the other mirror buffer of the same write, or NULL if
 *		   only one side was written
 *  m		-- the mirror
 *
 * The outcome reported to the caller is carried in driver_bp: mriodone
 * copies driver_bp's b_error/b_driver_flags/b_resid into the kernel buffer
 * after this routine returns.
 */
static
mr_write_error(driver_bp, bp, m)
register buf_t		*driver_bp, *bp;
register mirror_t	*m;
{
	register buf_t	*errbp;
	register uint	errcode;

	errbp = NULL;
	errcode = (driver_bp->b_flags & B_PHYS) ? RAW_IO_ERROR : BLOCK_IO_ERROR;

	/*
	 * Accumulate which side(s) failed.  errbp ends up pointing at the
	 * failed buffer (at bp if both failed).
	 */
	if (driver_bp->b_error) {
		errbp = driver_bp;
		errcode |= (driver_bp->b_dev == m->m_pri ? WRITE_PRI_ERROR :
		  WRITE_SEC_ERROR);
	}

	if (bp && bp->b_error) {
		errbp = bp;
		errcode |= (bp->b_dev == m->m_pri ? WRITE_PRI_ERROR :
		  WRITE_SEC_ERROR);
	}

	/* caller only invokes us when at least one side failed */
	ASSERT(errbp);

	log_mirror_err(m, driver_bp->b_blkno, errcode);

	cmn_err(CE_WARN,
	  "mirror: %s %s write error: %s dev 0x%x, errno %d",
	  ((errcode & DOUBLE_WRITE_ERROR) != DOUBLE_WRITE_ERROR ? "single" :
	    "double"),
	  ((driver_bp->b_flags & B_PHYS) ? "raw" : "block"),
	  mr_name(m, errbp->b_dev), denotminored(errbp->b_dev), errbp->b_error);
	if (errbp->b_bcount > dtob(1))
		cmn_err(CE_CONT, "         block %d to %d\n",
		  errbp->b_blkno, errbp->b_blkno + btodt(errbp->b_bcount) - 1);
	else
		cmn_err(CE_CONT, "         block %d\n", errbp->b_blkno);

	/* WRITE ERROR:
	 *
	 * 1) single error on double write attempt
	 *	if successful write == m_valid_dev
	 *		A) change failure to success
	 *	else if copying
	 *		B) change failure to success (mriodone set err flags)
	 *	else
	 *		C) pass back error
	 * 2) double error or only one write attempt
	 *	pass back error
	 */

	if (bp && (errcode & DOUBLE_WRITE_ERROR) != DOUBLE_WRITE_ERROR) {
		if (errbp->b_dev != m->m_valid_dev || mr_copying(m)) {
			/* cases 1A and 1B */
			if (errbp == driver_bp) {
				driver_bp->b_error = 0;	/* fake success */
				driver_bp->b_driver_flags = 0;
				driver_bp->b_resid = 0;
			}
			/* else, don't need to fake it */
		}
		/* else, case 1C */
		else if (errbp != driver_bp) {
			/* the failure was on bp: copy it into driver_bp */
			driver_bp->b_error = bp->b_error; /* fake failure */
			driver_bp->b_driver_flags = bp->b_driver_flags;
			driver_bp->b_resid = bp->b_resid;
		}
	}
	/* else, case 2 */
}


/*
 * mrqueue -- append a kernel buffer to the queue of requests waiting
 *		for a mirror buffer (mrtab.b_actf / mrtab.b_actl)
 */
static
mrqueue(kern_bp)
buf_t	*kern_bp;
{
	/* the new entry becomes the tail, so it has no successor */
	kern_bp->av_forw = NULL;

	spin_lock(&mrfreelist_sem);

	if (mrtab.b_actl == NULL)
		mrtab.b_actf = kern_bp;			/* queue was empty */
	else
		mrtab.b_actl->av_forw = kern_bp;	/* link after tail */
	mrtab.b_actl = kern_bp;

	mrtab.io_waitbuf++;		/* one more request had to wait */

	spin_unlock(&mrfreelist_sem);
}

/*
 * mrrelease -- give back a mirror buffer
 *
 *	If a request is waiting for a mirror buffer, the buffer is handed
 *	to the request at the head of the wait queue (and the request is
 *	sent once it has all the buffers it needs); otherwise the buffer
 *	goes back on the free list.  A NULL mbp is ignored.
 */
static
mrrelease(mbp)
register buf_t	*mbp;
{
	register buf_t	*kern_bp;

	if (mbp == NULL)
		return;

	spin_lock(&mrfreelist_sem);

	/* scrub the per-request state */
	mbp->b_error = mbp->b_flags = mbp->b_driver_flags = 0;
	mbp->b_kern_bp = 0;

	/* if there are any requests queued up, send one out with this mbp */
	if ((kern_bp = mrtab.b_actf) != (buf_t *)NULL) {

		mbp->b_kern_bp = kern_bp;

		/* unlink the head of the wait queue */
		if (kern_bp == mrtab.b_actl)
			mrtab.b_actf = mrtab.b_actl = 0;
		else {
			mrtab.b_actf = kern_bp->av_forw;
			kern_bp->av_forw = NULL;
		}

		/*
		 * If this is a read then it only needs one mbp.
		 * If it is a write, then it will need two.
		 */

		if (kern_bp->b_flags & B_READ) {
			spin_unlock(&mrfreelist_sem);
			kern_bp->b_first = (uint)mbp;
			mrsend(kern_bp);
		}
		else if (kern_bp->b_first) {
			/* write that already has its first buffer */
			kern_bp->b_second = mbp;
			spin_unlock(&mrfreelist_sem);
			mrsend(kern_bp);
		}
		else {
			/*
			 * Write with no buffer yet: this is its first.
			 * Put it back at the HEAD of the queue so the next
			 * released buffer completes the pair.
			 */
			kern_bp->b_first = (uint)mbp;
			if (mrtab.b_actf) {
				kern_bp->av_forw = mrtab.b_actf;
				mrtab.b_actf = kern_bp;
			}
			else {
				mrtab.b_actf = mrtab.b_actl = kern_bp;
				kern_bp->av_forw = NULL;
			}
			spin_unlock(&mrfreelist_sem);
		}
	}
	else {
		/* nobody waiting: push onto the free list */
		mbp->b_forw = mrtab.b_forw;
		mrtab.b_forw = mbp;
		spin_unlock(&mrfreelist_sem);
	}
}

/*
 * getmrbuf
 *
 *	Pop a buf header off the mirror free list (mrtab.b_forw) under
 *	mrfreelist_sem.  Returns NULL when the list is empty; otherwise
 *	counts the request in io_total and returns the header.
 */

static buf_t *
getmrbuf()
{
	register buf_t	*mbp;

	spin_lock(&mrfreelist_sem);
	mbp = mrtab.b_forw;
	if (mbp != NULL) {
		mrtab.b_forw = mbp->b_forw;
		mrtab.io_total++;
	}
	spin_unlock(&mrfreelist_sem);

	if (mbp == NULL)
		return (NULL);

	/* a free header must not still be tied to a kernel buffer */
	ASSERT(!mbp->b_kern_bp);
	return(mbp);
}

/*
 * find_mir -- scan the max_mr_data entries of mr_data for one whose
 *		m_pri equals fulldev.  Returns the entry, or 0 if none
 *		matches.
 */

static mirror_t *
find_mir(fulldev)
register dev_t	fulldev;
{
	register mirror_t	*mp;
	register int		left;

	for (mp = mr_data, left = max_mr_data; left > 0; left--, mp++) {
		if (mp->m_pri == fulldev)
			return(mp);
	}
	return(0);
}


/*
 * full_dev -- build a full (notminored) device number from the first
 *		major number of a device type and an extended minor number.
 *
 * The bits of minordev above the low 8 select how many times next_major()
 * is applied to first_major; the low 8 bits become the minor number.
 * Returns 0 if next_major() reports failure (a negative value).
 */

dev_t
full_dev(first_major, minordev)
int	first_major;
dev_t	minordev;
{
	register int	i, maj;

	maj = first_major;
	for (i = minordev >> 8; i > 0; i--) {
		maj = next_major(maj);
		if (maj < 0)
			return(0);
	}
	return (notminored(makedev(maj, minordev & 0xff)));
}

/*
 * new_mirror -- claim a free mirror slot
 *
 * Looks at up to max_mr_data slots, starting where the previous search
 * ended and wrapping from last_mr_data back to mr_data.  The first slot
 * without MR_INUSE is locked (m_lock), its flags are set to exactly
 * MR_INUSE, and it is returned still locked.  Returns NULL if every slot
 * is in use.
 *
 * The caller must be holding mr_open_lock.
 */

static mirror_t *
new_mirror()
{
	register int		left;
	register mirror_t	*slot;
	static mirror_t		*next_mirror = mr_data;

	slot = next_mirror;
	for (left = max_mr_data; left > 0; left--, slot++) {
		if (slot > last_mr_data)
			slot = mr_data;			/* wrap around */
		if (slot->m_flags & MR_INUSE)
			continue;

		spin_lock(&slot->m_lock);
		slot->m_flags = MR_INUSE;
		next_mirror = slot;
		return (slot);
	}

	return (NULL);
}

/*
 * update_mirrortab -- put the mirror on the end of the update list (unless
 *			it is already there), then wake up anything sleeping
 *			on update_head (the error daemon)
 *
 * Arguments:
 *	m	-- mirror_t pointer that may be unlocked
 *
 * Note that m_nxupd is protected by mr_update_lock, not m->m_lock.
 *
 * A mirror is taken to be on the list when its m_nxupd is set or when it
 * is update_tail.  This relies on m_nxupd always being cleared when not in
 * use and on update_tail being cleared when the list is emptied.
 */

static
update_mirrortab(m)
register mirror_t	*m;
{
	spin_lock(&mr_update_lock);

	if (m->m_nxupd == NULL && m != update_tail) {
		/* not on the list yet: link it in after the current tail */
		if (update_head)
			update_tail->m_nxupd = m;
		else
			update_head = m;
		update_tail = m;
	}

	mfs_wakeup((caddr_t)&update_head);
	spin_unlock(&mr_update_lock);
}

/*
 * mr_picky_open -- apply openfunc once for every open recorded in *op
 *
 * openfunc is a driver open or close routine; it is called as
 * (*openfunc)(min, FREAD, type).  The flagged open types are handled in
 * the fixed order OTYP_MNT, OTYP_SWP, OTYP_BLK, OTYP_CHR, followed by one
 * OTYP_LYR call per counted layered open.
 *
 * Failures do not stop the walk.  On return u.u_error holds the error
 * that was pending on entry, or else the first error hit here, or 0.
 *
 * Returns the ot_all_opens image of the calls that succeeded.
 */

uint
mr_picky_open(op, openfunc, min)
register opentyp_t	*op;
register int		(*openfunc)();
register uint		min;
{
	register int	left;
	register uint	typ, flg;
	register ushort	*pp;
	int		errnum;
	opentyp_t	did;
	static ushort	picky[] = { OTYP_MNT, OTYP_SWP, OTYP_BLK, OTYP_CHR };

	/* remember any pending error so our own failures can be told apart */
	errnum = u.u_error;
	u.u_error = 0;
	did.ot_all_opens = 0;

	for (pp = picky; pp < &picky[sizeof(picky) / sizeof(*picky)]; pp++) {
		typ = *pp;
		flg = OTYP_FLG(typ);
		if (!(op->ot_flag & flg))
			continue;

		(*openfunc)(min, FREAD, typ);
		if (u.u_error == 0) {
			did.ot_flag |= flg;
			continue;
		}
		if (errnum == 0)
			errnum = u.u_error;
		u.u_error = 0;
	}

	for (left = op->ot_lyrcnt; left > 0; left--) {
		(*openfunc)(min, FREAD, OTYP_LYR);
		if (u.u_error == 0) {
			did.ot_lyrcnt++;
			continue;
		}
		if (errnum == 0)
			errnum = u.u_error;
		u.u_error = 0;
	}

	if (errnum)
		u.u_error = errnum;

	return (did.ot_all_opens);
}

/*
 * start_mirror -- do INIT_MIRROR_DISK and CREATE_MIRROR_DISK ioctls
 *
 * Arguments:
 *	fdev	 -- device whose mr_tab slot will point at the new mirror
 *	arg	 -- user address of a mirror_t describing the mirror
 *	initflag -- when set, refuse a mirror whose primary or secondary
 *		    is the root device
 *
 * Errors are reported through u.u_error:
 *	EFAULT	-- the description could not be copied in
 *	EBUSY	-- root device with initflag, fdev already has a mirror,
 *		   or the secondary is already mirrored
 *	ENODEV	-- secondary's driver is not mirrorable, or no free slot
 *	other	-- whatever the underlying driver open/close/ioctl set
 *
 * On success the secondary and the primary are both open as OTYP_MIR and
 * the primary's previous opens have been closed and recorded in
 * m->m_opentyp.  On failure everything done so far is undone and the
 * mirror slot is released.
 */

static
start_mirror(fdev, arg, initflag)
dev_t	fdev;
caddr_t	arg;
int	initflag;
{
	register mirror_t	*m;
	register mr_devsw_t	*mrdp;
	register uint		dev, fmin;
	register int		i;
	uint			pri_open, sec_open;
	opentyp_t		closes;
	mirror_t		m_set;
	extern int		nodev(), nostrat();

	if (! MR_PERM_CK())
		return;

	if (copyin(arg, &m_set, sizeof(m_set))) {
		u.u_error = EFAULT;
		return;
	}
	/*
	 * we don't initialize root mirrors
	 */
	if (initflag) {
		i = notminored(rootdev);
		if (notminored(m_set.m_pri) == i ||
		  notminored(m_set.m_sec) == i) {
			u.u_error = EBUSY;
			return;
		}
	}

	dev = notminored(m_set.m_sec);
	mrdp = &mr_devsw[major(dev)];
	if (mrdp->m_open == nodev && mrdp->m_strategy == nostrat) {
		cmn_err(CE_NOTE,
		  "Secondary device [0x%x] has not been marked as mirrorable",
		  m_set.m_sec);
		u.u_error = ENODEV;
		return;
	}

	suspend_lock(&mr_open_lock);
	m = mr_addr(fdev);
	if (m >= mr_data && m <= last_mr_data) {
		u.u_error = EBUSY;
		suspend_unlock(&mr_open_lock);
		return;			/* must not have a mirror yet */
	}

	/* new_mirror() hands back the slot with m_lock held */
	if ((m = new_mirror()) == NULL) {
		u.u_error = ENODEV;
		suspend_unlock(&mr_open_lock);
		return;
	}

	if (!mr_shine(dev)) {
		u.u_error = EBUSY;
		/*
		 * new_mirror() marked the slot MR_INUSE; give it back or
		 * it can never be allocated again
		 */
		m->m_flags &= ~MR_INUSE;
		spin_unlock(&m->m_lock);
		suspend_unlock(&mr_open_lock);
		return;			/* secondary already mirrored */
	}

	/* take over the devices immediately */
	mr_tab[fdev & UNMINOR_MASK] = ((m - mr_data) + 1) | MVALID_BIT;
	mr_tab[dev & UNMINOR_MASK] = MR_DEV_CLOSED;

	pri_open = sec_open = 0;
	closes.ot_all_opens = 0;

	/* DO NOT COPY OVER M_LOCK  */
	m->m_flags |= (m_set.m_flags & MR_VALID) | MR_OPEN;
	m->m_cyl_size = m_set.m_cyl_size;
	m->m_ppercent = m_set.m_ppercent;
	m->m_size = m_set.m_size;
	m->m_pri = notminored(m_set.m_pri);
	m->m_sec = dev;
	m->m_valid_dev = notminored(m_set.m_valid_dev);
	m->m_cp_blk = -N_CP_BLKS;
	m->m_iocnt = MR_ACTIVE;

	/*
	 * make sure that the secondary slice isn't already in use
	 */

	spin_unlock(&m->m_lock);
	suspend_lock(&m->m_sem);
	suspend_unlock(&mr_open_lock);
	fmin = minor(dev);
	(*mrdp->m_open)(fmin, FREAD | FWRITE, OTYP_MIR);
	if (u.u_error)
		goto mr_relse;
	sec_open = 1;

	/*
	 * take over all of the primary's opens
	 */
	dev = m->m_pri;
	mrdp = &mr_devsw[major(dev)];
	fmin = minor(dev);
	(*mrdp->m_ioctl)(fmin, GET_OPEN_TYPES, 0);
	if (u.u_error)
		goto mr_relse;
	m->m_opentyp.ot_all_opens = u.u_rval1;
	u.u_rval1 = 0;

	/* open as layer */
	(*mrdp->m_open)(fmin, FREAD, OTYP_LYR);
	if (u.u_error)
		goto mr_relse;
	/* close other types */
	closes.ot_all_opens = mr_picky_open(&m->m_opentyp, mrdp->m_close, fmin);
	if (u.u_error == 0) {
		/* open as mirror */
		(*mrdp->m_open)(fmin, FREAD | FWRITE, OTYP_MIR);
		/*
		 * only a successful open may be closed again in mr_relse
		 */
		if (u.u_error == 0)
			pri_open = 1;
	}
	/* layer close */
	(*mrdp->m_close)(fmin, FREAD, OTYP_LYR);
	if (u.u_error)
		goto mr_relse;

	log_mirror_err(m, (daddr_t)0, MIRROR_CREATE);

	suspend_unlock(&m->m_sem);
	return;

mr_relse:
	/*
	 * restore mr_tab, then undo stuff in the reverse order
	 */
	mr_tab[fdev & UNMINOR_MASK] = 0;
	mr_tab[m->m_sec & UNMINOR_MASK] = 0;
	/*
	 * reopen closed files, close open files
	 *
	 * closes is only non-zero once m->m_pri has been set and the
	 * closes were done on it, so reopen on that very same device
	 */
	if (closes.ot_all_opens) {
		dev = m->m_pri;
		(void) mr_picky_open(&closes, mr_devsw[major(dev)].m_open,
		  minor(dev));
	}
	if (pri_open) {
		(*mr_devsw[major(m->m_pri)].m_close)(minor(m->m_pri),
		  FREAD | FWRITE, OTYP_MIR);
	}
	if (sec_open) {
		(*mr_devsw[major(m->m_sec)].m_close)(minor(m->m_sec),
		  FREAD | FWRITE, OTYP_MIR);
	}
	spin_lock(&m->m_lock);
	m->m_flags &= ~MR_INUSE;
	spin_unlock(&m->m_lock);
	suspend_unlock(&m->m_sem);
}

/*
 * delete_mirror -- verify and delete a mirrored disk
 *
 * Arguments:
 *	m	-- the mirror to delete
 *	fdev	-- device whose mr_tab slot refers to the mirror
 *
 * Fails with u.u_error = EBUSY if a copy is in progress.  Otherwise the
 * mirror is marked inactive, outstanding I/O is drained, the secondary is
 * closed, the recorded opens are handed back to the primary driver, the
 * mirror is unlinked from the mirrortab update list, and the mr_tab
 * entries and the mirror slot are released.
 */

delete_mirror(m, fdev)
register mirror_t	*m;
register dev_t		fdev;
{
	register mirror_t	*mp;
	register mr_devsw_t	*mrdp;
	register int		i;
	register uint		fmin;

	if (! MR_PERM_CK())
		return;

	update();				/* sync up */

	/*
	 * clear the active bit and wait for the I/O to complete
	 */
	suspend_lock(&mr_open_lock);
	suspend_lock(&m->m_sem);
	spin_lock(&m->m_lock);
	if (mr_copying(m)) {
		u.u_error = EBUSY;
		spin_unlock(&m->m_lock);
		suspend_unlock(&m->m_sem);
		suspend_unlock(&mr_open_lock);
		return;
	}
	mr_tab[fdev & UNMINOR_MASK] &= ~MVALID_BIT;	/* shine on */
	atom_and(&m->m_iocnt, ~MR_ACTIVE);
	while (m->m_iocnt)
		mfs_sleep((caddr_t)&m->m_iocnt, PRIBIO, &m->m_lock);
	spin_unlock(&m->m_lock);
	suspend_unlock(&mr_open_lock);

	if (mr_open(m)) {			/* close the secondary */
		i = m->m_sec;
		(*mr_devsw[major(i)].m_close)(minor(i), 0, OTYP_MIR);
	}

	/*
	 * pass current opens back to primary driver
	 */
	mrdp = &mr_devsw[major(m->m_pri)];
	fmin = minor(m->m_pri);
	(*mrdp->m_open)(fmin, FREAD, OTYP_LYR);
	if (mr_open(m))
		(*mrdp->m_close)(fmin, 0, OTYP_MIR);	/* mirror close */
	(void) mr_picky_open(&m->m_opentyp, mrdp->m_open, fmin);
	(*mrdp->m_close)(fmin, 0, OTYP_LYR);

	log_mirror_err(m, (daddr_t)0, MIRROR_DELETE);

	/*
	 * if the mirror is on the mirrortab update list, remove it
	 * (we assume the caller will update mirrortab)
	 *
	 * update_mirrortab() decides list membership from m_nxupd and
	 * update_tail, so the tail must be moved back when it is the one
	 * being removed and m_nxupd must be cleared once m is off the list.
	 */
	spin_lock(&mr_update_lock);
	if (m->m_nxupd || m == update_tail) {
		if (m == (mp = update_head)) {
			update_head = m->m_nxupd;
			mp = NULL;		/* the head has no predecessor */
		}
		else {
			for (; mp; mp = mp->m_nxupd)
				if (mp->m_nxupd == m) {
					mp->m_nxupd = m->m_nxupd;
					break;
				}
			ASSERT(mp);
		}
		/* mp is now m's predecessor, or NULL if m was the head */
		if (m == update_tail)
			update_tail = mp;
		if (update_head == NULL)
			update_tail = NULL;
		m->m_nxupd = NULL;
	}
	spin_unlock(&mr_update_lock);

	spin_lock(&m->m_lock);
	mr_tab[m->m_pri & UNMINOR_MASK] = 0;	/* release the devices */
	mr_tab[m->m_sec & UNMINOR_MASK] = 0;
	m->m_flags = 0;
	spin_unlock(&m->m_lock);
	suspend_unlock(&m->m_sem);
}

/*
 * get_mirror -- do the GET_MIRROR_DISK ioctl
 *
 * Takes a snapshot of *m under m->m_lock, runs its three device fields
 * (m_pri, m_sec, m_valid_dev) through denotminored(), and copies the
 * result out to the user address arg.  Sets u.u_error to EFAULT if the
 * copyout fails.
 */

static
get_mirror(m, arg)
register mirror_t	*m;
caddr_t			arg;
{
	mirror_t	snap;

	/* copy under the lock so the snapshot is self-consistent */
	spin_lock(&m->m_lock);
	snap = *m;
	spin_unlock(&m->m_lock);

	snap.m_valid_dev = denotminored(snap.m_valid_dev);
	snap.m_sec = denotminored(snap.m_sec);
	snap.m_pri = denotminored(snap.m_pri);

	if (copyout(&snap, arg, sizeof(snap)) != 0)
		u.u_error = EFAULT;
}

/*
 * lock_mirror -- do the LOCK_MIRROR_DISK ioctl
 */

static
lock_mirror(m, lock_it)
register mirror_t	*m;
int			lock_it;
{
	static suspend_lock_t	lock_lock = SUSPEND_INIT(PZERO - 1);

	if (! MR_PERM_CK())
		return;
	if (m->m_pri == notminored(rootdev)) {
		u.u_error = EBUSY;
		return;
	}
	update();				/* sync up */

	suspend_lock(&lock_lock);
	spin_lock(&m->m_lock);

	if (lock_it) {
		if (mr_copying(m) || !mr_open(m) ||
		    (m->m_opentyp.ot_flag & OFLG_MNT)) {
			u.u_error = EBUSY;
			spin_unlock(&m->m_lock);
			suspend_unlock(&lock_lock);
			return;
		}
		/* wait for all I/O to stop before shutting down */
		atom_and(&m->m_iocnt, ~MR_ACTIVE);
		while (m->m_iocnt)
			mfs_sleep((caddr_t)&m->m_iocnt, PRIBIO, &m->m_lock);
		mr_tab[m->m_pri & UNMINOR_MASK] = MR_DEV_CLOSED;
		mr_tab[m->m_sec & UNMINOR_MASK] = MR_DEV_CLOSED;
		m->m_flags &= ~MR_OPEN;
		spin_unlock(&m->m_lock);
		/* close the devices while locked */
		(*mr_devsw[major(m->m_pri)].m_close)(minor(m->m_pri), FREAD,
		  OTYP_MIR);
		(*mr_devsw[major(m->m_sec)].m_close)(minor(m->m_sec), FREAD,
		  OTYP_MIR);
	}
	else {
		if (mr_open(m)) {
			u.u_error = EBUSY;
			spin_unlock(&m->m_lock);
			suspend_unlock(&lock_lock);
			return;
		}
		spin_unlock(&m->m_lock);
		/* reopen the devices */
		(*mr_devsw[major(m->m_pri)].m_open)(minor(m->m_pri), FREAD,
		  OTYP_MIR);
		if (u.u_error) {
			suspend_unlock(&lock_lock);
			return;
		}
		(*mr_devsw[major(m->m_sec)].m_open)(minor(m->m_sec), FREAD,
		  OTYP_MIR);
		if (u.u_error) {
			(*mr_devsw[major(m->m_pri)].m_close)(minor(m->m_pri),
			  FREAD, OTYP_MIR);
			suspend_unlock(&lock_lock);
			return;
		}
		spin_lock(&m->m_lock);
		ASSERT(m->m_iocnt == 0);
		m->m_iocnt = MR_ACTIVE;
		m->m_flags |= MR_OPEN;
		mr_tab[m->m_pri & UNMINOR_MASK] =
		  ((m - mr_data) + 1) | MVALID_BIT;
		spin_unlock(&m->m_lock);
	}
	suspend_unlock(&lock_lock);
}

/*
 * read_disk -- do the READ_MIRROR_DISK ioctl
 *
 * Arguments:
 *	m	-- the mirror
 *	arg	-- read request: count, offset, buf, and rd_pri (non-zero
 *		   to read the primary, zero to read the secondary)
 *
 * Loads the request into the u-area, has physck() check it against
 * m->m_size, and then calls the chosen device's m_read routine.  The
 * number of bytes transferred is left in u.u_rval1.
 */

static
read_disk(m, arg)
register mirror_t	*m;
register mr_read_t	*arg;
{
	register dev_t	dev;
	int		*sav_uap;
	int		rejected;

	if (! MR_PERM_CK())
		return;

	u.u_fmode = FREAD;
	u.u_segflg = 0;
	u.u_base = arg->buf;
	u.u_offset = arg->offset;
	u.u_count = arg->count;

	/*
	 * Modifying u.u_ap in a driver is forbidden, but it allows
	 * us to use physck.  Oh well....
	 */
	sav_uap = u.u_ap;
	u.u_ap = (int *)arg;
	rejected = (physck(m->m_size, B_READ) == 0);
	u.u_ap = sav_uap;
	if (rejected)
		return;

	if (arg->rd_pri)
		dev = m->m_pri;
	else
		dev = m->m_sec;
	(*mr_devsw[major(dev)].m_read)(minor(dev));

	/* u.u_count is what the read left untransferred */
	u.u_rval1 = arg->count - u.u_count;
}
