/* 
 * Mach Operating System
 * Copyright (c) 1989 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * HISTORY
 * $Log:	afs_vnodeops.c,v $
 * Revision 2.20  90/01/11  14:49:20  berman
 * 	Put the code to hide the setuid and setgid bits under an #ifndef
 * 	AFS_MACH_ENV conditional. We want to see the bits so we can tell
 * 	why execution fails. [jsb]
 * 
 * Revision 2.19  89/10/03  19:21:08  rpd
 * 	Added afs_nlinks to the vnode interface.
 * 	Removed calls to unix_master/unix_release; afs_page_read and
 * 	afs_page_write should already be on the master.
 * 	[89/10/02  15:03:22  rpd]
 * 
 * Revision 2.18  89/08/24  19:35:49  jsb
 * 	Created afs_Wire and afs_IsWired routines. Replace call to
 * 	FetchWholeEnchilada in afs_remove with afs_Wire. Add call in
 * 	afs_page_read to afs_Wire. See comment above afs_Wire and log
 * 	message in afs_cache.c for explanation of wiring.
 * 	[89/08/24  17:11:06  jsb]
 * 
 * Revision 2.17  89/08/09  09:13:14  jsb
 * 	Added support for afs resource pausing. Fixed bug in afs_remove
 * 	which deleted cache files for files that were still being paged from.
 * 	[89/08/09  08:50:18  jsb]
 * 
 * Revision 2.16  89/08/02  08:00:50  jsb
 * 	Fixed bug in setpag.
 * 	[89/08/01  11:17:45  jsb]
 * 
 * 	Use osi_Zalloc in place of osi_Alloc or AFS_KALLOC whereever possible.
 * 	Added afs_freefid function. Corrected conditional for non-AFS_GATEWAY
 * 	case. Added afs zone initialization.
 * 	[89/07/31  18:50:54  jsb]
 * 
 * Revision 2.15  89/06/24  23:58:58  jsb
 * 	Newer ITC sources.
 * 	[89/06/24  23:41:52  jsb]
 * 
 * Revision 2.14  89/06/12  14:50:38  jsb
 * 	Added new afs_page_write routine. Removed huge delay in setpag.
 * 	[89/06/12  10:41:20  jsb]
 * 
 * Revision 2.13  89/06/03  15:29:41  jsb
 * 	Changed directory reading routines to use a smaller version of
 * 	struct direct (one without a 256-byte d_name field). Created
 * 	afs_read from afs_rdwr which will allow afs_bread to return real
 * 	buffers when possible. Changed afs_bread to call afs_read and
 * 	also to allocate special memory for fake buffers if on a sun.
 * 	Added fix to afs_create to check for write permission.
 * 	[89/06/02  14:50:36  jsb]
 * 
 * 	Merged with newer ITC sources, which includes code to prefetch entire
 * 	file on deletion if it is still open. Many changes for gateway
 * 	support.  Fixed afs_read1dir.  Added afs_bread and afs_brelse (so that
 * 	nfs/nfs_server.c does not need to be modified). Made afs_page_read
 * 	print and return correct error code.
 * 	[89/05/26  19:30:29  jsb]
 * 
 * Revision 2.12  89/05/30  10:29:57  rvb
 * 	Moved index() out of here.
 * 	[89/04/20            af]
 * 
 * Revision 2.11  89/05/11  14:39:47  gm0w
 * 	Added afs_read1dir that only places one entry in the buffer.
 * 	Fixed bug in old afs_readdir code that was missing a DRelease
 * 	when an io error occurred.
 * 	[89/05/11            gm0w]
 * 
 * Revision 2.10  89/05/01  18:00:21  rpd
 * 	Replaced UOM directory reading code with MACH version, which works
 * 	in both nfs and non-nfs cases.
 * 	[89/05/01  17:40:49  jsb]
 * 
 * Revision 2.9  89/04/22  15:15:40  gm0w
 * 	Rewrote afs_readdir_iter() to (1) fix a bug with the wrong
 * 	offset being returned and (2) to make the code smaller,
 * 	faster and more concise.
 * 	[89/04/20            gm0w]
 * 	Updated to RX version.  This is actually a newer module than
 * 	the other RX files to pick up the UMich directory code.
 * 	[89/04/14            gm0w]
 * 
 */
/*
 * P_R_P_Q_# (C) COPYRIGHT IBM CORPORATION 1987, 1988
 * LICENSED MATERIALS - PROPERTY OF IBM
 * REFER TO COPYRIGHT INSTRUCTIONS FORM NUMBER G120-2083
 */

#include <afs/param.h>
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/param.h>
#ifdef	AFS_AUX_ENV
#include <sys/mmu.h>
#include <sys/seg.h>
#include <sys/sysmacros.h>
#include <sys/signal.h>
#include <sys/errno.h>
#endif
#if	!defined(AFS_IBM_ENV) || !defined(sys_rt_r3)
#include <sys/time.h>
#endif	AFS_IBM_ENV
#ifdef	AFS_AIX_ENV
#include <sys/vattr.h>
#else
#include <sys/kernel.h>
#endif
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#ifdef	AFS_VFS40
#include <sys/dirent.h>
#else
#include <sys/dir.h>
#endif
#include <sys/user.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/buf.h>
#ifdef	AFS_GFS_ENV
#include <afs/gfs_vfs.h>
#include <afs/gfs_vnode.h>
#else
#ifdef	AFS_MACH_ENV
#include <vfs/vfs.h>
#include <vfs/vnode.h>
#include <sys/inode.h>
#include <mach/vm_param.h>
#include <kern/parallel.h>
#else	AFS_MACH_ENV
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <ufs/inode.h>
#endif	AFS_MACH_ENV
#endif	AFS_GFS_ENV
#include <netinet/in.h>
#include <sys/mbuf.h>
#include <rpc/types.h>
#include <rpc/xdr.h>
#ifdef	AFS_SYSVLOCK
#ifdef	AFS_AIX_ENV
#include <sys/flock.h>	/* fcntl.h is a user-level include in aix */
#else
#include <sys/fcntl.h>
#endif
#endif	AFS_SYSVLOCK

#include <afs/osi.h>
#include <rx/rx.h>

#include <afs/lock.h>
#include <afs/volerrors.h>
#include <afsint/afsint.h>
#include <afs/afs.h>
#include <afs/prs_fs.h>
#include <afs/dir.h>
#ifdef	AFS_AIX_ENV
#include <sys/access.h>
#include <afs/aix_vfs.h>
#endif
#ifdef	AFS_VFS40
#include <sys/mman.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
#include <vm/rm.h>
#include <vm/swap.h>
#endif

/*
 * Objects and routines defined outside this file.  Only cacheDev and
 * afs_indexFlags are used in the portion of the file that precedes
 * afs_rdwr (both by afs_write); the rest are presumably used further down.
 */
extern char afs_zeros[AFS_ZEROS];
extern struct osi_dev cacheDev;		/* passed to osi_UFSOpen when opening cache files */
extern char *afs_indexFlags;		/* indexed by a dcache entry's index field */
extern struct vcache *afs_FindVCache();
extern struct afs_lock afs_xvcache;
extern struct vcache *afs_NewVCache();
extern struct DirEntry *dir_GetBlob();
#ifdef	AFS_MINPHYS_ENV
extern int minphys();			/* installed as the last vnodeops slot on non-Mach builds */
#endif	AFS_MINPHYS_ENV
#ifdef	AFS_MACH_ENV
/* NOTE(review): presumably related to afs_remove, which is not visible here -- confirm */
int afs_removeChan = 0;
#endif	AFS_MACH_ENV

/*
 * Old-style (no parameter list) forward declarations of the AFS vnode
 * operations, so that they can be named in the afs_vnodeops initializer
 * before their definitions appear.  Which declarations are present depends
 * on the same configuration symbols that select slots in afs_vnodeops.
 */
int afs_open();
int afs_close();
int afs_rdwr();
#ifdef	AFS_AIX_ENV
int afs_ioctl();
int afs_lockctl();
#else
int afs_xioctl();
#endif
int afs_select();
int afs_getattr();
int afs_setattr();
int afs_access();
int afs_lookup();
int afs_create();
int afs_remove();
int afs_link();
int afs_rename();
int afs_mkdir();
int afs_rmdir();
int afs_readdir();
int afs_symlink();
int afs_readlink();
int afs_fsync();
int afs_inactive();
int afs_lockctl();	/* repeats the AIX declaration above; harmless */
#ifdef	AFS_VFS40
int afs_getpage();
int afs_putpage();
int afs_map();
int afs_dump();
int afs_cmp();
int afs_realvp();
#else
int afs_bmap();
int afs_strategy();
#endif
#ifdef	AFS_GATEWAY
int afs_bread();
int afs_brelse();
int afs_fid();
#ifdef	AFS_MACH_ENV
int afs_freefid();
#endif	AFS_MACH_ENV
#endif	AFS_GATEWAY
#ifdef	AFS_MACH_ENV
int afs_page_read();
int afs_page_write();
int afs_read1dir();
int afs_nlinks();
#endif	AFS_MACH_ENV
int afs_badop();	/* panics when called */
int afs_noop();		/* always returns EINVAL */

/*
 * Mariner table: a small ring of (name, vcache) pairs filled in by
 * afs_AddMarinerName and searched by afs_GetMariner.  marinerNames[i]
 * and marinerVCs[i] describe the same slot.
 */
#define	SMAR	    20			/* size of a mariner name */
#define	NMAR	    10			/* number of mariner names */
static char marinerNames[NMAR][SMAR];
static struct vcache *marinerVCs[NMAR];
static marinerPtr = 0;			/* pointer to next mariner slot to use */
/* Counter from which afs_setpag derives each new pag value (post-incremented per call). */
#ifdef	AFS_MACH_FIX
static unsigned long pagCounter = 0;
#else	AFS_MACH_FIX
static pagCounter = 0;
#endif	AFS_MACH_FIX

/*
 * The AFS vnode operations vector.
 *
 * The initializer is positional, so the order of entries (and therefore the
 * arrangement of the #ifdef sections below) has to agree with the layout of
 * struct vnodeops in the vnode header selected for this configuration; that
 * header is not part of this file.  Slots that AFS does not implement are
 * filled with afs_badop (which panics) or afs_noop (which returns EINVAL).
 *
 * Configuration summary:
 *   AFS_AIX_ENV     - the slot after afs_rdwr is afs_ioctl rather than afs_badop.
 *   AFS_VFS40       - lock control, a no-op slot and the paging entries follow
 *                     afs_inactive; otherwise bmap/strategy/bread/brelse do.
 *   AFS_GATEWAY     - real afs_bread/afs_brelse/afs_fid (and afs_freefid on
 *                     Mach) are installed instead of placeholders.
 *   AFS_VFS32/34    - each adds a lock-control slot and a fid slot.
 *   AFS_MACH_ENV    - appends page read/write, read1dir, freefid and nlinks.
 *   AFS_MINPHYS_ENV - (non-Mach only) appends minphys.
 */
struct vnodeops afs_vnodeops = {
	afs_open,
	afs_close,
	afs_rdwr,
#ifdef	AFS_AIX_ENV
	afs_ioctl,
#else
	afs_badop,
#endif
	afs_noop,
	afs_getattr,
	afs_setattr,
	afs_access,
	afs_lookup,
	afs_create,
	afs_remove,
	afs_link,
	afs_rename,
	afs_mkdir,
	afs_rmdir,
	afs_readdir,
	afs_symlink,
	afs_readlink,
	afs_fsync,
	afs_inactive,
#ifdef	AFS_VFS40
	afs_lockctl,
	afs_noop,
	afs_getpage,
	afs_putpage,
	afs_map,
	afs_dump,
	afs_cmp,
	afs_realvp,
#else
	afs_bmap,
	afs_strategy,
#ifdef	AFS_GATEWAY
	afs_bread,
	afs_brelse,
#else	AFS_GATEWAY
	afs_badop,	/* bread */
	afs_badop,	/* brelse */
#endif	AFS_GATEWAY
/* VFS 3.0 doesn't use any additional routines from here down */
#ifdef AFS_VFS32
#ifdef	AFS_SYSVLOCK
	afs_lockctl,
#else
	afs_badop,	/* lock ctrl */
#endif
#ifdef	AFS_GATEWAY
	afs_fid,
#else
	afs_noop,	/* fid */
#endif
#endif	AFS_VFS32
#ifdef	AFS_VFS34
#ifdef	AFS_SYSVLOCK
	afs_lockctl,
#else
	afs_badop,	/* lock ctrl */
#endif
#ifdef	AFS_GATEWAY
	afs_fid,
#else
	afs_noop,	/* fid */
#endif
#endif
#ifdef	AFS_MACH_ENV
	afs_page_read,
	afs_page_write,
	afs_read1dir,
#ifdef	AFS_GATEWAY
	afs_freefid,
#else	AFS_GATEWAY
	afs_noop,
#endif	AFS_GATEWAY
	afs_nlinks,
#else	AFS_MACH_ENV
#ifdef AFS_MINPHYS_ENV
	minphys,
#endif
#endif	AFS_MACH_ENV
#endif
};

/*
 * Pointer to the AFS operations vector.  On non-Mach builds the code in this
 * file recognizes an AFS vnode by comparing its v_op field against afs_ops.
 * Not defined for GFS or AIX configurations.
 */
#if	(!defined(AFS_GFS_ENV) && !defined(AFS_AIX_ENV))
struct vnodeops *afs_ops = &afs_vnodeops;
#endif	AFS_GFS_ENV

#ifndef	AFS_AIX_ENV
/*
 * vnode file operations, and our own.
 *
 * afs_xflock installs this table on a file descriptor once the file has been
 * flocked through AFS.  It is the generic vnode file-ops set except that the
 * fourth entry is afs_closex, which releases the flock around the call to
 * vno_close; vno_close itself is declared here only because afs_closex
 * calls it.
 */
extern int vno_rw();
extern int vno_ioctl();
extern int vno_select();
extern int vno_close();
extern int afs_closex();
struct fileops afs_fileops = {
    vno_rw,
    vno_ioctl,
    vno_select,
    afs_closex,
};
#endif	AFS_AIX_ENV

/*
 * Table of PRSFS_* rights masks with eight entries.  Every entry carries the
 * INSERT, DELETE and ADMINISTER rights; entries whose index has the 2 bit set
 * additionally carry PRSFS_WRITE, and entries whose index has the 4 bit set
 * additionally carry PRSFS_READ.  The 1 bit of the index makes no difference.
 * NOTE(review): the index is presumably a 3-bit unix rwx mode field; the
 * user of this table is not visible in this part of the file -- confirm.
 */
#define	AFS_FMM_BASE	(PRSFS_INSERT | PRSFS_DELETE| PRSFS_ADMINISTER)
static char fileModeMap[8] = {
    /* 0 */ AFS_FMM_BASE,
    /* 1 */ AFS_FMM_BASE,
    /* 2 */ AFS_FMM_BASE | PRSFS_WRITE,
    /* 3 */ AFS_FMM_BASE | PRSFS_WRITE,
    /* 4 */ AFS_FMM_BASE | PRSFS_READ,
    /* 5 */ AFS_FMM_BASE | PRSFS_READ,
    /* 6 */ AFS_FMM_BASE | PRSFS_READ | PRSFS_WRITE,
    /* 7 */ AFS_FMM_BASE | PRSFS_READ | PRSFS_WRITE
};
#undef	AFS_FMM_BASE

/*
 * afs_InitReq -- fill in the request structure *av from the credentials
 * *acred.
 *
 * Sets av->uid to the identity AFS should use for this request and clears
 * av->initd.  No value is returned.  How the identity is chosen depends on
 * the configuration:
 *
 *   AFS_GATEWAY    av->hostaddr is taken from afs_gateway_get_hostaddr();
 *		    if it is non-zero the uid comes from
 *		    afs_gateway_get_auid() and none of the logic below runs.
 *   AFS_PAGINCRED  the pag lives in the credential: if cr_pag is non-zero
 *		    the uid is cr_pag with the top bit set, otherwise it is
 *		    cr_uid with the top bit cleared.
 *   AFS_AIX_ENV    the cr_ngrps entries of cr_grplst are searched for a
 *		    group whose bits 12..15 equal 0xa; the first such group
 *		    becomes the uid, otherwise cr_uid is used.
 *   otherwise	    all NGROUPS entries of cr_groups are searched for a pag
 *		    group (bits 12..15 equal to 0xa when AFS_SHORTGID, top
 *		    byte equal to 'A' otherwise); the first such group
 *		    becomes the uid, otherwise cr_uid is used.
 */
afs_InitReq(av, acred)
    register struct vrequest *av;
    struct ucred *acred; {
    register long temp;
#ifdef	AFS_GATEWAY
    av->hostaddr = afs_gateway_get_hostaddr(acred);
    if (av->hostaddr) {
	av->uid = afs_gateway_get_auid(acred);
    } else {
	/* this else block is closed by the matching AFS_GATEWAY section near the end */
#endif	AFS_GATEWAY
#ifdef	AFS_PAGINCRED
	if (acred->cr_pag) {
	    av->uid = 0x80000000 | acred->cr_pag;
	} else {
	    av->uid = 0x7fffffff & acred->cr_uid;
	}
#else	AFS_PAGINCRED
#ifdef	AFS_AIX_ENV
/* Groups on AIX are somewhat different, and #ifdef'ing the generic code below too heavily would get messy, so the AIX case is written out separately here (to be tidied up later). */
	register gid_t *tptr = acred->cr_grplst;
	register int ngrps = acred->cr_ngrps;

	av->uid = acred->cr_uid;
	while (ngrps--) {
		temp = *tptr++;
		if (((temp >> 12) & 0xf) == 0xa) {
			av->uid = temp;
			break;
		}
	}
#else
#ifdef AFS_SHORTGID
    register short *tptr = (short *) acred->cr_groups;
#else
    register long *tptr = (long *) acred->cr_groups;
#endif
    register int i;

    av->uid = acred->cr_uid;	/* default, if no pag is set */
    /* note: the scan covers every slot; it does not stop at a NOGROUP terminator */
    for(i=0; i<NGROUPS; i++) {
	temp = *tptr++;
#ifdef AFS_SHORTGID
	if (((temp>>12) & 0xf) == 0xa){
#else
	if (((temp>>24) & 0xff) == 'A'){
#endif
	    av->uid = temp;
	    break;
	}
    }
#endif	AFS_AIX_ENV
#endif	AFS_PAGINCRED
#ifdef	AFS_GATEWAY
    }
#endif	AFS_GATEWAY
    av->initd = 0;
}

/*
 * HandleFlock -- apply the flock-style operation acom (LOCK_SH, LOCK_EX or
 * LOCK_UN bits) to the file avc on behalf of request areq.
 *
 * We don't send multiple read flocks to the server, but rather just count
 * them up ourselves.  Of course, multiple write locks are incompatible.
 * avc->flockCount holds the state: 0 means unlocked, a positive value is
 * the number of shared locks counted locally, and -1 means one exclusive
 * lock.
 *
 * Note that we should always try to release a lock, even if we have
 * a network problem sending the release command through, since often
 * a lock is released on a close call, when the user can't retry anyway.
 *
 * After we remove it from our structure, the lock will no longer be
 * kept alive, and the server should time it out within a few minutes.
 *
 * Returns ENOTTY when asked to unlock a file with no lock recorded;
 * otherwise returns the result of afs_CheckCode() on the outcome, which is
 * EWOULDBLOCK for an incompatible request.  avc->lock is write-locked for
 * the duration.  When AFS_AIX_ENV is defined the whole body is compiled out
 * and the function returns without a defined value.
 *
 * The parameter declarations below are not in parameter-list order; the
 * order in the parenthesized list (avc, acom, areq) is what callers use.
 */
HandleFlock(avc, acom, areq)
    register struct vcache *avc;
    register struct vrequest *areq;
    register long acom; {
    register struct conn *tc;
    register long code;
    struct AFSVolSync tsync;
    long lockType;

#ifndef	AFS_AIX_ENV
/* NO SUPPORT for aix locking yet; working on it... */
    code = 0;		/* default when we don't make any network calls */
    ObtainWriteLock(&avc->lock);
    if (acom & LOCK_UN) {
	if (avc->flockCount == 0) {
	    ReleaseWriteLock(&avc->lock);
	    return ENOTTY;	    /* no lock held */
	}
	/* unlock the lock */
	if (avc->flockCount > 0) avc->flockCount--;
	else if (avc->flockCount == -1) {
	    afs_StoreAllSegments(avc, areq);	/* fsync file early */
	    avc->flockCount = 0;
	}
	/* only the last local holder going away is reported to the server */
	if (avc->flockCount == 0) {
	    do {
		tc = afs_Conn(&avc->fid, areq);
		if (tc) code = RXAFS_ReleaseLock(tc->id, (struct AFSFid *) &avc->fid.Fid, &tsync);
		else code = -1;
	    } while (afs_Analyze(tc, code, &avc->fid, areq));
	}
    }
    else {
	/* set a new lock */
	if ((avc->flockCount != 0 && (acom & LOCK_EX)) || (avc->flockCount < 0 && (acom & LOCK_SH))) {
	    /* incompatible modes, bounce */
	    code = EWOULDBLOCK;
	}
	else {
	    /* compatible here, try to send call to file server */
	    lockType = ((acom & LOCK_EX)? LockWrite : LockRead);
	    do {
		tc = afs_Conn(&avc->fid, areq);
		if (tc) code = RXAFS_SetLock(tc->id, (struct AFSFid *) &avc->fid.Fid, lockType, &tsync);
		else code = -1;
	    } while (afs_Analyze(tc, code, &avc->fid, areq));
	}
	/* record the lock locally only if the server call succeeded */
	if (code == 0) {
	    if (acom & LOCK_EX) {
		avc->flockCount = -1;
	    }
	    else {
		avc->flockCount++;
	    }
	}
    }
    ReleaseWriteLock(&avc->lock);
    return afs_CheckCode(code, areq);
#endif	AFS_AIX_ENV
}

#ifndef	AFS_PAGINCRED
/*
 * AddPag -- install the pag group value aval in the calling process's
 * group list.
 *
 * u.u_cred is first replaced by the result of crcopy(), and the change is
 * made in that credential.  If the list already holds a pag group (bits
 * 12..15 equal to 0xa on AIX and with AFS_SHORTGID, top byte equal to 'A'
 * otherwise) that entry is overwritten.  Otherwise:
 *   - on AIX the value is handed to entergroup();
 *   - elsewhere it is stored in the first NOGROUP slot, and the following
 *     slot (if any) is set to NOGROUP to keep the list terminated.
 *
 * Returns 0 on success, or E2BIG (non-AIX only) when all NGROUPS slots are
 * in use and none of them is a pag group.
 */
static AddPag(aval)
    long aval; {
    register int i, foundSlot;
    register long temp;
#ifdef	AFS_AIX_ENV
    /* Do aix's AddPag separately for the time being (soon to optimize this) */
    register gid_t *tptr;
    register int ngrps;

    u.u_cred = crcopy(u.u_cred);
    tptr = u.u_cred->cr_grplst;
    ngrps = u.u_cred->cr_ngrps;
    for (; ngrps--;tptr++) {
	temp = *tptr;
	if (((temp >> 12) & 0xf) == 0xa) {
		*tptr = aval;
		return 0;
	}
    }
    /* Not in the current group's entry; add it by (possibly) extending it */
    entergroup((gid_t)aval);

#else	AFS_AIX_ENV

#ifdef AFS_SHORTGID
    register short *tptr;
#else
    register long *tptr;
#endif

    u.u_cred = crcopy(u.u_cred);
#ifdef	AFS_SHORTGID
    tptr = (short *) u.u_cred->cr_groups;
#else
    tptr = (long *) u.u_cred->cr_groups;
#endif
    foundSlot = 0;
    /* stop at the first free (NOGROUP) slot or the first existing pag group */
    for(i=0;i<NGROUPS;i++,tptr++) {
	temp = *tptr;
#ifdef AFS_SHORTGID
	temp &= 0xffff;		/* undo sign extension of the short group id */
#endif
	if (temp == NOGROUP) break;
#ifdef AFS_SHORTGID
	if (((temp >> 12) & 0xf) == 0xa) {
#else
	if (((temp >> 24) & 0xff) == 'A') {
#endif
	    foundSlot = 1;
	    break;
	}
    }
    if (i == NGROUPS) {
	/* if no free slots and no pag group slot, we fail. */
	return E2BIG;
    }
    else {
	/* otherwise, there is room, so setup slot properly */
	*tptr = aval;
	if (!foundSlot) {
	    /* maintain proper termination, if there is room */
	    if (i < NGROUPS-1) *(tptr+1) = NOGROUP;
	}
    }
#endif	AFS_AIX_ENV
    return 0;
}
#endif	AFS_PAGINCRED

#ifdef AFS_GETDIRHACK
/*
 * afs_AdvanceFD -- set the offset of open file afd after a transfer.
 *
 * For an AFS directory the file offset is set to aoffset shifted left by
 * four bits (afs_rdwr shifts it back down before reading a directory).
 * For anything else the offset simply advances by asize.
 */
afs_AdvanceFD(afd, aoffset, asize)
    register struct file *afd;
    long aoffset;
    long asize; {
    register struct vcache *tvc;
    int isAfsDir;

    tvc = (struct vcache *) afd->f_data;
    /* an AFS vnode is recognized by v_type on Mach, by its ops vector elsewhere */
#ifdef	AFS_MACH_ENV
    isAfsDir = (vType(tvc) == VDIR && tvc->v.v_type == ITYPE_AFS);
#else	AFS_MACH_ENV
    isAfsDir = (vType(tvc) == VDIR && tvc->v.v_op == afs_ops);
#endif	AFS_MACH_ENV
    if (!isAfsDir) {
	afd->f_offset += asize;
    }
    else {
	afd->f_offset = aoffset << 4;
    }
}
#endif

afs_xsetgroups() {
#ifdef	AFS_PAGINCRED
    setgroups();
#else	AFS_PAGINCRED
    struct vrequest treq;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("in afs_xsetgroups\n");
    afs_InitReq(&treq, u.u_cred);
    setgroups();
#ifdef AFS_SHORTGID
    if (((treq.uid >> 12) & 0xf) == 0xa) {
#else
    if (((treq.uid >> 24) & 0xff) == 'A') {
#endif
	/* we've already done a setpag, so now we redo it */
	AddPag(treq.uid);
    }
#endif	AFS_PAGINCRED
}

    
#ifdef	AFS_AIX_ENV
/*
 * *** INCOMPLETE ***
 * AIX lock-control entry point.  For the time being the plan is only to
 * emulate advisory locks (i.e. flock(2)); full support for lockf(2) is
 * meant to follow.
 *
 * As it stands this routine ignores all of its arguments and returns 0.
 * The code under "notdef" is disabled; it refers to names (tvc, fd, com)
 * that are not declared in this function, so it cannot simply be enabled.
 */
afs_lockctl(avc, lckdat, cmd, acred)
struct vcache *avc;
struct flock *lckdat;
int cmd;
struct ucred *acred;
{
    struct vrequest treq;	/* used only by the disabled code below */

#ifdef	notdef
/*  For a day or two just make it return 0 (no effect); FIX IT promptly! */
    afs_InitReq(&treq, acred);
    return(HandleFlock(tvc, fd/*XXXX*/, com, &treq));
#endif
    return 0;
}

#else

#ifdef AFS_SYSVLOCK
/*
 * afs_lockctl -- System V style record-lock entry point, mapped onto
 * whole-file flock semantics.
 *
 *   F_GETLK  reports the locally recorded state of the file (unlocked,
 *	      read-locked or write-locked) as a lock covering the whole
 *	      file, and returns 0.  No server call is made, so the answer
 *	      reflects only what avc->flockCount records.
 *   F_SETLK  is accepted only for a whole-file range (l_whence, l_start and
 *	      l_len all zero); the lock type is translated to LOCK_SH,
 *	      LOCK_EX or LOCK_UN and handed to HandleFlock(), whose result
 *	      is returned.
 *
 * Any other command, a partial range, or an unknown lock type gives EINVAL.
 */
#ifdef	AFS_VFS40
afs_lockctl(avc, af, acmd, acred, clid)
int clid;
#else
afs_lockctl(avc, af, acmd, acred)
#endif
struct vcache *avc;
struct flock *af;
int acmd;
struct ucred *acred; {
    struct vrequest treq;
    register long flockOp;

    afs_InitReq(&treq, acred);
    if (acmd != F_GETLK && acmd != F_SETLK) return EINVAL;

    if (acmd == F_GETLK) {
	/* We have no call that asks the server whether a lock is set, so
	    answer from our own count only. */
	if (avc->flockCount < 0) {
	    af->l_type = F_WRLCK;
	}
	else if (avc->flockCount > 0) {
	    af->l_type = F_RDLCK;
	}
	else {
	    af->l_type = F_UNLCK;
	}
	af->l_whence = 0;
	af->l_start = 0;
	af->l_len = 0;	/* to end of file */
	return 0;
    }

    /* F_SETLK: only a request covering the whole file can become a flock */
    if (af->l_whence != 0 || af->l_start != 0 || af->l_len != 0) return EINVAL;
    switch (af->l_type) {
	case F_RDLCK:
	    flockOp = LOCK_SH;
	    break;
	case F_WRLCK:
	    flockOp = LOCK_EX;
	    break;
	case F_UNLCK:
	    flockOp = LOCK_UN;
	    break;
	default:
	    return EINVAL; /* unknown lock type */
    }
    return HandleFlock(avc, flockOp, &treq);
}
#endif AFS_SYSVLOCK

/*
 * afs_xflock -- flock system call front end.
 *
 * Arguments (descriptor and flock command) are fetched from u.u_ap.  If the
 * descriptor refers to an AFS vnode the lock is handled by HandleFlock(),
 * the FSHLOCK/FEXLOCK bits in the file's flags are updated to match, and
 * the file's operations are switched to afs_fileops so that afs_closex gets
 * called on close.  For any other kind of file the regular flock() is
 * called.  Results are reported through u.u_error.
 */
afs_xflock () {
    struct a {
	int fd;
	int com;
    } *uap;
    struct file *fd;
    struct vrequest treq;
    struct vcache *tvc;
    int isAfs;

    uap = (struct a *)u.u_ap;
    fd = getf(uap->fd);
    if (!fd) return;

    afs_InitReq(&treq, u.u_cred);

    /* work out whether this is a vnode at all, and if so an AFS one */
    isAfs = 0;
    if (fd->f_type == DTYPE_VNODE) {
	tvc = (struct vcache *) fd->f_data;	/* valid, given a vnode */
#ifdef	AFS_MACH_ENV
	isAfs = (tvc->v.v_type == ITYPE_AFS);
#else	AFS_MACH_ENV
	isAfs = (tvc->v.v_op == afs_ops);
#endif	AFS_MACH_ENV
    }
    if (!isAfs) {
	flock();
	return;
    }

    /* This is an AFS vnode, so do the work */
    u.u_error = HandleFlock(tvc, uap->com, &treq);
    if (u.u_error || (uap->com & LOCK_UN)) {
	/* gave up lock */
	fd->f_flag &= ~(FEXLOCK | FSHLOCK);
    }
    else if (uap->com & LOCK_SH) {
	fd->f_flag |= FSHLOCK;
    }
    else if (uap->com & LOCK_EX) {
	fd->f_flag |= FEXLOCK;
    }
    fd->f_ops = &afs_fileops;
    return;
}
#endif	AFS_AIX_ENV

/*
 * Pags are implemented as follows: the set of groups whose long representation
 * is '41XXXXXX' hex are used to represent the pags.  Being a member of such
 * a group means you are authenticated as pag XXXXXX (note that 0x41 == 'A', for
 * Andrew file system).  You are never authenticated as multiple pags at once.
 *
 * The function afs_InitReq takes a credential field and formats the
 * corresponding venus request structure.  The uid field in the vrequest structure
 * is set to the *pag* you are authenticated as, or the uid, if you aren't
 * authenticated with a pag.
 *
 * The basic motivation behind pags is this: just because your unix uid is N doesn't mean
 * that you should have the same privileges as anyone logged in on the machine as user N,
 * since this would enable the superuser on the machine to sneak in and make use of
 * anyone's authentication info, even that which is only accidentally left behind
 * when someone leaves a public workstation.
 *
 * The Andrew file system doesn't use the unix uid for anything except a handle with
 * which to find the actual authentication tokens anyway, so the pag is an alternative
 * handle which is somewhat more secure (although of course not absolutely secure).
*/
/* osi_Time() value recorded by the most recent afs_setpag (non-AFS_PAGINCRED builds) */
static lastPagTime = 0;

/*
 * afs_setpag -- give the calling process a fresh pag taken from pagCounter.
 *
 * With AFS_PAGINCRED the pag is stored in a private copy of the credential
 * and u.u_error is cleared.  Otherwise the pag is encoded as a group value
 * (0xa in bits 12..15 for AFS_SHORTGID, 'A' in the top byte otherwise) and
 * installed with AddPag(), whose result becomes u.u_error.  Both variants
 * delay first so that pag values are not consumed too quickly.
 */
afs_setpag () {
#ifdef	AFS_PAGINCRED
    /* author's note: 10 msec is good for a full cycle in 250 days */
    osi_Wait(10, (char *) 0);	/* sleep sans sleep handle */
    u.u_cred = crcopy(u.u_cred);
    u.u_cred->cr_pag = 0x80000000 | pagCounter++;
    u.u_error = 0;
#else	AFS_PAGINCRED
    /* author's note: 10 sec is 11 days for short, 1940 days for long */
    for (;;) {
	/* proceed once more than 10 time units have passed since the last pag */
	if (osi_Time() > lastPagTime + 10) break;
	osi_Wait(1000, (char *) 0);	/* sleep sans sleep handle */
    }
    lastPagTime = osi_Time();
#ifdef AFS_SHORTGID
    u.u_error = AddPag((0xa << 12) + (pagCounter++ & 0xfff));
#else
    u.u_error = AddPag(('A' << 24) + (pagCounter++ & 0xffffff));
#endif
#endif	AFS_PAGINCRED
    if (afs_debug & AFSDEB_GENERAL) afs_dp("setpag returning %d\n", u.u_error);
}

/*
 * afs_AddMarinerName -- remember aname as the name associated with avc.
 *
 * The entry goes into the next slot of the mariner table, wrapping back to
 * slot 0 after the last one, so the table always holds the NMAR most recent
 * entries.  Names longer than SMAR-1 characters are truncated.  Always
 * returns 0.
 */
afs_AddMarinerName(aname, avc)
    register char *aname;
    register struct vcache *avc; {
    register int slot;
    register char *dest;

    /* take the next slot and advance the cursor, wrapping at the end */
    slot = marinerPtr;
    if (slot < NMAR) {
	marinerPtr = slot + 1;
    }
    else {
	slot = 0;
	marinerPtr = 1;
    }
    dest = marinerNames[slot];
    strncpy(dest, aname, SMAR);
    dest[SMAR-1] = 0;		/* strncpy does not terminate a full-length copy */
    marinerVCs[slot] = avc;
    return 0;
}

/*
 * afs_GetMariner -- return the name recorded for avc in the mariner table
 * (the lowest-numbered matching slot wins), or the string "a file" if avc
 * has no entry.
 */
char *afs_GetMariner(avc)
    register struct vcache *avc; {
    register int slot;

    slot = 0;
    while (slot < NMAR) {
	if (marinerVCs[slot] == avc) return marinerNames[slot];
	slot++;
    }
    return "a file";
}

/*
 * afs_index -- find the first occurrence of character c in the
 * NUL-terminated string a.
 *
 * Returns a pointer to the matching character, or a null pointer if the end
 * of the string is reached first.  The terminating NUL is never matched, so
 * searching for '\0' always yields a null pointer.
 */
char *afs_index(a, c)
    register char *a, c; {
    register char *scan;

    for (scan = a; *scan != 0; scan++) {
	if (*scan == c) return scan;
    }
    return (char *) 0;
}

/*
 * afs_noop -- filler for operation slots that are not supported but may
 * legitimately be called: logs when general debugging is on and fails the
 * call with EINVAL.
 */
afs_noop() {
    if ((afs_debug & AFSDEB_GENERAL) != 0) {
	afs_dp("autofail noop\n");
    }
    return EINVAL;
}

/*
 * afs_badop -- filler for operation slots that must never be called: logs
 * when general debugging is on, then panics.
 */
afs_badop() {
    if ((afs_debug & AFSDEB_GENERAL) != 0) {
	afs_dp("autofail badop\n");
    }
    panic("afs vnodeop");
}

#ifdef	AFS_GATEWAY
/* a freelist of one: a spare fake buf (with its data area) kept for reuse, or 0 */
struct buf *afs_bread_freebp = 0;

/*
 *  afs_bread -- read logical block lbn of vp and return a buf through *bpp.
 *
 *  Only rfs_read calls this, and it only looks at bp->b_un.b_addr.
 *  Thus we can use fake bufs (ie not from the real buffer pool).
 *
 *  A fake buf is taken from afs_bread_freebp if one is parked there,
 *  otherwise allocated together with a data area of one file system block.
 *  The block is then read with afs_read() into that data area.  If
 *  afs_read() supplies a real buffer through *bpp, that buffer is returned
 *  and the fake one is parked again; otherwise the fake buf is returned,
 *  marked by making its b_vp field point at the buf itself (afs_brelse
 *  tests for exactly that).
 *
 *  Returns 0 on success or the error from afs_read(), in which case *bpp
 *  is 0 and the fake buf has been parked for reuse.
 */
afs_bread(vp, lbn, bpp)
	struct vnode *vp;
	daddr_t lbn;
	struct buf **bpp;
{
	int offset, fsbsize, error;
	struct buf *bp;
	struct iovec iov;
	struct uio uio;

	fsbsize = vp->v_vfsp->vfs_bsize;
	offset = lbn * fsbsize;
	if (afs_bread_freebp) {
		/* reuse the parked buf; its data area is already allocated */
		bp = afs_bread_freebp;
		afs_bread_freebp = 0;
	} else {
		bp = (struct buf *)osi_Zalloc(afs_buf_zone);
#if	defined(AFS_MACH_ENV) && defined(sun3)
		/* allocate 0fxxxxxx space memory so we can mclgetx it */
		bp->b_un.b_addr = (caddr_t)udpmsgbuf_alloc();
#else
		bp->b_un.b_addr = (caddr_t)osi_Alloc(fsbsize);
#endif
	}
	/* describe a single kernel-space transfer of one block into the buf */
	iov.iov_base = bp->b_un.b_addr;
	iov.iov_len = fsbsize;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_seg = UIO_SYSSPACE;
	uio.uio_offset = offset;
	uio.uio_resid = fsbsize;
	*bpp = 0;
	error = afs_read(vp, &uio, u.u_cred, lbn, bpp);
	if (error) {
		afs_bread_freebp = bp;
		return error;
	}
	if (*bpp) {
		/* afs_read handed back a real buffer; keep ours for next time */
		afs_bread_freebp = bp;
	} else {
		*(struct buf **)&bp->b_vp = bp; /* mark as fake */
		*bpp = bp;
	}
	return 0;
}

/*
 * afs_brelse -- release a buf obtained from afs_bread.
 *
 * A real buffer (one whose b_vp does not point back at the buf itself) is
 * passed on to ufs_brelse().  A fake buf is parked in afs_bread_freebp for
 * reuse if that one-entry freelist is empty; otherwise its data area and
 * the buf itself are freed.
 */
afs_brelse(vp, bp)
struct vnode *vp;
struct buf *bp;
{
	int isFake;

	isFake = ((struct buf *)bp->b_vp == bp);
	if (!isFake) {
		ufs_brelse(bp->b_vp, bp);
	} else if (afs_bread_freebp == 0) {
		/* freelist is empty: keep this one around */
		afs_bread_freebp = bp;
	} else {
		/* freelist already holds a buf: really free this one */
#if	defined(AFS_MACH_ENV) && defined(sun3)
		udpmsgbuf_free((struct mbuf *)bp->b_un.b_addr);
#else
		osi_Free(bp->b_un.b_addr, vp->v_vfsp->vfs_bsize);
#endif
		osi_Zfree(afs_buf_zone, bp);
	}
}
#endif	AFS_GATEWAY

/*
 * afs_open -- open the file *avcp with open flags aflags on behalf of the
 * holder of credentials acred.
 *
 * The cache entry is first validated with afs_VerifyVCache().  Then:
 *   - a directory may not be opened for writing (EISDIR) and requires
 *     PRSFS_LOOKUP (else EACCES);
 *   - any other file has its text flushed, and needs PRSFS_WRITE when
 *     opened for writing and PRSFS_READ in every case; holding PRSFS_INSERT
 *     is accepted in place of either (else EACCES).
 * "Writing" means FWRITE or FTRUNC is set.  On success the entry's open
 * count (and, when writing, its execsOrWriters count) is incremented, and
 * for FTRUNC the modification date is set to the current time.
 *
 * Returns the result of afs_CheckCode() applied to the outcome.
 */
afs_open(avcp, aflags, acred)
    register struct vcache **avcp;
    long aflags;
    struct ucred *acred; {
    register long code;
    struct vrequest treq;
    register struct vcache *tvc;
    int writing;

    afs_InitReq(&treq, acred);
    tvc = *avcp;
    if (afs_debug & AFSDEB_GENERAL) afs_dp("open %x flags %x\n", tvc, aflags);
    code = afs_VerifyVCache(tvc, &treq);
    if (code) goto done;

    writing = ((aflags & (FWRITE | FTRUNC)) != 0);
    if (vType(tvc) == VDIR) {
	/* directory: read-only, and only with lookup rights */
	if (writing) {
	    code = EISDIR;
	}
	else if (!afs_AccessOK(tvc, PRSFS_LOOKUP, &treq)) {
	    code = EACCES;
	}
    }
    else {
	/* normal file or symlink */
	osi_FlushText(tvc); /* only needed to flush text if text locked last time */
	if (writing
	    && !afs_AccessOK(tvc, PRSFS_WRITE, &treq)
	    && !afs_AccessOK(tvc, PRSFS_INSERT, &treq)) {
	    /* neither write rights nor an insert-only dir */
	    code = EACCES;
	}
	else if (!afs_AccessOK(tvc, PRSFS_READ, &treq)
		 && !afs_AccessOK(tvc, PRSFS_INSERT, &treq)) {
	    /* neither read rights nor an insert-only dir */
	    code = EACCES;
	}
    }
    if (code) goto done;

    if (aflags & FTRUNC) {
	/* set date on file if open in O_TRUNC mode; this fixes touch */
	ObtainWriteLock(&tvc->lock);
	tvc->m.Date = osi_Time();
	ReleaseWriteLock(&tvc->lock);
    }
    ObtainReadLock(&tvc->lock);
    if (writing) tvc->execsOrWriters++;
    tvc->opens++;
    ReleaseReadLock(&tvc->lock);

done:
    return afs_CheckCode(code, &treq);
}

#ifndef	AFS_AIX_ENV
/* We don't need this for AIX since: (1) AIX doesn't use fileops and calls close directly instead (where the unlocking should be done) and (2) temporarily, the AIX lockf isn't supported yet. */

/* this stupid routine is used to release the flocks held on a particular
    file descriptor.  Sun doesn't pass file descr. info through to the vnode
    layer, and yet we must unlock flocked files on the *appropriate* (not first, as
    in System V) close call.  Thus this code.
    
    How does this code get invoked? The afs FLOCK plugs in the new afs file ops structure
    into any afs file when it gets flocked.  N.B: Intercepting close syscall doesn't trap
    aborts or exit system calls.
*/
/*
 * afs_closex -- close entry of afs_fileops.
 *
 * For a file that is not an AFS vnode this is just vno_close().  For an AFS
 * vnode, the FSHLOCK/FEXLOCK bits are cleared from the file's flags, the
 * file is closed with vno_close(), and if either bit had been set the flock
 * is released with HandleFlock(LOCK_UN).  A hold is kept on the vnode
 * across those steps so that it is still there when the lock is released.
 *
 * Returns the result of vno_close(); the result of the unlock is ignored.
 */
afs_closex(afd)
    register struct file *afd; {
    struct vrequest treq;
    register struct vcache *tvc;
    long heldLocks;
    int isAfs;
    long code;

    /* setup the credentials */
    afs_InitReq(&treq, u.u_cred);

    /* is this a vnode, and if so one of ours? */
    isAfs = 0;
    if (afd->f_type == DTYPE_VNODE) {
	tvc = (struct vcache *) afd->f_data;
#ifdef	AFS_MACH_ENV
	isAfs = (tvc->v.v_type == ITYPE_AFS);
#else	AFS_MACH_ENV
	isAfs = (tvc->v.v_op == afs_ops);
#endif	AFS_MACH_ENV
    }
    if (!isAfs) {
	/* not ours: let the regular close code work */
	return vno_close(afd);
    }

    VN_HOLD((struct vnode *) tvc);
    heldLocks = afd->f_flag & (FSHLOCK | FEXLOCK);
    afd->f_flag &= ~(FSHLOCK | FEXLOCK);
    code = vno_close(afd);
    if (heldLocks) HandleFlock(tvc, LOCK_UN, &treq);
    VN_RELE((struct vnode *) tvc);
    return code;	/* return code from vnode layer */
}
#endif	AFS_AIX_ENV

/*
 * afs_close -- handle any closing cleanup stuff for file avc.
 *
 * aflags are the flags the file was opened with; the special value -1
 * means the close follows a core dump and is treated as a close of a file
 * opened for writing.  Under AFS_VFS40 the extra count argument is honoured:
 * only the call with count <= 1 does any work.
 *
 * The open count is always decremented.  If the file was open for writing
 * (FWRITE or FTRUNC) its data is stored back, either directly with
 * afs_StoreAllSegments() when afs_BBusy() is true and this is not a core
 * dump, or by queueing a BOP_STORE request and waiting for it.  Store
 * failures are reported to the user with uprintf().
 *
 * Returns the result of afs_CheckCode() on the store outcome (0 if nothing
 * had to be stored).
 */
#ifdef	AFS_VFS40
afs_close(avc, aflags, count, acred)
int count;
#else
afs_close(avc, aflags, acred)
#endif
    register struct vcache *avc;
    long aflags;
    struct ucred *acred; {
    register long code;
    register struct brequest *tb;
    struct vrequest treq;
    int coredump;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("close file %x flags %x\n", avc, aflags);
#ifdef	AFS_VFS40
    if (count > 1) {
	/* The vfs layer may call this repeatedly with higher "count"; only on the last close (i.e. count = 1) we should actually proceed with the close. */
	return 0;
    }
#endif
    if (aflags == -1) {
	/* special flags means core dump */
	aflags = FWRITE;
	coredump = 1;
    }
    else coredump = 0;
    afs_InitReq(&treq, acred);
    ObtainReadLock(&avc->lock);	/* so write locker never sees opens change */
    avc->opens--;
    ReleaseReadLock(&avc->lock);
    code = 0;
    if (aflags & (FWRITE | FTRUNC)) {
	/* don't do coredumps locally due to stack space limitations at VN_RELE time */
	if (afs_BBusy()	&& !coredump) {
	    /* do it yourself if daemons are all busy */
	    ObtainWriteLock(&avc->lock);
	    avc->execsOrWriters--;
	    if (avc->execsOrWriters == 0) {
		/* put the file back */
		code = afs_StoreAllSegments(avc, &treq);
	    }
	    ReleaseWriteLock(&avc->lock);
	}
	else {
	    /* at least one daemon is idle, so ask it to do the store.
		Also, note that queued routine BStore unlocks this entry */
	    /* (which is why no release of avc->lock appears in this branch) */
	    ObtainSharedLock(&avc->lock);
	    tb = afs_BQueue(BOP_STORE, avc, 1, acred->cr_uid);
	    /* sleep waiting for the store to start, then retrieve error code */
	    while ((tb->flags & BUVALID) == 0) {
		tb->flags |= BUWAIT;
#ifdef	AFS_MACH_ENV
		afs_BSleep(tb);
#else
		osi_Sleep(tb);
#endif
	    }
	    code = tb->code;
	    afs_BRelease(tb);
	}

	/* VNOVNODE is "acceptable" error code from close, since
	    may happen when deleting a file on another machine while
	    it is open here. */
#ifdef	AFS_MACH_FIX
	/* EINVAL is also "acceptable" error code from close, since
	    may happen when unlinking a file on this machine while
	    it is open here. */
	if (code == VNOVNODE || code == EINVAL)
#else	AFS_MACH_FIX
	if (code == VNOVNODE)
#endif	AFS_MACH_FIX
	    code = 0;
	
	/* some codes merit specific complaint */
	if (code < 0) {
	    uprintf("afs: failed to store file (network problems)\n");
	}
	else if (code == ENOSPC) {
	    uprintf("afs: failed to store file (partition full)\n");
	}
	else if (code == EDQUOT) {
	    uprintf("afs: failed to store file (over quota)\n");
	}
	else if (code != 0) uprintf("afs: failed to store file (%d)\n", code);

	/* finally, we flush any text pages lying around here; check coredump flag
	  just in case, to minimize stack usage in VN_RELE */
	if (!coredump) {
	    avc->flushDV = 0;
	    osi_FlushText(avc);
	}
    }
    return afs_CheckCode(code, &treq);
}

/* called on writes */
afs_write(avc, auio, aio, acred)
    register struct vcache *avc;
    struct uio *auio;
    int aio;
    struct ucred *acred; {
    long totalLength;
    long transferLength;
    long filePos;
    long startDate;
    long max;
    register struct dcache *tdc;
    long offset, len, error;
    struct uio tuio;
    struct iovec *tvec;  /* again, should have define */
    struct osi_file *tfile;
    register long code;
    struct vrequest treq;

#ifdef	AFS_GATEWAY
    if (avc->execsOrWriters <= 0 && !afs_CredIsLocal(acred)) {
	/* this is an nfs server write */
	error = afs_gateway_linger(avc, acred);
	if (error) {
	    return afs_CheckCode(error, &treq);
	}
    }
#endif	AFS_GATEWAY
    if (avc->execsOrWriters <= 0) {
	/* bozos at Sun don't open core files for writing */
	if (afs_debug & AFSDEB_GENERAL) afs_dp("opening core file\n");
	avc->states |= CCore;
	avc->opens++;		/* make file look open */
	avc->execsOrWriters++;
    }
    startDate = osi_Time();
    afs_InitReq(&treq, acred);
    /* otherwise we read */
    totalLength = auio->afsio_resid;
    filePos = auio->afsio_offset;
    error = 0;
    transferLength = 0;
    if (afs_debug & AFSDEB_GENERAL) afs_dp("W");
    afs_CheckSize(totalLength>>10);	/* totalLength bounds the amount we'll grow this file */
    ObtainWriteLock(&avc->lock);
    if (aio & IO_APPEND) {
	/* append mode, start it at the right spot */
	filePos = auio->afsio_offset = avc->m.Length;
    }
    avc->m.Date	= startDate;	/* avoid counting lock-waiting time in file date (for ranlib) */
    tvec = (struct iovec *) osi_AllocSendSpace();
    while (totalLength > 0) {
	/* read the cached info */
	tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 0);
	if (!tdc) {
	    error = EIO;
	    break;
	}
	afs_indexFlags[tdc->index] |= IFDataMod;    /* so it doesn't disappear */
	if (tdc->f.states & DWriting == 0) {
	    /* don't mark entry as mod if we don't have to */
	    tdc->f.states |= DWriting;
	    tdc->flags |= DFEntryMod;
	}
	tfile = osi_UFSOpen(&cacheDev, tdc->f.inode);
	if (!tfile) panic("afswrite open");
	len = totalLength;	/* write this amount by default */
	max = AFS_CHUNKTOSIZE(tdc->f.chunk);	/* max size of this chunk */
	if (max	< len +	offset)	{   /*if we'd go past the end of this chunk */
	    /* it won't all fit in this chunk, so write as much
		as will fit */
	    len = max - offset;
	    if (afs_debug & AFSDEB_GENERAL) afs_dp("write doesn't all fit, chunk size %d, newlen %d\n", max, len);
	}
	/* mung uio structure to be right for this transfer */
	afsio_copy(auio, &tuio, tvec);
	afsio_trim(&tuio, len);
	tuio.afsio_offset = offset;
#ifdef	AFS_MACH_ENV
	do {
	    code = VOP_RDWR(tfile->vnode, &tuio, UIO_WRITE, 0, &osi_cred);
	} while (afs_fspause(0, code));
#else
#ifdef	AFS_AIX_ENV
	code = VNOP_RDWR(tfile->vnode, UIO_WRITE, FWRITE, (off_t)&offset, &tuio, NULL, NULL, -1);
#else
	code = VOP_RDWR(tfile->vnode, &tuio, UIO_WRITE, 0, &osi_cred);
#endif
#endif
	if (code) {
	    error = code;
	    tdc->f.versionNo = -1;	/* bad data */
	    tdc->flags |= DFEntryMod;
	    osi_Truncate(tfile,	0);	/* fake truncate the segment */
	    afs_AdjustSize(tdc,	0);	/* sets f.chunkSize to 0 */
	    afs_PutDCache(tdc);
	    osi_Close(tfile);
	    break;
	}
	/* otherwise we've written some, fixup length, etc and continue with next seg */
	len = len - tuio.afsio_resid; /* compute amount really transferred */
	afsio_skip(auio, len);	    /* advance auio over data written */
	/* compute new file size */
	if (offset + len > tdc->f.chunkBytes)
	    afs_AdjustSize(tdc, offset+len);
	totalLength -= len;
	transferLength += len;
	filePos += len;
	if (filePos > avc->m.Length)
	    avc->m.Length = filePos;
	osi_Close(tfile);
	afs_PutDCache(tdc);
    }
    ReleaseWriteLock(&avc->lock);
    osi_FreeSendSpace(tvec);
    return afs_CheckCode(error, &treq);
}


/*
 * The read/write entry point.
 *
 * Preprocessor layout: when AFS_GATEWAY is defined, afs_rdwr is a short
 * dispatcher (writes go to afs_write, directory reads go to afs_readdir
 * under AFS_GETDIRHACK, everything else goes to afs_read) and the read
 * loop below is compiled as the separate function afs_read.  When
 * AFS_GATEWAY is not defined, the afs_rdwr header is followed directly by
 * the locals and the read loop, and the same dispatch checks are compiled
 * in from the #ifndef AFS_GATEWAY section instead.
 */
afs_rdwr(avc, auio, arw, aio, acred)
    register struct vcache *avc;
    struct uio *auio;
    enum uio_rw arw;
    int aio;
    struct ucred *acred; {
#ifdef	AFS_GATEWAY

    /* check for write first */
    if (arw == UIO_WRITE) return afs_write(avc, auio, aio, acred);

#ifdef AFS_GETDIRHACK
    /* for systems that sometimes call read on dirs, intercept appropriately */
    if (vType(avc) == VDIR) {
	auio->afsio_offset >>= 4;   /* avoid block truncation effects in seekdir */
	return afs_readdir(avc, auio, acred);
    }
#endif
    return afs_read(avc, auio, acred, 0, 0);
}

/*
 *  Used by afs_rdwr (in read case) and by afs_bread.
 *  If abpp is nonzero and it is convenient to do so,
 *  we point abpp to a data-containing buffer; otherwise,
 *  we just return data via uio.
 *
 *  Currently, abpp-pointing is not implemented; to implement
 *  it, try to do VOP_BREAD in place of VOP_RDWR, making sure that
 *  bsize is the same, etc.
 *
 *  Returns the afs_VerifyVCache error directly if the status check
 *  fails; otherwise returns afs_CheckCode(error, &treq), where error is
 *  0, EIO (no dcache entry could be obtained) or the code from the
 *  failing uiomove/VOP_RDWR.
 */
afs_read(avc, auio, acred, albn, abpp)
    register struct vcache *avc;
    struct uio *auio;
    struct ucred *acred;
    daddr_t albn;
    struct buf **abpp; {
#endif	AFS_GATEWAY
    long totalLength;		/* bytes still to be transferred */
    long transferLength;	/* bytes transferred so far */
    long filePos;		/* current position in the AFS file */
    int	needPrefetch;
    register struct dcache *tdc;
    long offset, len, error;
    struct uio tuio;		/* per-chunk copy of auio, trimmed to len */
    struct iovec *tvec;		/* iovec storage backing tuio */
    struct osi_file *tfile;
    register long code;
    struct vrequest treq;

#ifndef	AFS_GATEWAY
    /* check for write first */
    if (arw == UIO_WRITE) return afs_write(avc, auio, aio, acred);

#ifdef AFS_GETDIRHACK
    /* for systems that sometimes call read on dirs, intercept appropriately */
    if (vType(avc) == VDIR) {
	auio->afsio_offset >>= 4;   /* avoid block truncation effects in seekdir */
	return afs_readdir(avc, auio, acred);
    }
#endif
#endif	AFS_GATEWAY

    /* first read of the data in this vnode, pretend all older ones flushed */
    if (vType(avc) == VREG && avc->flushDV == AFS_MAXDV)
	avc->flushDV = avc->m.DataVersion;
    if (afs_debug & AFSDEB_GENERAL) afs_dp("R");

    /* check that we have the latest status info in the vnode cache */
    afs_InitReq(&treq, acred);
    code = afs_VerifyVCache(avc, &treq);
    if (code) return code;  /* failed to get it */

    tvec = (struct iovec *) osi_AllocSendSpace();
    totalLength = auio->afsio_resid;
    filePos = auio->afsio_offset;
    error = 0;
    transferLength = 0;
    needPrefetch = 0;	/* true if we should try prefetching the next byte when done */
    ObtainReadLock(&avc->lock);
    /* one iteration per piece of a chunk; the read lock is dropped only around sleeps and the type 1 GetDCache */
    while (totalLength > 0) {
	/* read all of the cached info */
	if (filePos >= avc->m.Length) break;	/* all done */
	/* a tricky question: does the presence of the DFFetching flag
	    mean that we're fetching the latest version of the file?  No.
	    The server could update the file as soon as the fetch responsible for
	    the setting of the DFFetching flag completes.

	    However, the presence of the DFFetching flag (visible under a read lock
	    since it is set and cleared only under a write lock) means that we're fetching
	    as good a version as was known to this client at the time of the last call to
	    afs_VerifyVCache, since the latter updates the stat cache's m.DataVersion field
	    under a write lock, and from the time that the DFFetching flag goes on (before
	    the fetch starts), to the time it goes off (after the fetch completes), afs_GetDCache
	    keeps at least a read lock (actually it keeps an S lock) on the cache entry.

	    This means that if the DFFetching flag is set, we can use that data for any reads
	    that must come from the current version of the file (current == m.DataVersion).

	    Another way of looking at this same point is this: if we're fetching some
	    data and then try do an afs_VerifyVCache, the VerifyVCache operation will
	    not complete until after the DFFetching flag is turned off and the dcache entry's
	     f.versionNo field is updated.

	     Note, by the way, that if DFFetching is set, m.DataVersion > f.versionNo (the
	     latter is not updated until after the fetch completes).
	*/
	tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 2);
	/* now, first try to start transfer, if we'll need the data.  If data already coming, we don't need to do this, obviously.  Type 2 requests never return a null dcache entry, btw. */
	if (!(tdc->flags & DFFetching) && (avc->m.DataVersion != tdc->f.versionNo)) {
	    /* have cache entry, it is not coming in now, and we'll need new data */
	    if (!afs_BBusy()) {
		/* daemon is not busy */
		tdc->flags |= DFFetchReq;
		afs_BQueue(BOP_FETCH, avc, 0, filePos, (char *) tdc);
		/* sleep until DFFetchReq is no longer set on the entry */
		while (tdc->flags & DFFetchReq) {
		    if (afs_debug & AFSDEB_GENERAL) afs_dp("waiting for fetch ack %x\n", tdc);
		    /* don't need waiting flag on this one */
		    ReleaseReadLock(&avc->lock);
		    osi_Sleep(&tdc->validPos);
		    ObtainReadLock(&avc->lock);
		}
	    }
	}
	/* now data may have started flowing in (if DFFetching is on).  If data is now streaming in, then wait for some interesting stuff. */
	while ((tdc->flags & DFFetching) && tdc->validPos <= filePos) {
	    if (afs_debug & AFSDEB_GENERAL) afs_dp("waiting for data %x\n", avc);
	    /* too early: wait for DFFetching flag to vanish, or data to appear */
	    tdc->flags |= DFWaiting;
	    ReleaseReadLock(&avc->lock);
	    osi_Sleep(&tdc->validPos);
	    ObtainReadLock(&avc->lock);
	}
	/* fetching flag gone, data is here, or we never tried (BBusy for instance) */
	if (tdc->flags & DFFetching) {
	    /* still fetching, some new data is here: compute length and offset */
	    offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
	    len = tdc->validPos - filePos;
	}
	else {
	    /* no longer fetching, verify data version (avoid new GetDCache call) */
	    if (avc->m.DataVersion == tdc->f.versionNo) {
		offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
		len = tdc->f.chunkBytes - offset;
	    }
	    else {
		/* don't have current data, so get it below */
		afs_PutDCache(tdc);
		tdc = (struct dcache *) 0;
	    }
	}

	/* stale entry was dropped above: ask again with a type 1 request, without holding the vnode lock */
	if (!tdc) {
	    ReleaseReadLock(&avc->lock);
	    tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 1);
	    ObtainReadLock(&avc->lock);
	}

	if (!tdc) {
	    error = EIO;
	    break;
	}
	if (len	> totalLength) len = totalLength;   /* will read len bytes */
	if (len	<= 0) {	/* shouldn't get here if DFFetching is on */
	    /* read past the end of a chunk, may not be at next chunk yet, and yet
		also not at eof, so may have to supply fake zeros */
	    len	= AFS_CHUNKTOSIZE(tdc->f.chunk) - offset; /* bytes left in chunk addr space */
	    if (len > totalLength) len = totalLength;	/* and still within xfr request */
	    /* NOTE(review): offset was computed above as a position relative to the chunk base, yet it is subtracted from the whole-file length here; confirm filePos was not intended */
	    code = avc->m.Length - offset; /* and still within file */
	    if (len > code) len = code;
	    if (len > AFS_ZEROS) len = sizeof(afs_zeros);   /* and in 0 buffer */
	    afsio_copy(auio, &tuio, tvec);
	    afsio_trim(&tuio, len);
	    code = uiomove(afs_zeros, len, UIO_READ, &tuio);
	    if (code) {
		error = code;
		afs_PutDCache(tdc);
		break;
	    }
	}
	else {
	    /* get the data from the file */
	    tfile = osi_UFSOpen(&cacheDev, tdc->f.inode);
	    if (!tfile) panic("afsread open");
	    /* mung uio structure to be right for this transfer */
	    afsio_copy(auio, &tuio, tvec);
	    afsio_trim(&tuio, len);
	    tuio.afsio_offset = offset;
#ifdef	AFS_AIX_ENV
	    code = VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, (off_t)&offset, &tuio, NULL, NULL, -1);
#else
	    code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, &osi_cred);
#endif
	    osi_Close(tfile);
	    if (code) {
		error = code;
		afs_PutDCache(tdc);
		break;
	    }
	}
	/* otherwise we've read some, fixup length, etc and continue with next seg */
	len = len - tuio.afsio_resid; /* compute amount really transferred */
	afsio_skip(auio, len);	    /* update input uio structure */
	totalLength -= len;
	transferLength += len;
	filePos += len;
	/* decide if we should prefetch a afsio_offset+transferLength when done.  Don't start next guy coming in before this block is read in, to avoid overloading client */
	if (!(tdc->flags & (DFNextStarted | DFFetching))) needPrefetch = 1;
	else needPrefetch = 0;
	afs_PutDCache(tdc);
	if (len <= 0) break;	/* surprise eof */
    }
    ReleaseReadLock(&avc->lock);
    /* try to queue prefetch, if needed */
    /* NOTE(review): when needPrefetch is set, tdc is the entry already released by the afs_PutDCache at the bottom of the loop, and its flags are modified below; confirm that touching it after the put is safe */
    if (needPrefetch && tdc) {
	offset = auio->afsio_offset-1;	/* last byte read */
	offset = AFS_CHUNK(offset)+1;	/* next chunk we'll need */
	offset = AFS_CHUNKTOBASE(offset);   /* base of next chunk */
	if (offset < avc->m.Length && !afs_BBusy()) {
	    tdc->flags |= DFNextStarted;	/* we've tried to prefetch this guy */
	    afs_BQueue(BOP_FETCH, avc, 0, offset, (char *) 0);
	}
    }
    osi_FreeSendSpace(tvec);
    return afs_CheckCode(error, &treq);
}

/*
 * Fill in a vattr structure from the status held in a vcache entry.
 *
 * avc:   cache entry to read from.
 * attrs: attribute structure to fill in.
 *
 * Outside AFS_MACH_ENV the setuid/setgid bits are stripped from the
 * reported mode when the file's cell has CNoSUID set; under AFS_MACH_ENV
 * they are always reported.  For a volume root (mvstat == 2) the node id
 * is derived from the volume's mount point fid, or is 0 if the volume
 * cannot be found.  Always returns 0.
 */
afs_CopyOutAttrs(avc, attrs)
    register struct vattr *attrs;
    register struct vcache *avc; {
    register struct volume *rootVol;
    register struct cell *homeCell;

    /* type and mode */
#ifdef	AFS_MACH_ENV
    attrs->va_mode = vType(avc) | (avc->m.Mode & ~VFMT);
#else	AFS_MACH_ENV
    attrs->va_type = vType(avc);
    attrs->va_mode = avc->m.Mode;
    if (avc->m.Mode & (VSUID|VSGID)) {
	/* only report setuid/setgid if this cell is allowed to supply them */
	homeCell = afs_GetCell(avc->fid.Cell);
	if (homeCell && (homeCell->states & CNoSUID))
	    attrs->va_mode &= ~(VSUID|VSGID);
    }
#endif	AFS_MACH_ENV

    /* ownership and file system id */
    attrs->va_uid = avc->m.Owner;
    attrs->va_gid = avc->m.Group;
    attrs->va_fsid = 1;

    /* node id: vnode number plus the volume id shifted up 16 bits */
    if (avc->mvstat != 2) {
	attrs->va_nodeid = avc->fid.Fid.Vnode + (avc->fid.Fid.Volume << 16);
    }
    else {
	/* volume root: use the fid of the mount point instead */
	rootVol = afs_GetVolume(&avc->fid, 0);
	if (!rootVol) {
	    attrs->va_nodeid = 0;
	}
	else {
	    attrs->va_nodeid = rootVol->mtpoint.Fid.Vnode + (rootVol->mtpoint.Fid.Volume << 16);
	    afs_PutVolume(rootVol);
	}
    }

    /* link count, size and the single cached date used for all three times */
    attrs->va_nlink = avc->m.LinkCount;
    attrs->va_size = avc->m.Length;
    attrs->va_blocksize = 8192;
    attrs->va_ctime.tv_sec = avc->m.Date;
    attrs->va_mtime.tv_sec = avc->m.Date;
    attrs->va_atime.tv_sec = avc->m.Date;
    attrs->va_ctime.tv_usec = 0;
    attrs->va_mtime.tv_usec = 0;
    attrs->va_atime.tv_usec = 0;
    attrs->va_rdev = 1;

    /* size rounded up to 1K units, doubled; an empty file reports 1 */
    if (attrs->va_size)
	attrs->va_blocks = ((attrs->va_size + 1023) >> 10) << 1;
    else
	attrs->va_blocks = 1;
    return 0;
}

afs_getattr(avc, attrs, acred)
    register struct vcache *avc;
    register struct vattr *attrs;
    struct ucred *acred; {
    register long code;
    struct vrequest treq;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("getattr %x\n", avc);
    afs_InitReq(&treq, acred);
    code = afs_VerifyVCache(avc, &treq);
    if (code == 0) {
	osi_FlushText(avc); /* only needed to flush text if text locked last time */
	code = afs_CopyOutAttrs(avc, attrs);
#ifdef	AFS_GATEWAY
	afs_gateway_mutilate_mode(acred, attrs);
#endif	AFS_GATEWAY
    }
    return afs_CheckCode(code, &treq);
}

/*
 * Convert a Unix setattr request into an AFS status store request.
 *
 * av: requested attributes.  A field still holding its "unset" value
 *     (-1, or (unsigned short)-1 for va_mode outside AFS_AIX_ENV) is
 *     not stored.
 * as: filled in with the mode, group, owner and modify time that were
 *     supplied; as->Mask receives one AFS_SET* bit per supplied field.
 *
 * Always returns 0.
 */
afs_VAttrToAS(av, as)
register struct vattr *av;
register struct AFSStoreStatus *as; {
    register int mask;
    mask = 0;
#ifdef	AFS_AIX_ENV
/* under AIX the unset va_mode is tested as a plain -1 */
    if (av->va_mode != -1) {
#else
    if (av->va_mode != ((unsigned short)-1)) {
#endif
	mask |= AFS_SETMODE;
	as->UnixModeBits = av->va_mode & 0xffff;
    }
    if (av->va_gid != -1) {
	mask |= AFS_SETGROUP;
	as->Group = av->va_gid;
    }
    if (av->va_uid != -1) {
	mask |= AFS_SETOWNER;
	as->Owner = av->va_uid;
    }
    if (av->va_mtime.tv_sec != -1) {
	/* OR the bit in: a plain assignment here would discard the mode, group and owner bits collected above */
	mask |= AFS_SETMODTIME;
	as->ClientModTime = av->va_mtime.tv_sec;
    }
    as->Mask = mask;
    return 0;
}

afs_setattr(avc, attrs, acred)
    register struct vcache *avc;
    register struct vattr *attrs;
    struct ucred *acred; {
    struct vrequest treq;
    struct AFSStoreStatus astat;
    register long code;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("setattr %x\n", avc);
    afs_InitReq(&treq, acred);
    if (avc->states & CRO) {
	code=EROFS;
	goto done;
    }
    /* if file size has changed, we need write access, otherwise (e.g. chmod) give it
	a shot; if it fails, we'll discard the status info */
    if (attrs->va_size != 0xffffffff && !afs_AccessOK(avc, PRSFS_WRITE, &treq) && !afs_AccessOK(avc, PRSFS_INSERT, &treq)) {
	code = EACCES;
	goto done;
    }
    afs_VAttrToAS(attrs, &astat);	/* interpret request */
    code = 0;
    if (attrs->va_size != -1) {
	ObtainWriteLock(&avc->lock);
	code = afs_TruncateAllSegments(avc, attrs->va_size, &treq);
	/* if date not explicitly set by this call, set it ourselves, since we changed the data */
	if (!(astat.Mask & AFS_SETMODTIME)) {
	    astat.Mask |= AFS_SETMODTIME;
	    astat.ClientModTime = osi_Time();
	}
	if (code == 0 && avc->execsOrWriters <= 0) {
	    code = afs_StoreAllSegments(avc, &treq);
	}
	ReleaseWriteLock(&avc->lock);
	avc->flushDV = 0;
	osi_FlushText(avc);	/* do this after releasing all locks */
    }
    if (code == 0) {
	ObtainSharedLock(&avc->lock);	/* lock entry */
	code = afs_WriteVCache(avc, &astat, &treq);    /* send request */
	ReleaseSharedLock(&avc->lock);	/* release lock */
    }
    if (code) avc->states &= ~CStatd;	    /* error?  erase any changes we made to vcache entry */
done:
    return afs_CheckCode(code, &treq);
}

afs_access(avc, amode, acred)
    register struct vcache *avc;
    register long amode;
    struct ucred *acred; {
    register long code;
    struct vrequest treq;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("access %x mode %x\n", avc, amode);
    afs_InitReq(&treq, acred);

    code = afs_VerifyVCache(avc, &treq);
    if (code) return afs_CheckCode(code, &treq);

    /* if we're looking for write access and we have a read-only file system, report it */
    if ((amode & VWRITE) && (avc->states & CRO)) {
	return EROFS;
    }
    code = 1;		/* Default from here on in is access ok. */
    if (vType(avc) == VDIR) {
	if (amode & VEXEC) code = afs_AccessOK(avc, PRSFS_LOOKUP, &treq);
	if (code && (amode & VWRITE)) {
	    code = afs_AccessOK(avc, PRSFS_INSERT, &treq);
	    if (!code) code = afs_AccessOK(avc, PRSFS_DELETE, &treq);
	}
	if (code && (amode & VREAD))
	    code = afs_AccessOK(avc, PRSFS_LOOKUP, &treq);
    }
    else {
	if (amode & VEXEC) {
	    code = afs_AccessOK(avc, PRSFS_READ, &treq);
	    if (code && (avc->m.Mode & 0100) == 0) code = 0;
	}
	if (code && (amode & VWRITE)) code = afs_AccessOK(avc, PRSFS_WRITE, &treq);
	if (code && (amode & VREAD)) code = afs_AccessOK(avc, PRSFS_READ, &treq);
    }
    if (code) return 0;		/* if access is ok */
    else return	afs_CheckCode(EACCES, &treq);		/* failure code */
}

/* call under write lock, evaluate mvid field from a mt pt */
EvalMountPoint(avc, advc, areq)
    register struct vcache *avc;
    struct vcache *advc;	    /* the containing dir */
    register struct vrequest *areq; {
    register long code;
    register struct volume *tvp;
    struct VenusFid tfid;
    struct cell *tcell;
    char *cpos;
    char type;

    if (avc->mvid && (avc->states & CMValid)) return 0;	/* done while racing */
    code = afs_HandleLink(avc, areq);
    if (code) return code;
    /* now link name is inavc->linkData, get appropriate volume info */
    type = avc->linkData[0];
    cpos = afs_index(&avc->linkData[1], ':');
    if (cpos) {
	/* parse cellular mt point */
	*cpos = 0;
	code = 0;
	tcell = afs_GetCellByName(&avc->linkData[1]);
	if (tcell) {
	    tvp = afs_GetVolumeByName(cpos+1, tcell->cell, 1, areq);
	}
	else {
	    code = ENODEV;
	}
	*cpos =	':';	    /* put it back */
	if (code) return code;
    }
    else {
	tvp = afs_GetVolumeByName(&avc->linkData[1], avc->fid.Cell, 1, areq);
    }
    if (!tvp) {
	return ENOENT;
    }
    /* next, check for backups within backups */
    if ((avc->states & VBackup) && (tvp->states & VBackup)) {
	afs_PutVolume(tvp);
	return ELOOP;
    }
    /* don't allocate mvid field until we're sure we have something to put in */
    if ((avc->states & CRO) || cpos) {	/* mt pt in a ro vol, or a cellular mt point */
	if (type == '#' && tvp->roVol) {
	    /* there is a readonly volume, and we should map to it */
	    tfid.Fid.Volume = tvp->roVol;	/* remember volume we really want */
	    tfid.Cell = tvp->cell;
	    afs_PutVolume(tvp); /* release the old volume */
	    tvp = afs_GetVolume(&tfid, areq); /* get the new one */
	    if (!tvp) {
		return ENOENT;    /* oops, can't do it */
	    }
	}
    }
    if (avc->mvid == 0)
	avc->mvid = (struct VenusFid *) osi_Zalloc(afs_VenusFid_zone);
    avc->mvid->Cell = tvp->cell;
    avc->mvid->Fid.Volume = tvp->volume;
    avc->mvid->Fid.Vnode = 1;
    avc->mvid->Fid.Unique = 1;
    avc->states |= CMValid;
    tvp->mtpoint = avc->fid;		/* setup back pointer to mtpoint */
    tvp->dotdot = advc->fid;
    afs_PutVolume(tvp);
    return 0;
}
    
/*
 * Decide whether aname is acceptable as the name of a new entry.
 * Returns 0 if the name ends in "@sys" or contains a character with the
 * 0x80 bit set, and 1 otherwise.
 */
ENameOK(aname)
    register char *aname; {
    register char *cp;
    register int nameLen;

    nameLen = strlen(aname);
    if (nameLen >= 4 && strcmp(&aname[nameLen - 4], "@sys") == 0)
	return 0;
    /* reject any character with the high bit set */
    for (cp = aname; *cp != 0; cp++) {
	if (*cp & 0x80)
	    return 0;
    }
    return 1;
}


/* Sorry Mike but I'm including the following here just until I find the darn strcat() declaration in the SunOS/Ultrix kernel (preliminary searches were negative)... */

#if	(!defined(AFS_IBM_ENV) && !defined(AFS_AIX_ENV) && !defined(AFS_MACH_ENV))
/*
 * Local copy of strcat: append the string s2, including its terminating
 * null, to the end of the string s1.  Returns s1.
 */
char *
strcat(s1, s2)
	register char *s1, *s2;
{
	register char *tail;

	/* find the terminating null of s1 */
	for (tail = s1; *tail != 0; tail++)
		;
	/* copy s2 over it, stopping after the null has been copied */
	while ((*tail++ = *s2++) != 0)
		;
	return (s1);
}
#endif	/*(!defined(AFS_IBM_ENV) && !defined(AFS_AIX_ENV) && !defined(AFS_MACH_ENV))*/

/*
 * Copy aname into aresult, replacing a trailing "@sys" with the system
 * name: SYS_NAME normally, or the value from afs_gateway_get_sysname(acred)
 * when AFS_GATEWAY is defined.  Names without that suffix are copied
 * unchanged.  aresult must be large enough for the expanded name; no
 * bound is checked here.
 */
#ifdef	AFS_GATEWAY
HandleAtName(aname, aresult, acred)
    register char *aname;
    register char *aresult;
    struct ucred *acred; {
#else	AFS_GATEWAY
HandleAtName(aname, aresult)
    register char *aname;
    register char *aresult; {
#endif	AFS_GATEWAY
    register int stemLen;	/* length of the name without its last 4 chars */

    stemLen = strlen(aname);
    stemLen -= 4;
    if (stemLen < 0 || strcmp(aname + stemLen, "@sys") != 0) {
	/* nothing to expand */
	strcpy(aresult, aname);
    }
    else {
	/* copy the part before "@sys", then put the system name after it */
	strncpy(aresult, aname, stemLen);
#ifdef	AFS_GATEWAY
	strcpy(aresult + stemLen, afs_gateway_get_sysname(acred));
#else	AFS_GATEWAY
	strcpy(aresult + stemLen, SYS_NAME);
#endif	AFS_GATEWAY
    }
}

/*
 * Vnode lookup operation: find the entry called aname in directory adp
 * and hand back a cache entry for it through *avcp.
 *
 * A name ending in "@sys" is first expanded by HandleAtName into a
 * send-space buffer, which is released at "done".  ".." in a volume root
 * (mvstat == 2) is resolved through adp->mvid.  Otherwise the caller
 * needs PRSFS_LOOKUP on adp and the name is looked up in the cached
 * directory data.  If the result is a mount point (mvstat == 1) it is
 * evaluated if necessary and the cache entry of its target is returned
 * in its place.
 *
 * Returns afs_CheckCode() of 0 or an error code; *avcp is only set when
 * the code is 0.
 */
afs_lookup(adp, aname, avcp, acred)
    register struct vcache *adp, **avcp;
    char *aname;
    struct ucred *acred; {
    struct vrequest treq;
    char *tname;		/* aname, or the buffer holding its @sys expansion */
    register struct vcache *tvc;
    struct vcache *uvc;
    register long code;
    int evalFlag;		/* set if the mount point was evaluated by this call */
    register struct dcache *tdc;
    long dirOffset, dirLen, theDir;
    struct VenusFid tfid;

    /* lookup the name aname in the appropriate dir, and return a cache entry
      on the resulting fid */
    if (afs_debug & AFSDEB_GENERAL) afs_dp("lookup dir %x, name %s\n", adp, aname);
    afs_InitReq(&treq, acred);
    /* code is used as a scratch name length here */
    code = strlen(aname);
    if (code >= 4 && strcmp(aname+code-4, "@sys")==0) {
	tname = (char *) osi_AllocSendSpace();
#ifdef	AFS_GATEWAY
	HandleAtName(aname, tname, acred);
#else	AFS_GATEWAY
	HandleAtName(aname, tname);
#endif	AFS_GATEWAY
    }
    else {
	tname = aname;
    }
    code = afs_VerifyVCache(adp, &treq);
    if (code) goto done;
    /* watch for ".." in a volume root */
    if (adp->mvstat == 2 && tname[0] == '.' && tname[1] == '.' && tname[2] == 0) {
	/* looking up ".." in root via special hacks */
	if (adp->mvid == (struct VenusFid *) 0 || adp->mvid->Fid.Volume == 0) {
	    if (afs_debug & AFSDEB_GENERAL) afs_dp("afs: punting on '..' call for %x\n", adp);
	    code = ENOTTY;
	    goto done;
	}
	/* otherwise we have the fid here, so we use it */
	tvc = afs_GetVCache(adp->mvid, &treq);
	if (!tvc) {
	    code = ENOENT;
	    goto done;
	}
	else {
	    *avcp = tvc;
	    code = 0;
	    goto done;
	}
    }
    /* now check the access */
    if (!afs_AccessOK(adp, PRSFS_LOOKUP, &treq)) {
	code = EACCES;
	goto done;
    }
    /* now we have to lookup the next fid */
    tdc = afs_GetDCache(adp, 0, &treq, &dirOffset, &dirLen, 1);
    if (!tdc) {
	code = EIO;
	goto done;
    }
    /* now we will just call dir package with appropriate inode.
      Dirs are always fetched in their entirety for now */
    ObtainReadLock(&adp->lock);
    theDir = tdc->f.inode;
    code = dir_Lookup(&theDir, tname, &tfid.Fid);
    ReleaseReadLock(&adp->lock);
    afs_PutDCache(tdc);
    if (code) {
	goto done;
    }
    /* new fid has same cell and volume */
    tfid.Cell = adp->fid.Cell;
    tfid.Fid.Volume = adp->fid.Fid.Volume;
    if (afs_debug & AFSDEB_GENERAL) afs_dp("lookup fid is %x.%x.%x.%x\n", tfid.Cell, tfid.Fid.Volume, tfid.Fid.Vnode, tfid.Fid.Unique);
    /* now get the status info */
    tvc = afs_GetVCache(&tfid, &treq);
    if (tvc) {
	/* remember which directory this entry was found in */
	tvc->parentVnode = adp->fid.Fid.Vnode;
	tvc->parentUnique = adp->fid.Fid.Unique;
	if (tvc->mvstat == 1) {
	    if (afs_debug & AFSDEB_GENERAL) afs_dp("crossing mt point %x\n", tvc);
	    /* a mt point, possibly unevaluated */
	    if (tvc->mvid == (struct VenusFid *) 0 || !(tvc->states & CMValid)) {
		if (afs_debug & AFSDEB_GENERAL) afs_dp("trying to eval mt point\n");
		ObtainWriteLock(&tvc->lock);
		/* an error here is only logged; what happens next depends on whether tvc->mvid is set */
		code = EvalMountPoint(tvc, adp, &treq);
		if (code) {
		    if (afs_debug & AFSDEB_GENERAL) afs_dp("evalmountpoint error %d\n", code); 
		}
		ReleaseWriteLock(&tvc->lock);
		evalFlag = 1;
	    }
	    else evalFlag = 0;
	    /* next, we want to continue using the target of the mt point */
	    if (tvc->mvid) {
		/* now lookup target, to set .. pointer */
		if (afs_debug & AFSDEB_GENERAL) afs_dp("mount point is to %x.%x.%x.%x\n", tvc->fid.Cell, tvc->fid.Fid.Volume, tvc->fid.Fid.Vnode, tvc->fid.Fid.Unique);
		uvc = tvc;	/* remember for later */
		tvc = afs_GetVCache(tvc->mvid, &treq);
		afs_PutVCache(uvc); /* we're done with it */
		if (!tvc) {
		    code = ENOENT;
		    goto done;
		}
		/* now, if we came via a new mt pt (say because of a new release of a R/O volume), we must reevaluate the .. ptr to point back to the appropriate place */
		if (evalFlag) {
		    ObtainWriteLock(&tvc->lock);
		    if (tvc->mvid == (struct VenusFid *) 0) {
			tvc->mvid = (struct VenusFid *) osi_Zalloc(afs_VenusFid_zone);
		    }
		    /* setup backpointer */
		    *tvc->mvid = adp->fid;
		    ReleaseWriteLock(&tvc->lock);
		}
	    }
	    else {
		if (afs_debug & AFSDEB_GENERAL) afs_dp("failed to find mt pt\n");
		afs_PutVCache(tvc);
		code = ENOENT;
		goto done;
	    }
	}
	*avcp = tvc;
	if (afs_debug & AFSDEB_GENERAL) afs_dp("lookup returning ce %x\n", tvc);
	code = 0;
    }
    else code = ENOENT;
done:
    /* put the network buffer back, if need be */
    if (tname != aname) osi_FreeSendSpace(tname);
    /* on success, record the original (unexpanded) name with the entry found */
    if (code == 0) {
	afs_AddMarinerName(aname, tvc);
    }
    return afs_CheckCode(code, &treq);
}

/*
 * Vnode create operation: create the file aname in directory adp, or
 * open the existing one when aexcl is NONEXCL.
 *
 * The name is rejected with EINVAL if ENameOK refuses it.  If the name
 * is already in the cached directory, an exclusive create fails with
 * EEXIST; otherwise the existing entry is used and, if attrs->va_size is
 * set, truncated through afs_setattr.  If the name is not found locally
 * the file is created with RXAFS_CreateFile, the cached directory is
 * updated in place when afs_LocalHero allows it, and a cache entry for
 * the new fid is found or made under the afs_xvcache lock.
 *
 * On success *avcp is set and attrs is overwritten with the attributes
 * of the resulting entry.  Returns afs_CheckCode() of the result code.
 */
afs_create(adp, aname, attrs, aexcl, amode, avcp, acred)
    register struct vcache *adp;
    char *aname;
    struct vattr *attrs;
    enum vcexcl aexcl;
    int amode;
    struct vcache **avcp;
    struct ucred *acred; {
    long origCBs, origZaps, finalZaps;	/* snapshots of afs_evenCBs / afs_evenZaps */
    struct vrequest treq;
    register long code;
    register struct conn *tc;
    struct VenusFid newFid;
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus OutFidStatus, OutDirStatus;
    struct AFSVolSync tsync;
    struct AFSCallBack CallBack;
    long now;
    register struct dcache *tdc;
    long offset, len, host;
    register struct vcache *tvc;

    afs_InitReq(&treq, acred);

    if (afs_debug & AFSDEB_GENERAL) afs_dp("creating file %s in %x with mode %x\n", aname, adp, amode);
    if (!ENameOK(aname)) {
	code = EINVAL;
	goto done;
    }
    code = afs_VerifyVCache(adp, &treq);
    if (code) goto done;

    /* tdc may be null here; every later use of it is guarded */
    tdc = afs_GetDCache(adp, 0, &treq, &offset, &len, 1);
    ObtainWriteLock(&adp->lock);
    if (tdc) {
	/* see if file already exists.  If it does, we only set the size attributes (to handle O_TRUNC) */
	code = dir_Lookup(&tdc->f.inode, aname, &newFid.Fid);
	if (code == 0) {
	    afs_PutDCache(tdc);
	    ReleaseWriteLock(&adp->lock);
	    if (aexcl != NONEXCL) {
		code = EEXIST;	    /* file exists in excl mode open */
		goto done;
	    }
	    /* found the file, so use it */
	    newFid.Cell = adp->fid.Cell;
	    newFid.Fid.Volume = adp->fid.Fid.Volume;
	    tvc = afs_GetVCache(&newFid, &treq);
	    if (tvc) {
		len = attrs->va_size;	/* only do the truncate */
		if (len != 0xffffffff) {
		    if (vType(tvc) != VREG) {
			afs_PutVCache(tvc);
			code = EISDIR;
			goto done;
		    }
#ifdef	AFS_GATEWAY
		    if (!afs_AccessOK(tvc, PRSFS_WRITE, &treq)) {
			afs_PutVCache(tvc);
			code = EACCES;
			goto done;
		    }
#endif	AFS_GATEWAY
		    /* do a truncate */
		    /* clear every other field so that afs_setattr changes the size only */
		    VATTR_NULL(attrs);
		    attrs->va_size = len;
		    tvc->execsOrWriters++;	/* pretend we are writing, to avoid store */
		    code = afs_setattr(tvc, attrs, acred);
		    tvc->execsOrWriters--;
		    if (code) {
			afs_PutVCache(tvc);
			goto done;
		    }
		}
		*avcp = tvc;
	    }
	    else code = ENOENT;
	    /* make sure vrefCount bumped only if code == 0 */
	    goto done;
	}
    }

    /* name not found locally (or no directory data): ask the file server to create it */
    origCBs = afs_evenCBs;	/* if changes, we don't really have a callback */
    origZaps = afs_evenZaps;	/* number of even numbered vnodes discarded */
    InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE;
    InStatus.ClientModTime = osi_Time();
    InStatus.UnixModeBits = attrs->va_mode & 0xffff;   /* only care about protection bits */
    /* repeat the call for as long as afs_Analyze says to */
    do {
	tc = afs_Conn(&adp->fid, &treq);
	if (tc) {
	    host = tc->server->host;	    /* remember for callback processing */
	    now = osi_Time();
	    code = RXAFS_CreateFile(tc->id, (struct AFSFid *) &adp->fid.Fid, aname, &InStatus, (struct AFSFid *) &newFid.Fid, &OutFidStatus, &OutDirStatus, &CallBack, &tsync);
	    /* the expiration time is added to the local time taken just before the call */
	    CallBack.ExpirationTime += now;
	}
	else code = -1;
    } while(afs_Analyze(tc, code, &adp->fid, &treq));
    if (code == EEXIST && aexcl == NONEXCL) {
	/* if we get an EEXIST in nonexcl mode, just do a lookup */
	if (tdc) afs_PutDCache(tdc);
	ReleaseWriteLock(&adp->lock);
	code = afs_lookup(adp, aname, avcp, acred);
	goto done;
    }
    if (code) {
	/* a negative code also invalidates the cached status of the directory */
	if (code < 0) adp->states &= ~CStatd;
	ReleaseWriteLock(&adp->lock);
	if (tdc) afs_PutDCache(tdc);
	goto done;
    }
    /* otherwise, we should see if we can make the change to the dir locally */
    if (tdc) {
	/* we have the thing in the cache */
	if (afs_LocalHero(adp, tdc, &OutDirStatus, 1)) {
	    /* we can do it locally */
	    code = dir_Create(&tdc->f.inode, aname, &newFid.Fid);
	    if (code) {
		tdc->f.versionNo = -1;	/* surprise error -- use invalid value */
		/* DFEntryMod set by local hero */
	    }
	}
	afs_PutDCache(tdc);
    }
    newFid.Cell = adp->fid.Cell;
    newFid.Fid.Volume = adp->fid.Fid.Volume;
    ReleaseWriteLock(&adp->lock);

    /* New tricky optimistic callback handling algorithm for file creation works
	as follows.  We create the file essentially with no locks set at all.  File
	server may thus handle operations from others cache managers as well as from
	this very own cache manager that reference the file in question before
	we managed to create the cache entry.  However, if anyone else changes
	any of the status information for a file, we'll see afs_evenCBs increase
	(files always have even fids).  If someone on this workstation manages
	to do something to the file, they'll end up having to create a cache
	entry for the new file.  Either we'll find it once we've got the afs_xvcache
	lock set, or it was also *deleted* the vnode before we got there, in which case
	we will find afs_evenZaps has changed, too.  Thus, we only assume we have the right
	status information if no callbacks or vnode removals have occurred to even
	numbered files from the time the call started until the time that we got the xvcache
	lock set.  Of course, this also assumes that any call that modifies a file first
	gets a write lock on the file's vnode, but if that weren't true, the whole cache manager
	would fail, since no call would be able to update the local vnode status after modifying
	a file on a file server. */
    ObtainWriteLock(&afs_xvcache);
    finalZaps =	afs_evenZaps;	    /* do this before calling newvcache */
    if (!(tvc = afs_FindVCache(&newFid))) {
	tvc = afs_NewVCache(&newFid, host);
	if (tvc) {
	    /* add the callback in */
	    if (origCBs == afs_evenCBs && origZaps == finalZaps) {
		tvc->states |= CStatd;  /* we've fake entire thing, so don't stat */
		tvc->cbExpires = CallBack.ExpirationTime;
	    }
	    else tvc->callback = 0;
	    afs_ProcessFS(tvc, &OutFidStatus);
	    *avcp = tvc;
	    code = 0;
	}
	else code = ENOENT;
    }
    else {
	/* otherwise cache entry already exists, someone else must have created  */
	tvc->states &= ~CStatd;	/* don't need write lock to *clear* this flag */
	*avcp = tvc;
	code = 0;
    }
    ReleaseWriteLock(&afs_xvcache);
done:
    if (code == 0) {
	afs_AddMarinerName(aname, *avcp);
	/* return the new status in vattr (barf) */
	afs_CopyOutAttrs(*avcp, attrs);
    }
    return afs_CheckCode(code, &treq);
}

/*
 * Decide whether a directory change just made on the server can also be
 * applied to the locally cached copy of the directory.
 *
 * avc:   stat cache entry of the directory; must be write-locked.
 * adc:   data cache entry holding the directory contents.
 * astat: directory status returned by the server after the change.
 * aincr: how far the data version is expected to have moved.
 *
 * The stat cache entry always takes the server's data version, length
 * and modify time, and the data cache entry is always marked DFEntryMod.
 * If the server's version equals the cached data's version plus aincr,
 * the cached data is stamped with the new version and 1 is returned (do
 * the operation locally).  Otherwise the cached data's version is set to
 * -1 and 0 is returned.
 */
afs_LocalHero(avc, adc, astat, aincr)
    register struct vcache *avc;
    register AFSFetchStatus *astat;
    register struct dcache *adc;
    register int aincr; {
    register long serverVersion;
    register int tracked;

    serverVersion = astat->DataVersion;

    /* the server's word is final for the stat cache, whatever we hold locally */
    avc->m.DataVersion = serverVersion;
    avc->m.Length = astat->Length;
    avc->m.Date = astat->ClientModTime;

    adc->flags |= DFEntryMod;
    /* did only the expected number of changes happen since our copy? */
    tracked = (serverVersion == adc->f.versionNo + aincr);
    adc->f.versionNo = tracked ? serverVersion : -1;
    return tracked;
}

/*
 *  Wire a file down in the cache: walk its chunks through afs_GetDCache
 *  so that the data is brought in, then set CWired so that
 *  callbacks/callback expirations are (temporarily) ignored and the
 *  cache file(s) stay in the cache.  The file is unwired when
 *  afs_inactive is called (ie no one has VN_HOLD on vnode), or when
 *  afs_IsWired notices that the file is no longer Active.
 *
 *  Does nothing if the file is already wired or its status is not
 *  cached (size unknown).  At most 1024 chunks are walked, and the walk
 *  stops early if a chunk cannot be obtained; CWired is set either way.
 */
static afs_Wire(avc, areq)
struct vrequest *areq;
register struct vcache *avc; {
    register long chunk, base;
    register struct dcache *entry;
    long offset, len;

    if ((avc->states & CWired) || !(avc->states & CStatd))
	return;

    chunk = 0;
    while (chunk < 1024) {	/* sanity limit on the number of chunks */
	base = AFS_CHUNKTOBASE(chunk);
	if (base >= avc->m.Length) break;	/* past the end of the file */
	entry = afs_GetDCache(avc, base, areq, &offset, &len, 0);
	if (entry == 0) break;
	afs_PutDCache(entry);
	chunk++;
    }
    avc->states |= CWired;
}

/*
 *  afs_IsWired -- report whether a file is still wired down.
 *
 *  Returns 1 if CWired is set and osi_Active says the file is in use.
 *  If CWired is set but the file is no longer active, the flag is
 *  cleared as a side effect and 0 is returned.  Returns 0 for files
 *  that were never wired.
 */
afs_IsWired(avc)
    register struct vcache *avc; {
    if ((avc->states & CWired) == 0)
	return 0;			/* never wired */
    if (!osi_Active(avc)) {
	avc->states &= ~CWired;		/* stale wiring; drop it now */
	return 0;
    }
    return 1;
}

/*
 * afs_remove -- remove the entry aname from directory adp.
 *
 *	adp	vcache of the directory
 *	aname	name of the entry to remove
 *	acred	credentials of the caller
 *
 * Issues RXAFS_RemoveFile to the server, then tries to apply the same
 * deletion to the locally cached directory data (see afs_LocalHero).
 * If the removed file has a vcache entry, that entry's link count or
 * status validity is adjusted afterwards.  Returns 0 on success, or the
 * result of afs_CheckCode on failure.
 */
afs_remove(adp, aname, acred)
    register struct vcache *adp;
    char *aname;
    struct ucred *acred; {
    struct vrequest treq;
    register struct dcache *tdc;
    struct VenusFid unlinkFid;
    register long code;
    register struct conn *tc;
    register struct vcache *tvc;
    long offset, len;
    struct AFSFetchStatus OutDirStatus;
    struct AFSVolSync tsync;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("remove dir %x, name %s\n", adp, aname);
    afs_InitReq(&treq, acred);
    code = afs_VerifyVCache(adp, &treq);
    if (code) return afs_CheckCode(code, &treq);
    /* tdc may come back null; every later use is guarded by "if (tdc)" */
    tdc	= afs_GetDCache(adp, 0,	&treq, &offset,	&len, 1);  /* test for error below */
    ObtainWriteLock(&adp->lock);
    unlinkFid.Fid.Vnode = 0;
    tvc = (struct vcache *) 0;
    if (tdc) {
	/* find the fid of the victim in the cached directory, and from
	   that its vcache entry, if one exists */
	code = dir_Lookup(&tdc->f.inode, aname, &unlinkFid.Fid);
	if (code == 0) {
	    unlinkFid.Cell = adp->fid.Cell;
	    unlinkFid.Fid.Volume = adp->fid.Fid.Volume;
	    ObtainReadLock(&afs_xvcache);
	    tvc = afs_FindVCache(&unlinkFid);
	    ReleaseReadLock(&afs_xvcache);
	}
    }
    if (tvc && osi_Active(tvc)) {
	/* about to delete whole file, prefetch it first */
	/* the directory lock is dropped while the file is wired, and
	   retaken afterwards; the two locks are never held together here */
	ReleaseWriteLock(&adp->lock);
	ObtainWriteLock(&tvc->lock);
	afs_Wire(tvc, &treq);
	ReleaseWriteLock(&tvc->lock);
	ObtainWriteLock(&adp->lock);
    }
    /* the lookup result in code is overwritten by the rpc result here */
    do {
	tc = afs_Conn(&adp->fid, &treq);
	if (tc) {
	    code = RXAFS_RemoveFile(tc->id, (struct AFSFid *) &adp->fid.Fid, aname, &OutDirStatus, &tsync);
	}
	else code = -1;
    } while (afs_Analyze(tc, code, &adp->fid, &treq));
    if (code) {
	/* rpc failed: drop our references and bail out */
	if (tdc) afs_PutDCache(tdc);
	if (tvc) afs_PutVCache(tvc);
	/* negative code: directory status is no longer trusted */
	if (code < 0) adp->states &= ~CStatd;
	ReleaseWriteLock(&adp->lock);
	return afs_CheckCode(code, &treq);
    }
#ifdef	AFS_MACH_ENV
    if (afs_removeChan) osi_Wakeup(&afs_removeChan);
#endif
    if (tdc) {
	/* we have the thing in the cache */
	if (afs_LocalHero(adp, tdc, &OutDirStatus, 1)) {
	    /* we can do it locally */
	    code = dir_Delete(&tdc->f.inode, aname);
	    if (code) tdc->f.versionNo = -1;	/* surprise error -- invalid value */
	    /* DFEntryMod set by local hero */
	}
	afs_PutDCache(tdc);	/* drop ref count */
    }
    ReleaseWriteLock(&adp->lock);
    /* now look at the vnode for the unlinked file (tvc, found above) and see if we
	should force it from cache.  Note that we used FindVCache instead of GetVCache
	since if the file's really gone, we won't be able to fetch the status info anyway.  */
    if (tvc) {
	ObtainWriteLock(&tvc->lock);
	if (osi_Active(tvc) && tvc->m.LinkCount <= 1) {
	    /* file is open, but possibly gone on the server; do the best we can */
	    tvc->m.LinkCount--;
	}
	else {
	    /* link count, ctime are wrong; we don't have correct value */
	    tvc->states &= ~CStatd;
	    if (tvc->m.LinkCount <= 1) afs_TryToSmush(tvc);
	}
	ReleaseWriteLock(&tvc->lock);
	afs_PutVCache(tvc);
    }
    /* a failure of the local dir_Delete is not reported to the caller */
    return 0;
}

/*
 * afs_link -- create a hard link named aname in directory adp that
 * refers to the existing file avc.
 *
 * Fails with EXDEV if the file and the directory are not in the same
 * cell and volume.  After a successful RXAFS_Link the cached directory
 * data is updated locally when afs_LocalHero allows it, and the file's
 * status is marked invalid because its new link count is not known.
 * Returns the result of afs_CheckCode.
 */
afs_link(avc, adp, aname, acred)
    register struct vcache *avc;
    register struct vcache *adp;
    char *aname;
    struct ucred *acred; {
    struct vrequest treq;
    register struct dcache *dirData;
    register long code;
    register struct conn *conn;
    long offset, len;
    struct AFSFetchStatus OutFidStatus, OutDirStatus;
    struct AFSVolSync tsync;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("hard link %x in dir %x with name %s\n", avc, adp, aname);
    afs_InitReq(&treq, acred);

    /* hard links may not cross cell or volume boundaries */
    if (avc->fid.Cell != adp->fid.Cell || avc->fid.Fid.Volume != adp->fid.Fid.Volume) {
	code = EXDEV;
	return afs_CheckCode(code, &treq);
    }

    code = afs_VerifyVCache(adp, &treq);
    if (code) return afs_CheckCode(code, &treq);

    /* may be null; every use below checks */
    dirData = afs_GetDCache(adp, 0, &treq, &offset, &len, 1);
    ObtainWriteLock(&adp->lock);

    /* talk to the server, retrying for as long as afs_Analyze says to */
    for (;;) {
	conn = afs_Conn(&adp->fid, &treq);
	if (conn) {
	    code = RXAFS_Link(conn->id, (struct AFSFid *) &adp->fid.Fid, aname, (struct AFSFid *) &avc->fid.Fid, &OutFidStatus, &OutDirStatus, &tsync);
	}
	else {
	    code = -1;
	}
	if (!afs_Analyze(conn, code, &adp->fid, &treq)) break;
    }

    if (code) {
	/* rpc failed */
	if (dirData) afs_PutDCache(dirData);
	if (code < 0) adp->states &= ~CStatd;	/* dir status now suspect */
	ReleaseWriteLock(&adp->lock);
	return afs_CheckCode(code, &treq);
    }

    if (dirData) {
	/* directory contents are cached; add the entry locally if we can */
	if (afs_LocalHero(adp, dirData, &OutDirStatus, 1)) {
	    code = dir_Create(&dirData->f.inode, aname, &avc->fid.Fid);
	    if (code) dirData->f.versionNo = -1;	/* unexpected failure; distrust data */
	}
	afs_PutDCache(dirData);
    }
    ReleaseWriteLock(&adp->lock);

    /* The file was not locked across the rpc, so the status returned for
	it is not guaranteed to be the latest; rather than store it, just
	invalidate the file's status so the link count gets refetched. */
    ObtainWriteLock(&avc->lock);
    avc->states &= ~CStatd;
    ReleaseWriteLock(&avc->lock);

    code = 0;
    return afs_CheckCode(code, &treq);
}

/*
 * afs_rename -- rename aname1 in directory aodp to aname2 in directory andp.
 *
 *	aodp, aname1	source directory and name
 *	andp, aname2	target directory and name
 *	acred		credentials of the caller
 *
 * Both directories must be in the same cell and volume (else EXDEV).
 * After a successful RXAFS_Rename the cached directory data is updated
 * locally when afs_LocalHero allows it.  If the local update found that
 * the target name already existed, the displaced file's vcache entry is
 * adjusted.  If a directory was moved between two different directories,
 * its cached data is marked as having an unknown version so that its
 * ".." entry is not trusted.
 *
 * Returns the result of afs_CheckCode on the rpc's return code; failures
 * of the local directory edits are not reported.
 */
afs_rename(aodp, aname1, andp, aname2, acred)
    register struct vcache *aodp, *andp;
    char *aname1, *aname2;
    struct ucred *acred; {
    struct vrequest treq;
    register struct conn *tc;
    register long code;
    long returnCode;
    int oneDir, doLocally;
    int haveFileFid;		/* set once fileFid.Fid has been looked up */
    struct VenusFid unlinkFid, fileFid;
    struct vcache *tvc;
    struct dcache *tdc1, *tdc2;
    struct AFSFetchStatus OutOldDirStatus, OutNewDirStatus;
    struct AFSVolSync tsync;

    afs_InitReq(&treq, acred);

    /* These are examined after the locks are released no matter how the
	rpc and the local update went, so give them defined values up front. */
    unlinkFid.Fid.Vnode = 0;
    haveFileFid = 0;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("rename d1 %x name %s to d2 %x name %s\n", aodp, aname1, andp, aname2);
    /* verify the latest versions of the stat cache entries */
    code = afs_VerifyVCache(aodp, &treq);
    if (code) goto done;
    code = afs_VerifyVCache(andp, &treq);
    if (code) goto done;
    
    /* lock in appropriate order, after some checks */
    if (aodp->fid.Cell != andp->fid.Cell || aodp->fid.Fid.Volume != andp->fid.Fid.Volume) {
	code = EXDEV;
	goto done;
    }
    /* NOTE(review): "same directory" is decided by comparing Fid.Unique
	only; this presumes uniquifiers are distinct within a volume --
	confirm. */
    oneDir = 0;
    if (andp->fid.Fid.Unique == aodp->fid.Fid.Unique) {
	ObtainWriteLock(&andp->lock);
	oneDir = 1;	    /* only one dude locked */
    }
    else if (andp->fid.Fid.Unique < aodp->fid.Fid.Unique) {
	ObtainWriteLock(&andp->lock);	/* lock smaller one first */
	ObtainWriteLock(&aodp->lock);
    }
    else {
	ObtainWriteLock(&aodp->lock);	/* lock smaller one first */
	ObtainWriteLock(&andp->lock);
    }
    
    /* locks are now set, proceed to do the real work */
    do {
	tc = afs_Conn(&aodp->fid, &treq);
	if (tc) {
	    code = RXAFS_Rename(tc->id, (struct AFSFid *) &aodp->fid.Fid, aname1, (struct AFSFid *) &andp->fid.Fid, aname2, &OutOldDirStatus, &OutNewDirStatus, &tsync);
	} else code = -1;

    } while (afs_Analyze(tc, code, &andp->fid, &treq));
    returnCode = code;	    /* remember for later */
    
    /* Now we try to do things locally.  This is really loathsome code. */
    if (code == 0) {
	tdc1 = tdc2 = 0;
	/* don't use GetDCache because we don't want to worry about what happens if
	    we have to stat the file (updating the stat block) before finishing
	    local hero stuff (which may put old (from rename) data version number
	    back in the cache entry).
	    In any event, we don't really care if the data is not
	    in the cache; if it isn't, we won't do the update locally.  */
	tdc1 = afs_FindDCache(aodp, 0);
	if (!oneDir) tdc2 = afs_FindDCache(andp, 0);
	else tdc2 = tdc1;
	if (tdc1 && tdc2) {
	    /* see if version numbers increased properly */
	    doLocally = 0;
	    if (oneDir) {
		/* number increases by 1 for whole rename operation */
		if (afs_LocalHero(aodp, tdc1, &OutOldDirStatus, 1)) {
		    doLocally = 1;
		}
	    }
	    else {
		/* two separate dirs, each increasing by 1 */
		if (afs_LocalHero(aodp, tdc1, &OutOldDirStatus, 1) && afs_LocalHero(andp, tdc2, &OutNewDirStatus, 1))
		    doLocally = 1;
		else {
		    /* local hero might not have invalidated both */
		    tdc1->f.versionNo = -1;
		    tdc2->f.versionNo = -1;
		    tdc2->flags	|= DFEntryMod;	/* localhero sets tdc1->DFEntryMod */
		}
	    }
	    /* now really do the work */
	    if (doLocally) {
		/* first lookup the fid of the dude we're moving */
		code = dir_Lookup(&tdc1->f.inode, aname1, &fileFid.Fid);
		if (code == 0) {
		    haveFileFid = 1;
		    /* delete the source */
		    code = dir_Delete(&tdc1->f.inode, aname1);
		    if (code) {
			tdc1->f.versionNo = -1;   /* mysterious failure */
			tdc1->flags |= DFEntryMod;
		    }
		    /* first see if target is there */
		    code = dir_Lookup(&tdc2->f.inode, aname2, &unlinkFid.Fid);
		    if (code == 0) {
			/* target already exists, and will be unlinked by server */
			code = dir_Delete(&tdc2->f.inode, aname2);
			if (code) {
			    tdc2->f.versionNo = -1;	/* oops */
			    tdc2->flags |= DFEntryMod;
			}
		    }
		    code = dir_Create(&tdc2->f.inode, aname2, &fileFid.Fid);
		    if (code) {
			tdc2->f.versionNo = -1;
			tdc2->flags |= DFEntryMod;
		    }
		}
	    }
	}
	if (tdc1) afs_PutDCache(tdc1);
	if ((!oneDir) && tdc2) afs_PutDCache(tdc2);
    }
    else {
	if (code < 0) {	/* if failed, server might have done something anyway, and assume that we know about it */
	    andp->states &= ~CStatd;
	    aodp->states &= ~CStatd;
	}
    }
    /* update dir link counts; the status blocks are only meaningful if
	the rpc actually succeeded */
    if (returnCode == 0) {
	aodp->m.LinkCount = OutOldDirStatus.LinkCount;
	if (!oneDir)
	    andp->m.LinkCount = OutNewDirStatus.LinkCount;
    }

    /* release locks */
    ReleaseWriteLock(&aodp->lock);
    if (!oneDir) ReleaseWriteLock(&andp->lock);
    
    /* now, some more details.  if unlinkFid.Fid.Vnode then we should decrement
	the link count on this file.  Note that if fileFid is a dir, then we don't
	have to invalidate its ".." entry, since its DataVersion # should have
	changed. However, interface is not good enough to tell us the
	*file*'s new DataVersion, so we're stuck.  Our hack: mark
	the data as having an "unknown" version (effectively discarding the ".."
	entry) */
    if (unlinkFid.Fid.Vnode) {
	unlinkFid.Fid.Volume = aodp->fid.Fid.Volume;
	unlinkFid.Cell = aodp->fid.Cell;
	tvc = afs_GetVCache(&unlinkFid, &treq);
	if (tvc) {
	    ObtainWriteLock(&tvc->lock);
	    if (tvc->opens != 0 && tvc->m.LinkCount <= 1) {
		/* don't delete the file, just decrement the link count */
		tvc->m.LinkCount--;
	    }
	    else {
		tvc->states &= ~CStatd; /* don't have old name's file's new status */
		/* if this was last guy (probably) discard from cache.  Never hurts to do this, even if this isn't the last link: at worst we'll fetch the file again. TryToSmush won't get rid of the file if it is important (e.g. modified or locked).  Actually, this is a little optimistic: if the file was open, we have the only copy, but Unix semantics require we keep the thing around. */
		if (tvc->m.LinkCount == 1) {
		    /* try to discard from cache to save space */
		    afs_TryToSmush(tvc);
		}
	    }
	    ReleaseWriteLock(&tvc->lock);
	    afs_PutVCache(tvc);
	}
    }

    /* now handle ".." invalidation.  We only know which object moved if
	the local update looked it up; without that there is no fid to
	work with. */
    if (!oneDir && haveFileFid) {
	fileFid.Fid.Volume = aodp->fid.Fid.Volume;
	fileFid.Cell = aodp->fid.Cell;
	tvc = afs_GetVCache(&fileFid, &treq);
	if (tvc) {
	    if (vType(tvc) == VDIR) {
		ObtainWriteLock(&tvc->lock);
		tdc1 = afs_FindDCache(tvc, 0);
		if (tdc1) {
		    tdc1->f.versionNo = -1;	/* mark it as unknown */
		    tdc1->flags |= DFEntryMod;
		    afs_PutDCache(tdc1);	/* put it back */
		}
		ReleaseWriteLock(&tvc->lock);
	    }
	    /* drop the reference from afs_GetVCache whether or not the
		moved object turned out to be a directory */
	    afs_PutVCache(tvc);
	}
    }
    code = returnCode;
done:
    return afs_CheckCode(code, &treq);
}

/*
 * afs_mkdir -- create directory aname inside directory adp.
 *
 *	adp	vcache of the parent directory
 *	aname	name of the new directory (rejected with EINVAL if
 *		ENameOK refuses it)
 *	attrs	supplies the mode bits (va_mode) for the new directory
 *	avcp	on success, receives the vcache of the new directory
 *	acred	credentials of the caller
 *
 * After a successful RXAFS_MakeDir the parent's cached directory data is
 * updated locally when afs_LocalHero allows it, the parent's link count
 * is refreshed, and a vcache entry for the new directory is obtained via
 * afs_GetVCache (ENOENT if that fails).  Returns the result of
 * afs_CheckCode.
 */
afs_mkdir(adp, aname, attrs, avcp, acred)
    register struct vcache *adp;
    register struct vcache **avcp;
    char *aname;
    struct vattr *attrs;
    struct ucred *acred; {
    struct vrequest treq;
    register long code;
    register struct conn *conn;
    struct VenusFid newFid;
    register struct dcache *dirData;
    long offset, len;
    register struct vcache *newvc;
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus OutFidStatus, OutDirStatus;
    struct AFSCallBack CallBack;
    struct AFSVolSync tsync;
    long rpcStart;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("mkdir %s in %x\n", aname, adp);
    afs_InitReq(&treq, acred);

    if (!ENameOK(aname)) {
	code = EINVAL;
	return afs_CheckCode(code, &treq);
    }
    code = afs_VerifyVCache(adp, &treq);
    if (code) return afs_CheckCode(code, &treq);

    /* status we ask the server to give the new directory */
    InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE;
    InStatus.ClientModTime = osi_Time();
    InStatus.UnixModeBits = attrs->va_mode & 0xffff;	/* low 16 bits of the mode */

    /* may be null; every use below checks */
    dirData = afs_GetDCache(adp, 0, &treq, &offset, &len, 1);
    ObtainWriteLock(&adp->lock);

    /* talk to the server, retrying for as long as afs_Analyze says to */
    for (;;) {
	conn = afs_Conn(&adp->fid, &treq);
	if (conn) {
	    rpcStart = osi_Time();
	    code = RXAFS_MakeDir(conn->id, (struct AFSFid *) &adp->fid.Fid, aname, &InStatus, (struct AFSFid *) &newFid.Fid, &OutFidStatus, &OutDirStatus, &CallBack, &tsync);
	    /* expiration is made absolute here, but nothing further down
		in this routine looks at CallBack again */
	    CallBack.ExpirationTime += rpcStart;
	}
	else {
	    code = -1;
	}
	if (!afs_Analyze(conn, code, &adp->fid, &treq)) break;
    }

    if (code) {
	/* rpc failed */
	if (code < 0) adp->states &= ~CStatd;	/* dir status now suspect */
	ReleaseWriteLock(&adp->lock);
	if (dirData) afs_PutDCache(dirData);
	return afs_CheckCode(code, &treq);
    }

    if (dirData) {
	/* parent's contents are cached; add the entry locally if we can */
	if (afs_LocalHero(adp, dirData, &OutDirStatus, 1)) {
	    code = dir_Create(&dirData->f.inode, aname, &newFid.Fid);
	    if (code) dirData->f.versionNo = -1;	/* unexpected failure; distrust data */
	}
	afs_PutDCache(dirData);
    }
    adp->m.LinkCount = OutDirStatus.LinkCount;

    /* the server filled in only the Fid part; complete it from the parent */
    newFid.Cell = adp->fid.Cell;
    newFid.Fid.Volume = adp->fid.Fid.Volume;
    ReleaseWriteLock(&adp->lock);

    /* parent is finished; now get a cache entry for the new directory */
    newvc = afs_GetVCache(&newFid, &treq);
    if (newvc) {
	*avcp = newvc;
	code = 0;
    }
    else {
	code = ENOENT;
    }
    return afs_CheckCode(code, &treq);
}

/*
 * afs_rmdir -- remove the directory named aname from directory adp.
 *
 * After a successful RXAFS_RemoveDir the parent's in-core link count is
 * refreshed and the entry is deleted from the locally cached directory
 * data when afs_LocalHero allows it.  Returns the result of
 * afs_CheckCode; a failure of the local deletion is not reported.
 */
afs_rmdir(adp, aname, acred)
    register struct vcache *adp;
    char *aname;
    struct ucred *acred; {
    struct vrequest treq;
    register struct dcache *dirData;
    register long code;
    register struct conn *conn;
    long offset, len;
    struct AFSFetchStatus OutDirStatus;
    struct AFSVolSync tsync;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("rmdir of %s from %x\n", aname, adp);
    afs_InitReq(&treq, acred);
    code = afs_VerifyVCache(adp, &treq);
    if (code == 0) {
	/* may be null; every use below checks */
	dirData = afs_GetDCache(adp, 0, &treq, &offset, &len, 1);
	ObtainWriteLock(&adp->lock);

	/* talk to the server, retrying for as long as afs_Analyze says to */
	for (;;) {
	    conn = afs_Conn(&adp->fid, &treq);
	    if (conn) {
		code = RXAFS_RemoveDir(conn->id, (struct AFSFid *) &adp->fid.Fid, aname, &OutDirStatus, &tsync);
	    }
	    else {
		code = -1;
	    }
	    if (!afs_Analyze(conn, code, &adp->fid, &treq)) break;
	}

	if (code) {
	    /* rpc failed */
	    if (dirData) afs_PutDCache(dirData);
	    if (code < 0) adp->states &= ~CStatd;	/* dir status now suspect */
	}
	else {
	    /* rpc worked; refresh the in-core link count */
	    adp->m.LinkCount = OutDirStatus.LinkCount;
	    if (dirData) {
		/* contents are cached; delete the entry locally if we can */
		if (afs_LocalHero(adp, dirData, &OutDirStatus, 1)) {
		    code = dir_Delete(&dirData->f.inode, aname);
		    if (code) dirData->f.versionNo = -1;	/* unexpected failure; distrust data */
		}
		afs_PutDCache(dirData);
	    }
	    /* no link count fixup for the victim: dirs can't be hardlinked */
	    code = 0;
	}
	ReleaseWriteLock(&adp->lock);
    }
    return afs_CheckCode(code, &treq);
}

/*
 * BlobScan -- advance a blob number to the next blob that is in use.
 *
 *	afile	handle passed through to dir_GetBlob
 *	ablob	blob number to start from
 *
 * Header blobs are skipped: the first DHE+1 blobs of the first page and
 * blob 0 of every later page.  The page header's freebitmap is then
 * consulted, and blobs whose bit is clear are skipped as well.  When a
 * page holds no further in-use blob the scan moves on to the next page.
 *
 * Returns the number of the first in-use blob at or after ablob, or 0
 * when dir_GetBlob fails for a page (taken to mean end of directory).
 */
static BlobScan(afile, ablob)
    long *afile;
    register long ablob; {
    register long slot;			/* blob index relative to its page */
    long pageBase;			/* number of the page's first blob */
    register struct PageHeader *hdr;

    for (;;) {
	pageBase = ablob & ~(EPP-1);
	hdr = (struct PageHeader *) dir_GetBlob(afile, pageBase);
	if (hdr == 0) return 0;		/* ran off the end */

	/* step over whatever header blobs this page begins with */
	slot = ablob - pageBase;
	if (pageBase == 0) {
	    if (slot < DHE+1) slot = DHE+1;	/* first page: extra-big header */
	}
	else if (slot == 0) {
	    slot = 1;				/* other pages: one header blob */
	}

	/* find the first blob from here on whose bitmap bit is set */
	while (slot < EPP && !(hdr->freebitmap[slot>>3] & (1<<(slot&7))))
	    slot++;

	DRelease(hdr, 0);
	if (slot != EPP) return slot + pageBase;
	ablob = pageBase + EPP;		/* nothing here; try the next page */
    }
    /* not reached */
}

#if	defined(AFS_GATEWAY) || defined(AFS_MACH_ENV)
/*
 * This version of afs_readdir packs multiple entries into each directory
 * block, which is critical for afs gateway performance. It also has hooks
 * to overcome xdr's mistreatment of directory information.
 */

/*
 *  This corresponds to a struct direct (or dirent for VFS40),
 *  with the d_name field removed.  The name is copied out separately,
 *  directly after this fixed-size header.
 */
struct afs_dirstruct {
#ifdef	AFS_VFS40
	off_t   d_off;			/* offset of next directory entry */
#endif	AFS_VFS40
	u_long	d_fileno;		/* file number of entry */
	u_short	d_reclen;		/* length of this record */
	u_short	d_namlen;		/* length of string in d_name */
};

/* round x up to the next multiple of sizeof(int) */
#define	RoundToInt(x)	(((x) + (sizeof(int) - 1)) & ~(sizeof(int) - 1))
/*
 * record length for a name of namlen characters: header, name and
 * terminating null, rounded up to an int boundary.  The argument is
 * parenthesized so that any expression may be passed.
 */
#define	RecLen(namlen)	RoundToInt((namlen) + 1 + sizeof(struct afs_dirstruct))
#endif
#ifdef	AFS_GATEWAY
/*
 * afs_readdir (AFS_GATEWAY version) -- read directory entries from avc
 * into auio.  Simply calls afs_readdir_with_offlist with a null offset
 * list and returns its result.
 */
afs_readdir(avc, auio, acred)
    register struct vcache *avc;
    struct uio *auio;
    struct ucred *acred; {

    return afs_readdir_with_offlist(avc, auio, acred, 0);
}

/*
 * afs_readdir_with_offlist -- read directory entries from avc into auio,
 * packing several entries into each DIRBLKSIZ block.
 *
 *	avc	vcache of the directory
 *	auio	where to put the entries; afsio_resid must be a multiple
 *		of DIRBLKSIZ (EINVAL otherwise), and afsio_offset is the
 *		opaque position (blob number << 5) to start from
 *	acred	credentials of the caller
 *	offlist	if non-null (and not AFS_VFS40), receives for each entry
 *		copied out the offset of the entry that follows it
 *
 * The loop runs one entry behind: an entry is only copied out once the
 * following one has been examined, so that the last entry of a block
 * can have its d_reclen stretched to fill the block.  Returns the
 * result of afs_CheckCode.
 */
afs_readdir_with_offlist(avc, auio, acred, offlist)
    register struct vcache *avc;
    struct uio *auio;
    struct ucred *acred;
    long *offlist; {
    struct vrequest treq;
    register struct dcache *tdc;
    register long code;
    long offset, len, oldblob, newblob;
    int oldreclen, newreclen;
    int oldnamlen, newnamlen;
    struct DirEntry *oldde, *newde;
    struct afs_dirstruct dirEntry;
    int volstub;
    int padding;
    int dirblksizleft;
    /* opaque value is pointer into a vice dir; use bit map to decide
	if the entries are in use.  Always assumed to be valid.  0 is
	special, means start of a new dir.  Long inode, followed by
	short reclen and short namelen.  Namelen does not include
	the null byte.  Followed by null-terminated string.
    */

    /* set up the request structure first: every exit, including the
	argument check just below, hands treq to afs_CheckCode */
    afs_InitReq(&treq, acred);
    if (auio->afsio_resid & (DIRBLKSIZ - 1)) {
	code = EINVAL;
	goto done;
    }
    /* update the cache entry */
    code = afs_VerifyVCache(avc, &treq);
    if (code) goto done;
    /* get a reference to the entire directory */
    tdc = afs_GetDCache(avc, 0, &treq, &offset, &len, 1);
    if (!tdc) {
	code = ENOENT;
	goto done;
    }
    ObtainReadLock(&avc->lock);
    oldde     = 0;
    oldblob   = auio->afsio_offset >> 5;	/* entry we want */
    oldreclen = 0;
    dirblksizleft = DIRBLKSIZ;
    volstub = avc->fid.Fid.Volume << 16;
    for (;;) {
	newblob = BlobScan(&tdc->f.inode, oldblob);/* scan for in-use blob */
	if (newblob == 0) {
	    newde = 0;
	    goto copyold;
	}
	newde = (struct DirEntry *) dir_GetBlob(&tdc->f.inode, newblob);
	if (newde == (struct DirEntry *) 0) {
	    goto copyold;
	}
	newblob += dir_NameBlobs(newde->name);
	newnamlen = strlen(newde->name);
	newreclen = RecLen(newnamlen);
	/* no room left for the new entry after the pending one: drop it
	    and treat the pending entry as the last one */
	if (oldreclen + newreclen > auio->afsio_resid) {
	    DRelease(newde, 0);
	    newde = 0;
	    goto copyold;
	}
copyold:
	if (oldde) {
	    /* copy oldde into user space */
	    if (newde == 0 || oldreclen + newreclen > dirblksizleft) {
		/* last entry of this block: stretch it to the block's end */
	        padding = dirblksizleft - oldreclen;
		dirEntry.d_reclen = dirblksizleft;
		dirblksizleft = DIRBLKSIZ;
	    } else {
		padding = 0;
		dirEntry.d_reclen = oldreclen;
		dirblksizleft -= oldreclen;
	    }
#ifdef	AFS_VFS40
	    dirEntry.d_off    = oldblob << 5;
#else	AFS_VFS40
	    if (offlist)
		*offlist++    = oldblob << 5;
#endif	AFS_VFS40
	    dirEntry.d_namlen = oldnamlen;
	    dirEntry.d_fileno = volstub + ntohl(oldde->fid.vnode);
	    code = uiomove(&dirEntry,
			sizeof(struct afs_dirstruct),
			UIO_READ, auio);
	    if (code == 0) {
		code = uiomove(oldde->name,
			oldreclen - sizeof(struct afs_dirstruct),
			UIO_READ, auio);
	    }
	    DRelease(oldde, 0);
	    if (code) {
		if (newde) {
		    DRelease(newde, 0);
		    newde = 0;
		}
		break;
	    }
	    /* skip over the padding in the user's buffer without writing it */
	    if (padding > 0) {
		auio->afsio_resid -= padding;
		auio->afsio_iov->iov_len -= padding;
		auio->afsio_iov->iov_base += padding;
	    }
	}
	if (newde == 0) {
	    break;
	}
	/* the new entry becomes the pending one */
	oldde     = newde;
	oldblob   = newblob;
	oldnamlen = newnamlen;
	oldreclen = newreclen;
    }
    /* oldde is only tested here, not dereferenced (it has been released):
	non-null means at least one entry was handled, so hand back the
	position just past the last one */
    if (oldde) {
	auio->afsio_offset = oldblob << 5;
    }
    afs_PutDCache(tdc);
    ReleaseReadLock(&avc->lock);
done:
    return afs_CheckCode(code, &treq);
}
#else	AFS_GATEWAY
#ifndef AFS_GETDIRHACK
/* this code is from Mike Stolarchuk of U of M; I've munged it a bit stylistically.  Packing
    things doesn't work with AFS_GETDIRHACK, but AFS_GETDIRHACK really is a
    temporary interim hack for CMU, since we're the only folks running R3
    kernels with R2 (non-VFS) binaries for so long. */

/*
 * The kernel don't like it so much to have large stuff on the stack.
 * Here we use a watered down version of the direct struct, since
 * its not too bright to double copy the strings anyway.
*/

/*
 * Fixed-size leading part of a directory record, without the name.
 * afs_readdir_move copies one of these out and then copies the name
 * straight from the AFS directory entry.
 */
struct min_direct {	/* miniature direct structure */
			/* If struct direct changes, this must too */
#ifdef AFS_VFS40
    long	d_off;		/* set to 0 by afs_readdir_move */
#endif AFS_VFS40
    u_long	d_fileno;	/* file number of entry */
    u_short	d_reclen;	/* length of this record */
    u_short	d_namlen;	/* length of the name, not counting the null */
};


/*
 *------------------------------------------------------------------------------
 *
 * afs_readdir_move.
 *	mainly a kind of macro... makes getting the struct direct
 *	out to the user space easy... could take more parameters,
 *	but now just takes what it needs.
 *
 *
*/
/*
 * DIRSIZ_LEN(len): size of a directory record whose name is len
 * characters long -- the size of the system's directory structure less
 * its full-size name field, plus the name and its null rounded up to a
 * multiple of 4.
 */
#ifdef	AFS_VFS40
#define DIRSIZ_LEN(len) \
    ((sizeof (struct dirent) - (MAXNAMLEN+1)) + (((len)+1 + 3) &~ 3))
#else
#define DIRSIZ_LEN(len) \
    ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((len)+1 + 3) &~ 3))
#endif

afs_readdir_move (de, vc, auio, slen, rlen) 
struct DirEntry *	de;
struct vcache *		vc;
struct	uio *		auio;
int			slen;
int			rlen;
{
    int	code = 0;
    struct min_direct dirEntry;
    struct iovec *iov = auio->afsio_iov;

    /* Note the odd mechanism for building the inode number */
    dirEntry.d_fileno = (vc->fid.Fid.Volume << 16) +
      ntohl(de->fid.vnode);
    dirEntry.d_reclen = rlen;
    dirEntry.d_namlen = slen;
#ifdef AFS_VFS40
    dirEntry.d_off = 0;	    /* not used yet by SunOS 4.0 */
#endif AFS_VFS40

    code = uiomove(&dirEntry, sizeof(dirEntry), UIO_READ, auio);
    if (code == 0)
	code = uiomove(de->name, slen, UIO_READ, auio);

    /* pad out the remaining characters with zeros */
    if (code == 0) {
	int count = ((slen + 4) & ~3) - slen;
	while((count-- > 0) && code == 0)
	    code = ureadc('\0', auio);
    }

    return(code);
}

/*
 *------------------------------------------------------------------------------
 * 
 *	iterator for the directory reads.  Takes the AFS DirEntry
 *	structure and slams them into UFS direct structures.
 *	uses afs_readdir_move to get the struct to the user space.
 *
 *	The routine works by looking ahead one AFS directory entry.
 *	That's because the AFS entry we are currently working with
 *	may not fit into the buffer the user has provided.  If it
 *	doesn't we have to change the size of the LAST AFS directory
 *	entry, so that it will FIT perfectly into the block the
 *	user has provided.
 *	
 *	The 'forward looking' of the code makes it a bit tough to read.
 *	Remember we need to get an entry, see if it fits, then
 *	set it up as the LAST entry, and find the next one.
 *
 *	Tough to take: We give out an EINVAL if we don't have enough
 *	space in the buffer, and at the same time, don't have an entry
 *	to put into the buffer. This CAN happen if the first AFS entry
 *	we get can't fit into the 512 character buffer provided.  Seems
 *	it ought not happen... 
 *
 *	Assumption: don't need to use anything but one dc entry:
 *	this means the directory ought not be greater than 64k.
*/

/*
 * afs_readdir_iter -- copy as many directory records as fit into auio.
 *
 *	vc	vcache of the directory
 *	dc	dcache entry holding the directory contents
 *	auio	destination; on return afsio_offset is the position
 *		(blob number << 5) just past the last entry handed over
 *	offset	position to start reading from
 *
 * The loop runs one entry behind: "ode" is the pending entry, already
 * known to fit, and "nde" is the one being examined.  The pending entry
 * is written with its natural length if nde also fits; otherwise it is
 * the last record and its d_reclen is stretched to cover whatever is
 * left of the buffer.
 *
 * Returns 0, EINVAL if not even the first entry fits, or the error
 * from copying out.
 */
afs_readdir_iter (vc, dc, auio, offset)
struct	vcache	*vc;
struct	dcache	*dc;
struct	uio	*auio;
long		offset; {
    int code = 0;
    struct DirEntry *ode = 0, *nde = 0;
    int o_slen = 0;		/* name length of the pending entry */
    int n_slen = 0;		/* name length of the entry being examined */
    int len = 0;		/* record size of the pending entry; 0 if none */
    u_long us;
    u_long origOffset;

    /*
     * Start from the position we were asked to read at.
     */
    auio->afsio_offset = offset;


    while (code==0) {
	/* position just past the pending entry (or the start position) */
	origOffset = auio->afsio_offset;

	/* scan for the next in-use blob and point at it */
	/* note that ode, if non-zero, also represents a held dir page */
	if (!(us = BlobScan(&dc->f.inode, (origOffset >> 5)) )
	    || !(nde = (struct DirEntry *) dir_GetBlob(&dc->f.inode, us) ) ) {
	    /* failed to setup nde, return what we've got, and release ode */
	    if (len) {
		/* something to hand over; it takes up the rest of the buffer */
		code = afs_readdir_move( ode, vc, auio,
					o_slen, auio->afsio_resid  );
		auio->afsio_resid = 0;  
		/* afs_readdir_move goes through uiomove/ureadc, which on
		 * BSD-style kernels advance the uio offset by the bytes
		 * copied (TODO confirm for this platform).  The offset is
		 * our blob cursor, not a byte count, so put it back;
		 * this is a no-op if the offset was left alone.
		 */
		auio->afsio_offset = origOffset;
	    }
	    else {
		/* nothing to hand over */
	    }
	    if (ode) DRelease(ode, 0);
	    return(code);
	}
	/* by here nde is set */

	/* Do we have enough user space to carry out our mission? */
	if (DIRSIZ_LEN((n_slen = strlen(nde->name))) >= (auio->afsio_resid-len)) {
	    /* nde does not fit behind the pending entry */
	    DRelease (nde, 0); /* can't use this one. */
	    if (len) {
		code = afs_readdir_move(ode, vc, auio,
					o_slen, auio->afsio_resid);
		auio->afsio_resid = 0;  
		/* restore the blob cursor; see the comment above */
		auio->afsio_offset = origOffset;
	    }
	    else { /* trouble, can't give anything to the user! */
		/* the very first entry does not fit in the buffer
		 * we were given
		 */
		code = EINVAL;
	    }
	    if (ode) DRelease(ode, 0);
	    return (code);
	}

	/*
	 * In any event, we move out the LAST de entry, getting ready
	 * to set up for the next one.
	 */
	if (len)
	    code = afs_readdir_move (ode, vc, auio, o_slen, len );

	/* nde becomes the pending entry */
	len = DIRSIZ_LEN( o_slen = n_slen );
	if (ode) DRelease(ode, 0);
	ode = nde;

	/* cursor goes just past the new pending entry */
	auio->afsio_offset = ((us + dir_NameBlobs(nde->name)) << 5);
    }

    /*
     * Nonzero return code.
     */
    if (ode) DRelease(ode, 0);
    return(code);
}

/*
 *------------------------------------------------------------------------------
 *
 * Read directory entries.
 * There are some weird things to look out for here.  The uio_offset
 * field is either 0 or it is the offset returned from a previous
 * readdir.  It is an opaque value used by the server to find the
 * correct directory block to read.  The byte count must be at least
 * vtoblksz(vp) bytes.  The count field is the number of blocks to
 * read on the server.  This is advisory only, the server may return
 * only one block's worth of entries.  Entries may be compressed on
 * the server.
 *
 * This routine encodes knowledge of Vice dirs.
 */

/*
 * Here is the bad, bad, really bad news.
 * It has to do with 'offset' (seek locations).
*/

/*
 * afs_readdir (packing, non-gateway version) -- read directory entries
 * from avc into auio, starting at auio->afsio_offset.
 *
 * Validates the vcache, obtains the directory's data cache entry
 * (ENOENT if that fails), and lets afs_readdir_iter do the copying
 * under the vcache read lock.  Returns the result of afs_CheckCode,
 * including any error reported by afs_readdir_iter.
 */
afs_readdir(avc, auio, acred)
    register struct vcache *avc;
    struct uio *auio;
    struct ucred *acred; {
    struct vrequest treq;
    register struct dcache *tdc;
    long offset, len;
    int code = 0;
    /* opaque value is pointer into a vice dir; use bit map to decide
	if the entries are in use.  Always assumed to be valid.  0 is
	special, means start of a new dir.  Long inode, followed by
	short reclen and short namelen.  Namelen does not include
	the null byte.  Followed by null-terminated string.
    */
    afs_InitReq(&treq, acred);	    /* setup request structure */
    /* update the cache entry */
    code = afs_VerifyVCache(avc, &treq);
    if (code) goto done;
    /* get a reference to the entire directory */
    tdc = afs_GetDCache(avc, 0, &treq, &offset, &len, 1);
    if (!tdc) {
	code = ENOENT;
	goto done;
    }
    ObtainReadLock(&avc->lock);

    /* keep the iterator's result so that a too-small buffer (EINVAL) or
	a failed copy-out is reported instead of looking like end of
	directory */
    code = afs_readdir_iter (avc, tdc, auio, auio->afsio_offset);

    afs_PutDCache(tdc);
    ReleaseReadLock(&avc->lock);

done:
    return afs_CheckCode(code, &treq);
}

#else AFS_GETDIRHACK

/*
 * Read directory entries.
 * There are some weird things to look out for here.  The uio_offset
 * field is either 0 or it is the offset returned from a previous
 * readdir.  It is an opaque value used by the server to find the
 * correct directory block to read.  The byte count must be at least
 * vtoblksz(vp) bytes.  The count field is the number of blocks to
 * read on the server.  This is advisory only, the server may return
 * only one block's worth of entries.  Entries may be compressed on
 * the server.
 *
 * This routine encodes knowledge of Vice dirs.
 */

/*
 * afs_readdir (AFS_GETDIRHACK version) -- hand back a single directory
 * entry per call.
 *
 * Starting at auio->afsio_offset (blob number << 5), finds the next
 * in-use entry, copies it out as one full directory structure with
 * d_reclen 512, and adjusts afsio_resid so that exactly 512 bytes appear
 * to have been consumed.  afsio_offset is left just past the entry.
 * If there is no further entry nothing is copied and 0 is returned.
 * Returns the result of afs_CheckCode.
 */
afs_readdir(avc, auio, acred)
    register struct vcache *avc;
    struct uio *auio;
    struct ucred *acred; {
    struct vrequest treq;
    register struct dcache *tdc;
    long us;
    register long code;
    long offset, len, nextBlob;
    struct DirEntry *tde;
#ifdef	AFS_VFS40
    struct dirent dirEntry;
#else
    struct direct dirEntry;	    /* big, contains 255 byte entry name */
#endif
    long origOffset;

    afs_InitReq(&treq, acred);

    /* make sure the stat cache entry is current */
    code = afs_VerifyVCache(avc, &treq);
    if (code) goto done;

    /* get hold of the directory's contents */
    tdc = afs_GetDCache(avc, 0, &treq, &offset, &len, 1);
    if (!tdc) {
	code = ENOENT;
	goto done;
    }
    ObtainReadLock(&avc->lock);

    /* the offset is an opaque cursor: blob number << 5, with 0 meaning
	the start of the directory */
    origOffset = auio->afsio_offset;
    nextBlob = origOffset >> 5;

    /* find the next in-use blob and get a pointer to its entry */
    tde = (struct DirEntry *) 0;
    us = BlobScan(&tdc->f.inode, nextBlob);
    if (us != 0)
	tde = (struct DirEntry *) dir_GetBlob(&tdc->f.inode, us);
    if (tde == (struct DirEntry *) 0) {
	/* nothing more to read; not an error */
	code = 0;
	goto release;
    }

    /* build the record; d_fileno is (volume << 16) + vnode */
    dirEntry.d_fileno = (avc->fid.Fid.Volume << 16) + ntohl(tde->fid.vnode);
    dirEntry.d_reclen = 512;
#ifdef	AFS_VFS40
    dirEntry.d_off = 0;		    /* We don't use the offset yet */
#endif
    dirEntry.d_namlen = strlen(tde->name);	/* null not counted */
    strcpy(dirEntry.d_name, tde->name);

    code = uiomove(&dirEntry, sizeof(dirEntry), UIO_READ, auio);
    /* make it look as though a whole 512-byte record was transferred */
    auio->afsio_resid -= (512 - sizeof(dirEntry));
    /* step the cursor past this entry */
    auio->afsio_offset = (us + dir_NameBlobs(tde->name)) << 5;
    DRelease(tde, 0);

release:
    afs_PutDCache(tdc);
    ReleaseReadLock(&avc->lock);
done:
    return afs_CheckCode(code, &treq);
}
#endif AFS_GETDIRHACK
#endif	AFS_GATEWAY

/*
 * afs_symlink: create an entry named aname in directory adp whose link
 * contents are atargetName, by issuing RXAFS_Symlink to the file server.
 *
 *	adp		parent directory
 *	aname		name of the new entry
 *	attrs		not referenced by this routine
 *	atargetName	link contents; a leading '#', '%' or '$' selects the
 *			mount-point form (mode 0644, no trailing null stored)
 *	acred		credentials used to build the request
 *
 * On success the parent's cached directory is updated locally when
 * afs_LocalHero() allows it, a vcache entry for the new link is set up
 * with its linkData filled in, and 0 is returned through afs_CheckCode().
 */
afs_symlink(adp, aname, attrs, atargetName, acred)
    register struct vcache *adp;
    register char *atargetName;
    char *aname;
    struct vattr *attrs;
    struct ucred *acred; {
    struct vrequest treq;
    register long code;
    register struct conn *tc;
    struct VenusFid newFid;
    register struct dcache *tdc;
    long offset, len, alen, host;
    register struct vcache *tvc;
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus OutFidStatus, OutDirStatus;
    struct AFSVolSync tsync;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("symlink dir %s in %x\n", aname, adp);
    afs_InitReq(&treq, acred);
    code = afs_VerifyVCache(adp, &treq);
    if (code) return afs_CheckCode(code, &treq);
    InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE;
    InStatus.ClientModTime = osi_Time();
    alen = strlen(atargetName);	    /* we want it to include the null */
    if (*atargetName == '#' || *atargetName == '%' || *atargetName == '$')
	InStatus.UnixModeBits = 0644;	/* mt pt: null from "." at end */
    else {
	InStatus.UnixModeBits = 0755;
	alen++;	    /* add in the null */
    }
    /* tdc may be null here; every later use is guarded */
    tdc = afs_GetDCache(adp, 0, &treq, &offset, &len, 1);
    ObtainWriteLock(&adp->lock);
    ObtainSharedLock(&afs_xvcache);  /* prevent others from creating this entry */
    /* issue the RPC, retrying for as long as afs_Analyze asks us to */
    do {
	tc = afs_Conn(&adp->fid, &treq);
	if (tc) {
	    host = tc->server->host;	/* remembered for afs_NewVCache below */
	    code = RXAFS_Symlink(tc->id, (struct AFSFid *) &adp->fid.Fid, aname, atargetName, &InStatus, (struct AFSFid *) &newFid.Fid, &OutFidStatus, &OutDirStatus, &tsync);
	}
	else code = -1;
    } while(afs_Analyze(tc, code, &adp->fid, &treq));
    UpgradeSToWLock(&afs_xvcache);
    if (code) {
	/* negative code: drop the "status valid" flag on the parent */
	if (code < 0) adp->states &= ~CStatd;
	ReleaseWriteLock(&adp->lock);
	ReleaseWriteLock(&afs_xvcache);
	if (tdc) afs_PutDCache(tdc);
	goto done;
    }
    /* otherwise, we should see if we can make the change to the dir locally */
    if (tdc) {
	/* we have the thing in the cache */
	if (afs_LocalHero(adp, tdc, &OutDirStatus, 1)) {
	    /* we can do it locally */
	    code = dir_Create(&tdc->f.inode, aname, &newFid.Fid);
	    if (code) tdc->f.versionNo = -1;	/* surprise error -- use invalid value */
	}
	afs_PutDCache(tdc);
    }
    /* the RPC filled in only newFid.Fid; cell and volume are inherited from the parent */
    newFid.Cell = adp->fid.Cell;
    newFid.Fid.Volume = adp->fid.Fid.Volume;
    ReleaseWriteLock(&adp->lock);

    /* now we're done with parent dir, create the link's entry.  Note that no one can get a pointer
	to the new cache entry until we release the xvcache lock. */
    tvc = afs_NewVCache(&newFid, host);
    tvc->cbExpires = 0x7fffffff;	/* never expires, they can't change */
    afs_ProcessFS(tvc, &OutFidStatus);
    tvc->states |= CStatd;		/* have valid info */
    if (!tvc->linkData) {
	/* alen-1 characters are kept and a null is stored in the final byte;
	    for the mount-point form this replaces the last character of the target */
	tvc->linkData = (char *) osi_Alloc(alen);
	strncpy(tvc->linkData, atargetName, alen-1);
	tvc->linkData[alen-1] = 0;
    }
    ReleaseWriteLock(&afs_xvcache);
    afs_PutVCache(tvc);
    code = 0;	/* a local dir_Create failure above is deliberately not reported */
done:
    return afs_CheckCode(code, &treq);
}

/*
 * afs_HandleLink: make sure avc->linkData holds the null-terminated
 * contents of symbolic link avc, reading them from the cache file if
 * they are not already in memory.  Call with avc write-locked.
 *
 *	avc	vcache entry of the link
 *	areq	request structure passed through to afs_GetDCache
 *
 * Returns 0 on success (including when linkData was already present),
 * EFAULT if the link data is longer than 1024 bytes, and EIO if the data
 * cannot be obtained, is empty where a trailing character is required,
 * or cannot be read in full.
 */
afs_HandleLink(avc, areq)
    register struct vcache *avc;
    struct vrequest *areq; {
    register struct dcache *tdc;
    register char *tp;
    struct osi_file *tfile;
    long offset, len, alen;
    register long code;

    /* two different formats, one for links protected 644, have a "." at the end
	of the file name, which we turn into a null.  Others, protected 755,
	we add a null to the end of */
    if (!avc->linkData) {
	tdc = afs_GetDCache(avc, 0, areq, &offset, &len, 0);
	if (!tdc) {
	    return EIO;
	}
	/* otherwise we have the data loaded, go for it */
	if (len > 1024) {
	    afs_PutDCache(tdc);
	    return EFAULT;
	}
	if (avc->m.Mode	& 0111)	alen = len+1;	/* regular link */
	else alen = len;			/* mt point */
	/*
	 * The terminator is stored at tp[alen-1].  A mount-point style link
	 * with no data at all would give alen == 0 and put that store one
	 * byte in front of the buffer, so refuse it before allocating.
	 */
	if (alen <= 0) {
	    afs_PutDCache(tdc);
	    return EIO;
	}
	tfile = osi_UFSOpen(&cacheDev, tdc->f.inode);
	if (!tfile) {
	    afs_PutDCache(tdc);
	    return EIO;
	}
	tp = osi_Alloc(alen);	/* make room for terminating null */
	code = osi_Read(tfile, tp, len);
	tp[alen-1] = 0;
	osi_Close(tfile);
	afs_PutDCache(tdc);
	if (code != len) {
	    /* short read: discard the partial buffer */
	    osi_Free(tp, alen);
	    return EIO;
	}
	avc->linkData = tp;
    }
    return 0;
}

/*
 * afs_readlink: copy the contents of symbolic link avc out through auio.
 * Yields EINVAL when avc is not a symbolic link and EIO when no link
 * data is available; the final status is filtered by afs_CheckCode().
 */
afs_readlink(avc, auio, acred)
    register struct vcache *avc;
    struct uio *auio;
    struct ucred *acred; {
    register long code;
    struct vrequest treq;
    register char *linkText;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("readlink %x\n", avc);
    afs_InitReq(&treq, acred);

    code = afs_VerifyVCache(avc, &treq);
    if (code == 0) {
	if (vType(avc) != VLNK) {
	    code = EINVAL;
	}
	else {
	    /* afs_HandleLink expects the write lock to be held */
	    ObtainWriteLock(&avc->lock);
	    code = afs_HandleLink(avc, &treq);
	    if (code == 0) {
		linkText = avc->linkData;
		if (linkText) {
		    /* hand the text (without its null) to the caller */
		    code = uiomove(linkText, strlen(linkText), UIO_READ, auio);
		}
		else {
		    code = EIO;
		}
	    }
	    ReleaseWriteLock(&avc->lock);
	}
    }
    return afs_CheckCode(code, &treq);
}

/*
 * afs_fsync: store the file's segments via afs_StoreAllSegments() when
 * avc->execsOrWriters is positive; otherwise there is nothing to do.
 * The status is filtered by afs_CheckCode().
 */
afs_fsync(avc, acred)
    register struct vcache *avc;
    struct ucred *acred; {
    register long code = 0;
    struct vrequest treq;

    if (afs_debug & AFSDEB_GENERAL) afs_dp("fsync file %x\n", avc);
    afs_InitReq(&treq, acred);

    ObtainSharedLock(&avc->lock);
    if (avc->execsOrWriters > 0) {
	code = afs_StoreAllSegments(avc, &treq);
    }
    ReleaseSharedLock(&avc->lock);

    return afs_CheckCode(code, &treq);
}

/*
 * afs_inactive: called when the last reference to the vnode goes away.
 * Performs a deferred core-dump store if one is pending (CCore set) and
 * clears the CWired state.  Always returns 0.
 */
afs_inactive(avc, acred)
    register struct vcache *avc;
    struct ucred *acred; {

    /* Nothing references the vnode any more, not even in-core pages,
	so treat everything as flushed until the next read. */
    avc->flushDV = AFS_MAXDV;

    if ((avc->states & CCore) != 0) {
	/* hold a temporary reference across the delayed close */
	avc->vrefCount += 1;
	avc->states &= ~CCore;
	if (afs_debug & AFSDEB_GENERAL)
	    afs_dp("doing delayed core store\n");
	/* a flags value of -1 tells afs_close this is the core dump case */
#ifdef	AFS_VFS40
	afs_close(avc, -1, 0 /* count, ignored */, acred);
#else
	afs_close(avc, -1, acred);
#endif
	avc->vrefCount -= 1;
    }

    avc->states &= ~CWired;
    return 0;	    /* the lru takes care of the rest */
}


/* basic strategy routine, assuming stuff mapped in. */
afs_ustrategy(abp)
    register struct buf *abp; {
    register long code;
    struct uio tuio;
    struct iovec tiovec[1];

    if (afs_debug & AFSDEB_GENERAL) afs_dp("strategy %x\n", abp->b_vp);
    if ((abp->b_flags & B_READ) == B_READ) {
	/* read b_bcount bytes into kernel address b_un.b_addr starting
	    at byte DEV_BSIZE * b_blkno.  Bzero anything we can't read,
	    and finally call iodone(abp).  File is in abp->b_vp.  Credentials
	    are from u area??
	*/
	tuio.afsio_iov = tiovec;
	tuio.afsio_iovcnt = 1;
#ifdef	AFS_VFS40
	tuio.afsio_offset = (u_int) dbtob(abp->b_blkno);
#else
	tuio.afsio_offset = DEV_BSIZE * abp->b_blkno;
#endif
	tuio.afsio_seg = AFS_UIOSYS;
#ifdef AFS_UIOFMODE
	tuio.afsio_fmode = 0;
#endif
	tuio.afsio_resid = abp->b_bcount;
	tiovec[0].iov_base = abp->b_un.b_addr;
	tiovec[0].iov_len = abp->b_bcount;
	/* are user's credentials valid here?  probably, but this
	     sure seems like the wrong things to do. */
	code = afs_rdwr((struct vcache *) abp->b_vp, &tuio, UIO_READ, 0, u.u_cred);
	if (code == 0) {
	    if (tuio.afsio_resid > 0)
		bzero(abp->b_un.b_addr + abp->b_bcount - tuio.afsio_resid, tuio.afsio_resid);
	}
    }
    else {
	tuio.afsio_iov = tiovec;
	tuio.afsio_iovcnt = 1;
#ifdef	AFS_VFS40
	tuio.afsio_offset = (u_int) dbtob(abp->b_blkno);
#else
	tuio.afsio_offset = DEV_BSIZE * abp->b_blkno;
#endif
	tuio.afsio_seg = AFS_UIOSYS;
#ifdef AFS_UIOFMODE
	tuio.afsio_fmode = 0;
#endif
	tuio.afsio_resid = abp->b_bcount;
	tiovec[0].iov_base = abp->b_un.b_addr;
	tiovec[0].iov_len = abp->b_bcount;
	/* are user's credentials valid here?  probably, but this
	     sure seems like the wrong things to do. */
	code = afs_rdwr((struct vcache *) abp->b_vp, &tuio, UIO_WRITE, 0, u.u_cred);
    }
#ifdef	AFS_VFS40
    /*
      * Call pvn_done() to free the bp and pages.  If not ASYNC
      * then we have to call pageio_done() to free the bp.
      */
    pvn_done(abp);
    if (!(abp->b_flags & B_ASYNC)) {
	pageio_done(abp);
    }
#else
    iodone(abp);
#endif
    return code;
}

#ifdef	AFS_VFS40

int afs_vmdebug=0;
/*
 * afs_writelbn: write the page list pp, covering len bytes at file
 * offset off, by wrapping it in a buf and passing it to afs_ustrategy().
 *
 * flags is composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}; B_WRITE
 * is added here.  Returns ENOMEM if no buf could be set up (after
 * failing the pages with pvn_fail), otherwise the afs_ustrategy status.
 */
static int
afs_writelbn(avc, pp, off, len, flags)
register struct vcache *avc;
struct page *pp;
u_int off;
u_int len;
int flags;
{
    struct buf *wbuf;
    int status;
    int ioflags = B_WRITE | flags;

    if (afs_vmdebug) printf("in afs_writelbn:off=%d,len=%d, flags=%x\n", off, len, flags);

    wbuf = pageio_setup(pp, len, (struct vnode *)avc, ioflags);
    if (wbuf == NULL) {
	pvn_fail(pp, ioflags);
	return (ENOMEM);
    }

    wbuf->b_blkno = btodb(off);
    wbuf->b_dev = 0;
    bp_mapin(wbuf);

    status = afs_ustrategy(wbuf);
    u.u_ru.ru_oublock++;	/* charge the caller with one block written */

    if (afs_vmdebug) printf("Leaving afs_writelbn=%d\n", status);
    return (status);
}

/* Called from pvn_getpages or afs_getpage to get a particular page. When we are called the rnode has already locked by afs_getpage. */
int afs_lostpage=0;
static int
afs_getapage(vp, off, protp, pl, plsz, seg, addr, rw, cred)
struct vnode *vp;
u_int off;
u_int *protp;
struct page *pl[];
u_int plsz;
struct seg *seg;
addr_t addr;
enum seg_rw rw;
struct ucred *cred;
{
    register struct vcache *avc = (struct vcache *)vp;
    struct vrequest treq;
    register u_int bsize;
    struct buf *bp;
    struct page *pp, *pp2, **ppp, *pagefound;
    daddr_t lbn;
    u_int io_off, io_len;
    u_int blksize, blkoff;
    int code;

    if (afs_vmdebug) printf("Entering afs_getapage: vp %x size %d off %d pl %x addr %x\n", vp, avc->m.Length, off, pl, addr);
    bsize = vp->v_vfsp->vfs_bsize;
reread:
    code = 0;
    lbn = off / bsize;
    blkoff = lbn * bsize;

    if ((pagefound = page_find(vp, off)) == NULL) {
	/* Need to go to server to get a block	 */
	if (blkoff < avc->m.Length && blkoff + bsize > avc->m.Length) {
	    /* If less than a block left in file read less than a block. */
	    if (avc->m.Length <= off) {
		/* Trying to access beyond EOF, set up to get at least one page. */
		blksize = off + PAGESIZE - blkoff;
	    } else {
		blksize = avc->m.Length - blkoff;
	    }
	} else {
	    blksize = bsize;
	}
	if (afs_vmdebug) printf("getapage-1: plsz=%d, bsize=%d, off=%d, addr=%x, blkoff=%d, blksize=%d, Len=%d\n", plsz, bsize, off, addr, blkoff, blksize, avc->m.Length);
	pp = pvn_kluster(vp, off, seg, addr, &io_off, &io_len, blkoff, blksize, 0);
	if (pp == NULL) panic("afs_getapage: pvn_kluster");

	if (pl != NULL) {
	    register int sz;

	    if (plsz >= io_len) {
		/* Everything fits, set up to load up and hold all the pages. */
		pp2 = pp;
		sz = io_len;
	    } else {
		for (pp2 = pp; pp2->p_offset != off; pp2 = pp2->p_next) ;
		sz = plsz;
	    }

	    ppp = pl;
	    do {
		PAGE_HOLD(pp2);
		*ppp++ = pp2;
		pp2 = pp2->p_next;
		sz -= PAGESIZE;
	    } while (sz > 0);
	    *ppp = NULL;		/* terminate list */
	}

	/* Now round the request size up to page boundaries. This insures that the entire page will be initialized to zeroes if EOF is encountered. */
	io_len = ptob(btopr(io_len));
	bp = pageio_setup(pp, io_len, vp, ppp == NULL ? (B_ASYNC | B_READ) : B_READ);
	bp->b_blkno = btodb(io_off);
	bp->b_dev = 0;
	bp_mapin(bp);

	/* If doing a write beyond what we believe is EOF, don't bother trying to read the pages from the server, we'll just zero the pages here.  We don't check that the rw flag is S_WRITE here because some implementations may attempt a read access to the buffer before copying data. */
	if (io_off >= avc->m.Length && seg == segkmap) {
	    bzero(bp->b_un.b_addr, io_len);
	    pvn_done(bp);
	    if (ppp != NULL)
		pageio_done(bp);
	} else {
	    code = afs_ustrategy(bp);
	}
	u.u_ru.ru_majflt++;
	if (seg == segkmap)
	    u.u_ru.ru_inblock++;	/* count as `read' operation */
    }

    if (pagefound != NULL) {
	int s;

	/* We need to be careful here because if the page was previously on the free list, we might have already lost it at interrupt level. */
	s = splvm();
	if (pagefound->p_vnode == vp && pagefound->p_offset == off) {
	    /* If the page is intransit or if it is on the free list call page_lookup to try and wait for / reclaim the page. */
	    if (pagefound->p_intrans || pagefound->p_free)
		pagefound = page_lookup(vp, off);
	}
	(void) splx(s);
	if (pagefound == NULL || pagefound->p_offset != off ||
	    pagefound->p_vnode != vp || pagefound->p_gone) {
	    afs_lostpage++;
	    goto reread;
	}
	if (pl != NULL) {
	    PAGE_HOLD(pagefound);
	    pl[0] = pagefound;
	    pl[1] = NULL;
	    u.u_ru.ru_minflt++;
	}
    }

    if (code && pl != NULL) {
	for (ppp = pl; *ppp != NULL; *ppp++ = NULL)
	    PAGE_RELE(*ppp);
    }
    if (afs_vmdebug) printf("afs_getapage-E: returning %d\n", code);
    return (code);
}


/*
 * afs_getpage: return all the pages in [off..off+len) of the file.
 *
 * Sets *protp to PROT_ALL when protp is supplied, validates the vcache,
 * rejects requests that reach beyond EOF (unless they come through
 * segkmap), then fetches a single page directly with afs_getapage() or
 * a larger range through pvn_getpages().  avc->lock is not taken here
 * (the lock calls were commented out in the original code).
 */
int afsgetpage=0;
static int
afs_getpage(vp, off, len, protp, pl, plsz, seg, addr, rw, cred)
struct vnode *vp;
u_int off, len;
u_int *protp;
struct page *pl[];
u_int plsz;
struct seg *seg;
addr_t addr;
enum seg_rw rw;
struct ucred *cred;
{
    register struct vcache *avc = (struct vcache *)vp;
    struct vrequest treq;
    int status;

    afsgetpage++;
    if (afs_vmdebug) printf("Entering afs_getpage\n");
    if (protp != NULL) {
	*protp = PROT_ALL;
    }

    afs_InitReq(&treq, cred);
    status = afs_VerifyVCache(avc, &treq);
    if (status != 0) {
	return status;		/* could not validate the cache entry */
    }

    /*
     * When called as a side effect of an afs_rdwr() write, the local file
     * size may not have been extended yet; such requests arrive through
     * segkmap and are allowed so that pages of zeroes can be returned.
     */
    if (seg != segkmap && off + len > avc->m.Length + PAGEOFFSET) {
	return (EFAULT);	/* beyond EOF */
    }

    if (len > PAGESIZE) {
	status = pvn_getpages(afs_getapage, vp, off, len, protp, pl, plsz,seg, addr, rw, cred);
    } else {
	status = afs_getapage(vp, off, protp, pl, plsz, seg, addr,rw, cred);
    }

    if (afs_vmdebug) printf("Leaving afs_getpage=%d\n", status);
    return (status);
}


/*
 * afs_putpage: write back (and/or invalidate, per flags) the pages of vp
 * in [off..off+len); len == 0 means every page at or beyond off.
 *
 * Dirty pages are collected with pvn_vplist_dirty()/pvn_range_dirty(),
 * split into contiguous runs that do not cross a bsize boundary, and each
 * run is written with afs_writelbn().  Returns 0, the afs_VerifyVCache
 * status, or the first afs_writelbn error.
 *
 * afsputpage counts the number of calls.
 */
int afsputpage=0;
static int
afs_putpage(vp, off, len, flags, cred)
struct vnode *vp;
u_int off;
u_int len;
int flags;
struct ucred *cred;
{
    register struct vcache *avc = (struct vcache *)vp;
    struct vrequest treq;
    register struct page *pp;
    struct page *dirty, *io_list;
    register u_int io_off, io_len;
    daddr_t lbn;
    u_int lbn_off;
    u_int bsize;
    int vpcount;
    int code = 0;

    afsputpage++;
    if (afs_vmdebug) printf("afs_putpage-1:off=%d,len=%d,flags=%x\n",off,len,flags);
    /* whole-file request without B_INVAL on a read-only file system: nothing to do */
    if (len == 0 && (flags & B_INVAL) == 0 &&
	 (vp->v_vfsp->vfs_flag & VFS_RDONLY)) {
	return (0);
    }

    afs_InitReq(&treq, cred);
    code = afs_VerifyVCache(avc, &treq);
    if (code) return code;  /* failed to get it */

    /* no cached pages, or the range starts at/after EOF: nothing to do */
    if (vp->v_pages == NULL || off >= avc->m.Length)
	return (0);

  
    bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
    /* remember the reference count on entry; it is consulted when the hold is dropped at the end */
    vpcount = vp->v_count;
    VN_HOLD(vp);

again:
      if (len == 0) {
	  /* Search the entire vp list for pages >= off  */
	  dirty = pvn_vplist_dirty(vp, off, flags);
      } else {
	  /* Do a range from [off...off + len) via page_find. We set limits so that we kluster to bsize boundaries. */
	  if (off >= avc->m.Length) {
	      dirty = NULL;
	  } else {
	      u_int fsize, eoff, offlo, offhi;

	      /* file size rounded up to a page boundary */
	      fsize = (avc->m.Length + PAGEOFFSET) & PAGEMASK;
	      eoff = MIN(off + len, fsize);
	      offlo = (off / bsize) * bsize;
	      offhi = roundup(eoff, bsize);
	      dirty = pvn_range_dirty(vp, off, eoff, offlo, offhi,
				      flags);
	  }
      }

    /* Now pp will have the list of kept dirty pages marked for write back.  It will also handle invalidation and freeing of pages that are not dirty.  All the pages on the list returned need to still be dealt with here.  */
    while ((pp = dirty) != NULL) {
	/*
	 * Pull off a contiguous chunk that fits in one lbn
	 */
	io_off = pp->p_offset;
	lbn = io_off / bsize;

	page_sub(&dirty, pp);
	io_list = pp;
	io_len = PAGESIZE;
	lbn_off = lbn * bsize;

	/* keep taking pages while they are in the same block and directly follow the run so far */
	while (dirty != NULL && dirty->p_offset < lbn_off + bsize &&
	       dirty->p_offset == io_off + io_len) {
	    pp = dirty;
	    page_sub(&dirty, pp);
	    page_sortadd(&io_list, pp);
	    io_len += PAGESIZE;
	}

	/*
	 * Check for page length rounding problems
	     */
	if (io_off + io_len > lbn_off + bsize) {
	    io_len = lbn_off + bsize - io_off;
	}

	code = afs_writelbn(avc, io_list, io_off, io_len, flags);
	if (code) break;
    }

    /* after an error, fail the pages still waiting on the dirty list;
	otherwise, for a synchronous whole-file invalidate, repeat while pages remain */
    if (code && dirty != NULL)
	pvn_fail(dirty, flags);
    else if ((flags & (B_INVAL | B_ASYNC)) == B_INVAL && len == 0 && off == 0 && vp->v_pages != NULL)
	goto again;

/* NOTE(review): no goto in this routine targets the label below */
out:
      /* Instead of using VN_RELE here we are careful to only call the inactive routine if the vnode reference count is now zero, but  it wasn't zero coming into pageio.  This is to prevent calling the putpage routine on a vnode that is already considered in the `inactive' state. */
    if (--vp->v_count == 0 && vpcount > 0)
	  (void) afs_inactive(avc, cred);
    if (afs_vmdebug) printf("afs_putpage-END=%d\n", code);
    return (code);
}

/* This is identical to nfs_map() and ufs_map() */
int afsmap=0;
afs_map(vp, off, as, addr, len, prot, maxprot, flags, cred)
struct vnode *vp;
u_int off;
struct as *as;
addr_t addr;
u_int len;
u_int prot, maxprot;
u_int flags;
struct ucred *cred;
{
	struct segvn_crargs vn_a;
	int code;

	afsmap++;
	if (afs_vmdebug) printf("Entering AFS_MAP\n");
	if ((int)off < 0 || (int)(off + len) < 0)
		return (EINVAL);

#ifdef	AFS_MACH_ENV
	if ((vp->v_mode&VFMT) != VREG)
#else	AFS_MACH_ENV
	if (vp->v_type != VREG)
#endif	AFS_MACH_ENV
		return (ENODEV);

	vn_a.vp = vp;
	vn_a.offset = off;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = prot;
	vn_a.maxprot = maxprot;
	vn_a.cred = cred;
	vn_a.amp = NULL;

	(void) as_unmap(as, addr, len);
	code = as_map(as, addr, len, segvn_create, (caddr_t)&vn_a);
	if (afs_vmdebug) printf("Leaving AFS_MAP=%d\n", code);
	return code;
}

/*
 * afs_dump: unimplemented vnode operation.  Counts the call in afsdump,
 * prints a reminder and fails with EINVAL.
 */
int afsdump=0;
afs_dump() {
    afsdump += 1;
    printf("AFS_DUMP. MUST IMPLEMENT THIS!!!\n");
    return (EINVAL);
}

/*
 * afs_cmp: two vnodes are the same exactly when the pointers are equal.
 * Returns 1 if vp1 and vp2 are identical, 0 otherwise.
 */
afs_cmp(vp1, vp2) 
struct vnode *vp1, *vp2;
{
    if (vp1 == vp2)
	return (1);
    return (0);
}

/*
 * afs_realvp: unimplemented vnode operation.  Counts the call in
 * afsrealvp, logs a reminder through afs_dp and fails with EINVAL.
 */
int afsrealvp=0;
afs_realvp() {
    afsrealvp += 1;
    afs_dp("Entering AFS_REALVP:. MUST IMPLEMENT THIS!!!\n");
    return (EINVAL);
}
#else
/*
 * afs_bmap: trivial block map.  The "device" vnode is the file itself,
 * and logical block abn (8192-byte units) is translated into units of
 * DEV_BSIZE.  Either output pointer may be null.  Always returns 0.
 */
afs_bmap(avc, abn, anvp, anbn)
    register struct vcache *avc;
    long abn, *anbn;
    struct vcache **anvp; {
    if (anvp != 0) {
	*anvp = avc;
    }
    if (anbn != 0) {
	/* 8192-byte logical blocks expressed in DEV_BSIZE units */
	*anbn = abn * (8192 / DEV_BSIZE);
    }
    return 0;
}

/*
 * afs_strategy: the strategy entry point proper.  Hands the buffer to
 * osi_MapStrategy() together with afs_ustrategy, which does the
 * transfer, and passes back its result.
 */
afs_strategy (abp)
    register struct buf *abp; {
    int result;

    result = osi_MapStrategy(afs_ustrategy, abp);
    return result;
}
#endif

#ifdef	AFS_GATEWAY
/*
 * afs_fid: build a file identifier for avc.  A struct fid is allocated
 * (from afs_fid_zone under AFS_MACH_ENV, with kmem_alloc otherwise),
 * stored through fidpp, and filled with a copy of avc's VenusFid.
 * Always returns 0.
 */
afs_fid(avc, fidpp)
struct vcache *avc;
struct fid **fidpp;
{
    register struct fid *newfid;

#ifdef	AFS_MACH_ENV
    newfid = (struct fid *) osi_Zalloc(afs_fid_zone);
#else
    newfid = (struct fid *) kmem_alloc(sizeof(struct fid));
#endif
    *fidpp = newfid;

    /* the fid payload is the VenusFid itself */
    newfid->fid_len = sizeof(struct VenusFid);
    bcopy(&avc->fid, newfid->fid_data, sizeof(struct VenusFid));
    return (0);
}

#ifdef	AFS_MACH_ENV
/*
 * afs_freefid: give a struct fid back to afs_fid_zone.  avc is not
 * referenced.  Always returns 0.
 */
afs_freefid(avc, fidp)
struct vcache *avc;
struct fid *fidp;
{
    osi_Zfree(afs_fid_zone, fidp);

    return 0;
}
#endif	AFS_MACH_ENV
#endif	AFS_GATEWAY

/* avc must be held, its parent must be unlocked.  Returns true if any of
    the requested access modes are allowed. */
/*
 * Parameters:
 *	avc	file or directory whose access is being checked
 *	arights	access bits being requested
 *	areq	request structure; supplies the uid (and, under AFS_GATEWAY,
 *		the host address) looked up in the per-user access cache
 *
 * The decision is taken from, in order: the "any user" access bits of the
 * directory holding the ACL, that directory's small per-user cache
 * (randomUid/randomAccess), and finally an RXAFS_FetchStatus call whose
 * CallerAccess result is also appended to that cache.  Returns 1 if
 * access is granted and 0 otherwise (including on RPC failure).
 */
afs_AccessOK (avc, arights, areq)
    register struct vcache *avc;
    struct vrequest *areq;
    long arights; {
    register struct vcache *tvc;
    struct AFSFetchStatus OutStatus;
    struct AFSCallBack CallBack;
    struct AFSVolSync tsync;
    struct VenusFid aclfid;
    register long i, mask;
    long code;
    int canLockFid;
    struct conn *newconn;

    /*
    This routine checks that the access bits in a directory permit a particular access to
    work.  It makes use of the fact that GetVStat clears the access information before
    it stats a directory, and that GetVStat was recently called on the directory in question, either, if
    this is a file, in the next few lines, or, if this is a directory, when the first
    parameter to this function was computed.

    Note that avc's ref count is assumed to be bumped.  It should not be otherwise locked.  Its parent is
    assumed to unlocked.
    */

    /* This next used to use different mode bits for owner and others. */
    /* owner mode bits, shifted down to a 0..7 index into fileModeMap */
    mask = (avc->m.Mode & 0700) >> 6;

    if (vType(avc) == VDIR) {
        tvc = avc;
        aclfid = avc->fid;
	mask = -1;	/* ignore mode bits for dirs */
	canLockFid = 0;	/* means aclfid is same as avc */
    }
    else {
	/* possible race: new cache entry between IWalk (setting parent field) and here.
	    In this case, avc->parentVnode is 0, and we don't know who our parent really
	    is.  This will get fixed when we fix the stat interface to return parent info.  For now,
	    venus doesn't block these operations; they'll fail later. */
	if (avc->parentVnode == 0) return 1;		/* fail operation later */
	/* for a file the ACL lives on the parent directory: same cell and volume, parent's vnode/unique */
	aclfid.Cell = avc->fid.Cell;
        aclfid.Fid.Volume = avc->fid.Fid.Volume;
        aclfid.Fid.Vnode = avc->parentVnode;
        aclfid.Fid.Unique = avc->parentUnique;
        tvc = afs_GetVCache(&aclfid, areq);
	if (tvc == 0) return 0;
        mask = fileModeMap[mask];
	canLockFid = 1;	/* means new fid */
    }

    /* from here on, canLockFid also means tvc carries a reference we must drop with afs_PutVCache */

    /* first chance: rights available to any user */
    if ((mask & arights & tvc->anyAccess) == arights) {
	if (canLockFid) afs_PutVCache(tvc);
	return 1;
    }

    /* second chance: a cached answer for this particular user */
    for(i=0;i<CPSIZE;i++) {
#ifdef	AFS_GATEWAY
        if (areq->uid == tvc->randomUid[i] && areq->hostaddr == tvc->randomHostaddr[i]) {
#else	AFS_GATEWAY
        if (areq->uid == tvc->randomUid[i]) {
#endif	AFS_GATEWAY
	    if (canLockFid) afs_PutVCache(tvc);
	    return ((mask & arights & tvc->randomAccess[i]) == arights);
	}
    }

    /* If we make it here, we just don't know the answer. */
    /* ask the server, retrying for as long as afs_Analyze asks us to */
    do {
        newconn = afs_Conn(&aclfid, areq);
	if (newconn){
	    code = RXAFS_FetchStatus(newconn->id, (struct AFSFid *) &aclfid.Fid, &OutStatus, &CallBack, &tsync);
	} else code = -1;
    } while (afs_Analyze(newconn, code, &aclfid, areq));
    if (code != 0) {
	if (canLockFid) afs_PutVCache(tvc);
	return 0;
    }
    /* slide the cache down one slot (dropping entry 0) and record this user's rights in the last slot */
    for(i=0; i<CPSIZE-1; i++) {
        tvc->randomUid[i] = tvc->randomUid[i+1];
#ifdef	AFS_GATEWAY
        tvc->randomHostaddr[i] = tvc->randomHostaddr[i+1];
#endif	AFS_GATEWAY
        tvc->randomAccess[i] = tvc->randomAccess[i+1];
    }
    tvc->randomUid[CPSIZE-1] = areq->uid;
#ifdef	AFS_GATEWAY
    tvc->randomHostaddr[CPSIZE-1] = areq->hostaddr;
#endif	AFS_GATEWAY
    tvc->randomAccess[CPSIZE-1] = OutStatus.CallerAccess;
    if (canLockFid) afs_PutVCache(tvc);
    return ((mask & OutStatus.CallerAccess & arights) == arights);
}

/*
 * shutdown_vnodeops: reset this file's global state -- the pag counter,
 * the mariner pointer and every slot of marinerVCs -- to zero.
 */
shutdown_vnodeops()
{
    int slot;

    marinerPtr = 0;
    pagCounter = marinerPtr;

    slot = 0;
    while (slot < NMAR) {
	marinerVCs[slot] = 0;
	slot++;
    }
}


/* These should probably move somewhere else */
#define	AFS_MAXIOVCNT	    16

/* routine to make copy of uio structure in ainuio, using aoutvec for space */
afsio_copy(ainuio, aoutuio, aoutvec)
struct uio *ainuio, *aoutuio;
register struct iovec *aoutvec; {
    register int i;
    register struct iovec *tvec;

    if (ainuio->afsio_iovcnt > AFS_MAXIOVCNT) return EINVAL;
    bcopy(ainuio, aoutuio, sizeof(struct uio));
    tvec = ainuio->afsio_iov;
    aoutuio->afsio_iov = aoutvec;
    for(i=0;i<ainuio->afsio_iovcnt;i++){
	bcopy(tvec, aoutvec, sizeof(struct iovec));
	tvec++;	    /* too many compiler bugs to do this as one expr */
	aoutvec++;
    }
    return 0;
}

/*
 * afsio_trim: cut the uio structure down so that it describes asize
 * bytes.  The residual count is set to asize, the iovec that straddles
 * the limit is shortened, and the iovec count is reduced to the number
 * of vectors still needed.  Always returns 0.
 *
 * As the original author notes, use with more than one iovec has not
 * been tested.
 */
afsio_trim(auio, asize)
register struct uio *auio;
register long asize; {
    register int used;
    register struct iovec *vec;

    auio->afsio_resid = asize;
    vec = auio->afsio_iov;
    used = 0;
    while (used < auio->afsio_iovcnt && asize > 0) {
	if (vec->iov_len > asize) {
	    /* this vector straddles the limit: shorten it, and it is the last */
	    vec->iov_len = asize;
	    used++;
	    break;
	}
	/* vector lies entirely inside the limit */
	asize -= vec->iov_len;
	used++;
	vec++;
    }
    auio->afsio_iovcnt = used;
    return 0;
}

/*
 * afsio_skip: advance the uio structure past asize bytes, consuming
 * iovecs from the front.  Stops early if the residual count reaches
 * zero.  Always returns 0.
 *
 * As the original author notes, use with more than one iovec is not
 * guaranteed to work (untested).
 */
afsio_skip(auio, asize)
register struct uio *auio;
register long asize; {
    register struct iovec *cur;	/* iovec currently being consumed */
    register int cnt;

    for (;;) {
	if (asize <= 0 || !auio->afsio_resid)
	    break;
	cur = auio->afsio_iov;
	cnt = cur->iov_len;
	if (cnt != 0) {
	    /* take as much of this vector as is still to be skipped */
	    if (cnt > asize)
		cnt = asize;
	    cur->iov_base += cnt;
	    cur->iov_len -= cnt;
	    auio->uio_resid -= cnt;
	    auio->uio_offset += cnt;
	    asize -= cnt;
	}
	else {
	    /* exhausted vector: step to the next one */
	    auio->afsio_iov++;
	    auio->afsio_iovcnt--;
	}
    }
    return 0;
}

#ifdef	AFS_MACH_ENV
afs_page_read(avc, abuffer, asize, aoffset, acred)
    register struct vcache *avc;
    caddr_t abuffer;
    int asize;
    vm_offset_t aoffset;
    struct ucred *acred; {
    int error;
    struct uio uio;
    struct iovec iov;
    struct vrequest treq;

    iov.iov_base = abuffer;
    iov.iov_len = asize;
    uio.uio_iov = &iov;
    uio.uio_iovcnt = 1;
    uio.uio_offset = aoffset;
    uio.uio_segflg = UIO_SYSSPACE;
    uio.uio_resid = asize;
    error = afs_rdwr(avc, &uio, UIO_READ, 0, acred);
    if (error) {
	printf("error %d on pagein (afs_rdwr)\n", error);
	error = EIO;
    } else if ((avc->states & CWired) == 0) {
	afs_InitReq(&treq, acred);
	ObtainWriteLock(&avc->lock);
	afs_Wire(avc, &treq);
	ReleaseWriteLock(&avc->lock);
    }
    return(error);
}

afs_page_write(avc, abuffer, asize, aoffset, acred, init)
    register struct vcache *avc;
    caddr_t abuffer;
    int asize;
    vm_offset_t aoffset;
    struct ucred *acred;
    boolean_t init; {
    int error;
    struct uio uio;
    struct iovec iov;

    if (init) {
	panic("afs_page_write: called from data_initialize");
    }
    iov.iov_base = abuffer;
    iov.iov_len = asize;
    uio.uio_iov = &iov;
    uio.uio_iovcnt = 1;
    uio.uio_offset = aoffset;
    uio.uio_segflg = UIO_SYSSPACE;
    uio.uio_resid = asize;
    error = afs_rdwr(avc, &uio, UIO_WRITE, 0, acred);
    if (error) {
	printf("error %d on pageout (afs_rdwr)\n", error);
	error = EIO;
    }
    return(error);
}

/*
 *  afs_read1dir: return one directory entry per call.
 *
 *  This is the old afs_readdir code with afs_AdvanceFD folded in, except
 *  that the caller's own idea of DIRBLKSIZ (taken from the size of the
 *  request, but never less than 512) is used in place of the true value
 *  or a fixed guess.
 *
 *  The uio offset is an opaque position: offset / DirBlkSiz is the first
 *  blob of the Vice directory to consider, and the in-use bit map decides
 *  which blobs hold entries.  Each entry goes out as a struct
 *  afs_dirstruct header followed by the null-terminated name, and the
 *  request is then reported as completely satisfied.
 */
afs_read1dir(avc, auio, acred)
    register struct vcache *avc;
    struct uio *auio;
    struct ucred *acred; {
    struct vrequest treq;
    register struct dcache *tdc;
    long us;
    register long code;
    long offset, len, nextBlob;
    struct DirEntry *tde;
    struct afs_dirstruct dirEntry;
    long DirBlkSiz;

    afs_InitReq(&treq, acred);	    /* set up the request structure */

    /* bring the cache entry up to date */
    code = afs_VerifyVCache(avc, &treq);
    if (code) goto done;

    /* get hold of the whole directory */
    tdc = afs_GetDCache(avc, 0, &treq, &offset, &len, 1);
    if (!tdc) {
	code = ENOENT;
	goto done;
    }

    ObtainReadLock(&avc->lock);

    /* the request size is taken as the caller's DIRBLKSIZ, floor 512 */
    DirBlkSiz = auio->afsio_resid;
    if (DirBlkSiz < 512) DirBlkSiz = 512;
    nextBlob = auio->afsio_offset / DirBlkSiz;

    /* find the next blob in use, then map it in */
    us = BlobScan(&tdc->f.inode, nextBlob);
    tde = (struct DirEntry *) 0;
    if (us != 0) {
	tde = (struct DirEntry *) dir_GetBlob(&tdc->f.inode, us);
    }

    if (tde != (struct DirEntry *) 0) {
	/* d_fileno is volume << 16 + vnode */
	dirEntry.d_fileno = (avc->fid.Fid.Volume << 16) + ntohl(tde->fid.vnode);
	dirEntry.d_reclen = DirBlkSiz;
	dirEntry.d_namlen = strlen(tde->name); /* terminating null not counted */

	/* header first, then the name including its null */
	code = uiomove(&dirEntry, sizeof(struct afs_dirstruct), UIO_READ, auio);
	if (code == 0) {
	    code = uiomove(tde->name, dirEntry.d_namlen + 1, UIO_READ, auio);
	}

	auio->afsio_resid = 0;    /* pretend DirBlkSiz bytes were read */
	auio->afsio_offset = (us + dir_NameBlobs(tde->name)) * DirBlkSiz;
	DRelease(tde, 0);
    }
    else {
	/* end of directory, or the blob could not be fetched */
	code = 0;
    }

    afs_PutDCache(tdc);
    ReleaseReadLock(&avc->lock);
done:
    return afs_CheckCode(code, &treq);
}

afs_nlinks(vp, l, cred)
    struct vnode	*vp;
    int			*l;
    struct ucred	*cred;
{
    struct vattr	vattr;
    int			error;

    error = VOP_GETATTR(vp, &vattr, cred);
    if (error) {
	return (error);
    }
    *l = vattr.va_nlink;
    return (0);
}
#endif	AFS_MACH_ENV

osi_zone_t afs_buf_zone;
osi_zone_t afs_fid_zone;
osi_zone_t afs_cell_zone;
osi_zone_t afs_conn_zone;
osi_zone_t afs_vcache_zone;
osi_zone_t afs_packet_zone;
osi_zone_t afs_server_zone;
osi_zone_t afs_osifile_zone;
osi_zone_t afs_unixuser_zone;
osi_zone_t afs_VenusFid_zone;

/*
 * afs_zone_init: create the allocation zones used by AFS.  Each zone is
 * set up with osi_Zinit(), which is given the element size and a name.
 */
afs_zone_init()
{
	/* fixed-size kernel structures */
	afs_buf_zone      = osi_Zinit(sizeof(struct buf),      "afs buf");
	afs_fid_zone      = osi_Zinit(sizeof(struct fid),      "afs fid");
	afs_cell_zone     = osi_Zinit(sizeof(struct cell),     "afs cell");
	afs_conn_zone     = osi_Zinit(sizeof(struct conn),     "afs conn");
	afs_vcache_zone   = osi_Zinit(sizeof(struct vcache),   "afs vcache");

	/* packets are sized by osi_PACKETSIZE rather than by a structure */
	afs_packet_zone   = osi_Zinit(osi_PACKETSIZE,          "afs packet");

	afs_server_zone   = osi_Zinit(sizeof(struct server),   "afs server");
	afs_osifile_zone  = osi_Zinit(sizeof(struct osi_file), "afs osifile");
	afs_unixuser_zone = osi_Zinit(sizeof(struct unixuser), "afs unixuser");
	afs_VenusFid_zone = osi_Zinit(sizeof(struct VenusFid), "afs VenusFid");
}
