/*
 * 5799-WZQ (C) COPYRIGHT IBM CORPORATION 1988
 * LICENSED MATERIALS - PROPERTY OF IBM
 * REFER TO COPYRIGHT INSTRUCTIONS FORM NUMBER G120-2083
 */
/* $Header:afs_vnodeops.c 12.2$ */
/* $ACIS:afs_vnodeops.c 12.2$ */
/* $Source: /ibm/acis/usr/sys/afs/RCS/afs_vnodeops.c,v $ */

#ifndef lint
/* RCS identification string for this file; left out when lint is defined. */
static char *rcsid = "$Header:afs_vnodeops.c 12.2$";
#endif

#include "../h/types.h"
#include "../h/param.h"
#include "../h/time.h"
#include "../h/kernel.h"
#include "../h/socket.h"
#include "../h/socketvar.h"
#include "../h/protosw.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/file.h"
#include "../h/uio.h"
#include "../h/buf.h"
#include "../h/vfs.h"
#include "../h/vnode.h"
#include "../ufs/inode.h"
#include "../netinet/in.h"
#include "../h/mbuf.h"
#include "../rpc/types.h"
#include "../rpc/xdr.h"
#ifdef sun
#include "../h/fcntl.h"
#endif

#include "../afs/osi.h"
#define RFTP_INTERNALS 1
#include "../afs/r.h"
#include "../afs/rftp.h"

#include "../afs/lock.h"
#include "../afs/volerrors.h"
#include "../afsint/rvice.h"
#include "../afsint/rvaux.h"
#include "../afs/afs.h"
#include "../afs/prs_fs.h"
#include "../afs/dir.h"

/* Fid of the AFS root; defined here and zeroed by afs_vnodeops_cleanup(). */
struct VenusFid afs_rootFid;
/* Defined elsewhere: device handed to osi_UFSOpen() when opening cache files. */
extern struct osi_dev cacheDev;
/* Defined elsewhere: flag bytes (e.g. IFDataMod) indexed by a dcache entry's index field. */
extern char *afs_indexFlags;
/* Helpers and globals defined elsewhere in the cache manager. */
extern struct vcache *afs_FindVCache();
extern struct lock afs_xvcache;
extern long afs_missedCBs;
extern struct vcache *afs_NewVCache();
extern struct DirEntry *dir_GetBlob();
#ifdef	AFS_VFS32
/* Placed directly in the vnodeops table below when AFS_VFS32 is defined. */
extern int minphys();
#endif

/*
 * Forward declarations of the AFS vnode operations so they can be named in
 * the afs_vnodeops table below.  Note that afs_xioctl and afs_select are
 * declared here but are not entered in that table.
 */
int afs_open();
int afs_close();
int afs_rdwr();
int afs_xioctl();
int afs_select();
int afs_getattr();
int afs_setattr();
int afs_access();
int afs_lookup();
int afs_create();
int afs_remove();
int afs_link();
int afs_rename();
int afs_mkdir();
int afs_rmdir();
int afs_readdir();
int afs_symlink();
int afs_readlink();
int afs_fsync();
int afs_inactive();
int afs_bmap();
int afs_strategy();
int afs_badop();	/* panics when called */
int afs_noop();		/* always returns EREMOTE */

/*
 * Mariner table: a fixed ring of (name, vcache) pairs filled in by
 * afs_AddMarinerName() and searched by afs_GetMariner().
 */
#define	SMAR	    20			/* size of a mariner name */
#define	NMAR	    10			/* number of mariner names */
static char marinerNames[NMAR][SMAR];	/* names, always null-terminated by afs_AddMarinerName */
static struct vcache *marinerVCs[NMAR];	/* cache entry that goes with each name */
static marinerPtr = 0;			/* pointer to next mariner slot to use */
static pagCounter = 0;			/* next pag number handed out by afs_setpag */

/*
 * Vnode operations vector for AFS files.  The initializers are positional, so
 * their order has to match struct vnodeops (declared in ../h/vnode.h, which is
 * not visible here).  The trailing entries depend on which of AFS_VFS30,
 * AFS_VFS32 and AFS_VFS34 is defined.
 */
struct vnodeops afs_vnodeops = {
	afs_open,
	afs_close,
	afs_rdwr,
	afs_badop,	/* NOTE(review): presumably the ioctl slot -- confirm against struct vnodeops */
	afs_noop,	/* NOTE(review): presumably the select slot -- confirm against struct vnodeops */
	afs_getattr,
	afs_setattr,
	afs_access,
	afs_lookup,
	afs_create,
	afs_remove,
	afs_link,
	afs_rename,
	afs_mkdir,
	afs_rmdir,
	afs_readdir,
	afs_symlink,
	afs_readlink,
	afs_fsync,
	afs_inactive,
	afs_bmap,
	afs_strategy,
	afs_badop,	/* bread */
	afs_badop,	/* brelse */
#ifdef AFS_VFS30
	minphys,
#endif
#ifdef AFS_VFS32
	afs_badop,	/* lock control */
	afs_noop,	/* fid */
	minphys,
#endif
#ifdef AFS_VFS34
	afs_badop,	/* lock ctrl */
	afs_noop,	/* fid */
#endif
};
#ifndef	vax
/*
 * Pointer to the table above.  Code in this file recognises an AFS vnode by
 * comparing its v_op field with afs_ops.  Only defined here when vax is not
 * defined.
 */
struct vnodeops *afs_ops = &afs_vnodeops;
#endif

/*
 * File operations vector that afs_xflock() installs on an AFS file once it
 * has been flocked.  It reuses the generic vnode routines for everything
 * except close, which goes through afs_closex() so that the flock can be
 * released on the matching close.
 */
extern int vno_rw();
extern int vno_ioctl();
extern int vno_select();
extern int vno_close();
extern int afs_closex();
struct fileops afs_fileops = {
    vno_rw,
    vno_ioctl,
    vno_select,
    afs_closex,		/* close: AFS wrapper around vno_close */
};

/*
 * Table of PRSFS_* rights indexed by a 3-bit value.  Every entry includes
 * PRSFS_ADMINISTER; entries whose index has bit value 2 set add PRSFS_WRITE
 * and entries whose index has bit value 4 set add PRSFS_READ.  Bit value 1
 * adds nothing.
 * NOTE(review): the index is presumably a Unix rwx mode triple -- no user of
 * the table is visible in this part of the file, so confirm.
 */
static char fileModeMap[8] = {
    PRSFS_ADMINISTER,
    PRSFS_ADMINISTER,
    PRSFS_ADMINISTER | PRSFS_WRITE,
    PRSFS_ADMINISTER | PRSFS_WRITE,
    PRSFS_ADMINISTER | PRSFS_READ,
    PRSFS_ADMINISTER | PRSFS_READ,
    PRSFS_ADMINISTER | PRSFS_READ | PRSFS_WRITE,
    PRSFS_ADMINISTER | PRSFS_READ | PRSFS_WRITE
};
/*
 * Return this file's global state to its initial condition: the pag counter,
 * the mariner ring and the remembered root fid are all zeroed.
 */
afs_vnodeops_cleanup() {
    /* pag numbering starts over */
    pagCounter = 0;

    /* empty the mariner ring and rewind its insertion point */
    marinerPtr = 0;
    bzero(marinerVCs, sizeof(marinerVCs));
    bzero(marinerNames, sizeof(marinerNames));

    /* forget the root fid */
    bzero(&afs_rootFid, sizeof(afs_rootFid));
}

/*
 * Fill in a vrequest from a credential.
 *
 * av->uid starts out as the credential's uid and is replaced by the first
 * entry of cr_groups that looks like a pag group (top nibble 0xa of a 16-bit
 * gid under AFS_SHORTGID, otherwise top byte 'A' of a 32-bit gid).  All
 * NGROUPS slots are examined until a match is found.  av->initd is cleared.
 */
afs_InitReq(av, acred)
    register struct vrequest *av;
    register struct ucred *acred; {
#ifdef AFS_SHORTGID
    register short *gp;
#else
    register long *gp;
#endif
    register long gid;
    register int left;

    av->initd = 0;
    av->uid = acred->cr_uid;	/* used when no pag group is present */
#ifdef AFS_SHORTGID
    gp = (short *) acred->cr_groups;
#else
    gp = (long *) acred->cr_groups;
#endif
    for (left = NGROUPS; left > 0; left--, gp++) {
	gid = *gp;
#ifdef AFS_SHORTGID
	if (((gid >> 12) & 0xf) != 0xa) continue;
#else
	if (((gid >> 24) & 0xff) != 'A') continue;
#endif
	/* first pag group wins */
	av->uid = gid;
	break;
    }
}

/*
 * HandleFlock: apply a flock-style request to an AFS cache entry.
 *
 *	avc	cache entry of the file being locked or unlocked
 *	fd	open file whose FSHLOCK/FEXLOCK f_flag bits are kept in step
 *	acom	request bits; LOCK_UN, LOCK_EX and LOCK_SH are the ones tested
 *	areq	request block passed to afs_Conn, afs_Analyze and afs_VerifyVCache
 *
 * Returns 0 on success, ENOTTY for an unlock when no lock is recorded,
 * EWOULDBLOCK for an incompatible request, or otherwise the code left by the
 * server call or by afs_VerifyVCache.
 *
 * avc->flockCount as used below: 0 means unlocked, a positive value is the
 * number of shared locks counted locally, and -1 means one exclusive lock.
 *
 * We don't send multiple read flocks to the server, but rather just count
 * them up ourselves.  Of course, multiple write locks are incompatible.
 * NOTE(review): the release path only contacts the server when the count
 * reaches zero, but the set path below issues a SetLock call for every
 * compatible request, including additional shared locks -- confirm that this
 * matches the intent stated above.
 *
 * Note that we should always try to release a lock, even if we have
 * a network problem sending the release command through, since often
 * a lock is released on a close call, when the user can't retry anyway.
 *
 * At least once we dump it from our structure, it will no longer be
 * kept alive, and the server should time it out within a few minutes.
 */
HandleFlock(avc, fd, acom, areq)
    register struct vcache *avc;
    struct file *fd;
    register struct vrequest *areq;
    register long acom; {
    register struct conn *tc;
    register long code;
    long lockType;

    code = 0;		/* default when we don't make any network calls */
    ObtainWriteLock(&avc->lock);
    if (acom & LOCK_UN) {
	if (avc->flockCount == 0) {
	    ReleaseWriteLock(&avc->lock);
	    return ENOTTY;	    /* no lock held */
	}
	/* unlock the lock: drop one shared lock, or clear the exclusive one */
	if (avc->flockCount > 0) avc->flockCount--;
	else if (avc->flockCount == -1) avc->flockCount = 0;
	if (avc->flockCount == 0) {
	    /* last local lock gone, so tell the server; retry as afs_Analyze directs */
	    do {
		tc = afs_Conn(&avc->fid, areq);
#ifdef	NINTERFACE
		if (tc) code = AFS_ReleaseLock(tc->id, (struct AFSFid *) &avc->fid.Fid);
#else
		if (tc) code = RViceReleaseLock(tc->id, &avc->fid.Fid);
#endif
		else code = -1;	    /* no connection available */
	    } while (afs_Analyze(tc, code, &avc->fid, areq));
	}
	/* the local flag bits are cleared whatever the server call returned */
	fd->f_flag &= ~(FEXLOCK | FSHLOCK);
    }
    else {
	/* set a new lock */
	if ((avc->flockCount != 0 && (acom & LOCK_EX)) || (avc->flockCount < 0 && (acom & LOCK_SH))) {
	    /* incompatible modes, bounce */
	    code = EWOULDBLOCK;
	}
	else {
	    /* compatible here, try to send call to file server */
	    lockType = ((acom & LOCK_EX)? LockWrite : LockRead);
	    do {
		tc = afs_Conn(&avc->fid, areq);
#ifdef	NINTERFACE
		if (tc) code = AFS_SetLock(tc->id, (struct AFSFid *) &avc->fid.Fid, lockType);
#else
		if (tc) code = RViceSetLock(tc->id, &avc->fid.Fid, lockType);
#endif
		else code = -1;	    /* no connection available */
	    } while (afs_Analyze(tc, code, &avc->fid, areq));
	}
	if (code == 0) {
	    /* server agreed: record the lock locally and in the file flags */
	    if (acom & LOCK_EX) {
		avc->flockCount = -1;
		fd->f_flag |= FEXLOCK;
	    }
	    else {
		avc->flockCount++;
		fd->f_flag |= FSHLOCK;
	    }
	}
    }
    ReleaseWriteLock(&avc->lock);
    /* now there's a race here: we could have locked the file after someone else
	stored a file newer than the one we fetched from the server when we did
	our open syscall.  So, we update our stat info (including file's
	DataVersion number), and when we read data, we'll fetch new stuff
	from server if need be.
    */
    if (code == 0 && !(acom & LOCK_UN)) {
	/* if we're locking the file, check that we have the right data */
	code = afs_VerifyVCache(avc, areq);
	if (code) {
	    /* can't get right data, so better punt and unlock the file */
	    HandleFlock(avc, fd, LOCK_UN, areq);
	    /* don't care about the return code since we're failing anyway */
	}
    }
    return code;
}

/*
 * Install pag group aval in the current process's group list.
 *
 * The credential is first replaced by the result of crcopy().  The group
 * list is then scanned for either an existing pag group (which is
 * overwritten) or the first NOGROUP entry (which is taken over, with a new
 * NOGROUP terminator written after it when there is room).
 *
 * Returns 0 on success, or E2BIG when all NGROUPS slots hold ordinary groups.
 */
static AddPag(aval)
    long aval; {
#ifdef AFS_SHORTGID
    register short *slot;
#else
    register long *slot;
#endif
    register long gid;
    register int idx, reusing;

    u.u_cred = crcopy(u.u_cred);
#ifdef AFS_SHORTGID
    slot = (short *) u.u_cred->cr_groups;
#else
    slot = (long *) u.u_cred->cr_groups;
#endif
    reusing = 0;	/* set when an old pag slot is being overwritten */
    idx = 0;
    while (idx < NGROUPS) {
	gid = *slot;
#ifdef AFS_SHORTGID
	gid &= 0xffff;
#endif
	if (gid == NOGROUP) break;	/* end of list: free slot */
#ifdef AFS_SHORTGID
	if (((gid >> 12) & 0xf) == 0xa) {
#else
	if (((gid >> 24) & 0xff) == 'A') {
#endif
	    reusing = 1;
	    break;
	}
	idx++;
	slot++;
    }

    /* neither a free slot nor an old pag slot was found */
    if (idx == NGROUPS) return E2BIG;

    *slot = aval;
    /* a freshly used slot swallowed the terminator; restore it if it fits */
    if (!reusing && idx < NGROUPS-1) slot[1] = NOGROUP;
    return 0;
}

#ifdef AFS_GETDIRHACK
/*
 * Set afd->f_offset after a transfer.  For an AFS directory the offset
 * becomes aoffset shifted left by 4; for anything else it simply advances by
 * asize.
 */
afs_AdvanceFD(afd, aoffset, asize)
    register struct file *afd;
    long aoffset;
    long asize; {
    register struct vcache *vcp;

    vcp = (struct vcache *) afd->f_data;
    if (vType(vcp) != VDIR || vcp->v.v_op != afs_ops) {
	/* not an AFS directory: ordinary byte advance */
	afd->f_offset += asize;
    }
    else {
	afd->f_offset = aoffset << 4;
    }
}
#endif

afs_xsetgroups() {
    struct vrequest treq;

    afs_dp("in afs_xsetgroups\n");
    afs_InitReq(&treq, u.u_cred);
    setgroups();
#ifdef AFS_SHORTGID
    if (((treq.uid >> 12) & 0xf) == 0xa) {
#else
    if (((treq.uid >> 24) & 0xff) == 'A') {
#endif
	/* we've already done a setpag, so now we redo it */
	AddPag(treq.uid);
    }
}

/*
 * flock entry point.  A descriptor that refers to an AFS vnode is handled by
 * HandleFlock(), and its file ops are switched to afs_fileops so the lock
 * can be dropped on close.  Every other descriptor is passed to the regular
 * flock().  If getf() fails the routine just returns.
 */
afs_xflock () {
    struct a {
	int fd;
	int com;
    } *uap;
    struct file *fp;
    struct vcache *vcp;
    struct vrequest treq;

    uap = (struct a *) u.u_ap;
    fp = getf(uap->fd);
    if (fp == (struct file *) 0) return;

    afs_InitReq(&treq, u.u_cred);
    if (fp->f_type == DTYPE_VNODE) {
	/* f_data is a vnode here, so it is safe to look at its ops vector */
	vcp = (struct vcache *) fp->f_data;
	if (vcp->v.v_op == afs_ops) {
	    u.u_error = HandleFlock(vcp, fp, uap->com, &treq);
	    fp->f_ops = &afs_fileops;
	    return;
	}
    }
    /* not ours: let the standard implementation deal with it */
    flock();
    return;
}

/*
 * Pags are implemented as follows: the set of groups whose long representation
 * is '41XXXXXX' hex are used to represent the pags.  Being a member of such
 * a group means you are authenticated as pag XXXXXX (note that 0x41 == 'A', for
 * Andrew file system).  You are never authenticated as multiple pags at once.
 *
 * The function afs_InitReq takes a credential field and formats the
 * corresponding venus request structure.  The uid field in the vrequest structure
 * is set to the *pag* you are authenticated as, or the uid, if you aren't
 * authenticated with a pag.
 *
 * The basic motivation behind pags is this: just because your unix uid is N doesn't mean
 * that you should have the same privileges as anyone logged in on the machine as user N,
 * since this would enable the superuser on the machine to sneak in and make use of
 * anyone's authentication info, even that which is only accidentally left behind
 * when someone leaves a public workstation.
 *
 * The Andrew file system doesn't use the unix uid for anything except a handle with
 * which to find the actual authentication tokens anyway, so the pag is an alternative
 * handle which is somewhat more secure (although of course not absolutely secure).
*/
/*
 * Give the calling process a fresh pag.  The pag group value is built from
 * the tag (0xa in the top nibble of a 16-bit gid under AFS_SHORTGID, 'A' in
 * the top byte of a 32-bit gid otherwise) plus the low bits of pagCounter,
 * which is then incremented.  AddPag()'s result is stored in u.u_error.
 */
afs_setpag () {
    long newPag;

#ifdef AFS_SHORTGID
    newPag = (0xa << 12) + (pagCounter++ & 0xfff);
#else
    newPag = ('A' << 24) + (pagCounter++ & 0xffffff);
#endif
    u.u_error = AddPag(newPag);
    afs_dp("setpag returning %d\n", u.u_error);
}

/*
 * Remember aname as the mariner name for avc.  The pair goes into the next
 * slot of the NMAR-entry ring, overwriting the oldest entry once the ring is
 * full.  Names longer than SMAR-1 characters are truncated.  Always returns 0.
 */
afs_AddMarinerName(aname, avc)
    register char *aname;
    register struct vcache *avc; {
    register int slot;
    register char *dst;

    /* wrap around before picking the slot */
    if (marinerPtr >= NMAR) marinerPtr = 0;
    slot = marinerPtr++;

    dst = marinerNames[slot];
    strncpy(dst, aname, SMAR);
    dst[SMAR-1] = 0;		/* strncpy does not terminate a full-length copy */
    marinerVCs[slot] = avc;
    return 0;
}

/*
 * Return the mariner name recorded for avc, or the string "a file" when the
 * cache entry is not in the mariner table.  The lowest-numbered matching
 * slot wins.
 */
char *afs_GetMariner(avc)
    register struct vcache *avc; {
    register int slot;

    slot = 0;
    while (slot < NMAR) {
	if (marinerVCs[slot] == avc) return marinerNames[slot];
	slot++;
    }
    return "a file";
}

/*
 * Return a pointer to the first occurrence of character c in the
 * null-terminated string a, or a null pointer if it does not occur.  The
 * terminating null itself is never matched.
 */
char *index(a, c)
    register char *a, c; {
    for (; *a; a++) {
	if (*a == c) return a;
    }
    return (char *) 0;
}

/*
 * Vnode operation used for slots AFS does not implement: logs a debug
 * message and fails the call with EREMOTE.
 */
afs_noop() {
    register int failCode;

    failCode = EREMOTE;
    afs_dp("autofail noop\n");
    return failCode;
}

/*
 * Vnode operation used for slots that must never be reached on an AFS
 * vnode: logs a debug message and then panics.
 */
afs_badop() {
    afs_dp("autofail badop\n");
    /* no value is returned; panic() is the last thing this routine does */
    panic("afs vnodeop");
}

/*
 * afs_open: given a pointer to a vnode pointer, open flags and credentials,
 * open the file.
 *
 * The cache entry is revalidated, then access is checked: directories may
 * not be opened for writing (EISDIR) and need PRSFS_LOOKUP; other files need
 * PRSFS_WRITE when opened with FWRITE or FTRUNC, and PRSFS_READ in all cases.
 * On success the entry's opens count (and execsOrWriters, for a writing
 * open) is bumped.  The result is passed through afs_CheckCode().
 */
afs_open(avcp, aflags, acred)
    register struct vcache **avcp;
    long aflags;
    struct ucred *acred; {
    struct vrequest treq;
    register struct vcache *vcp;
    register struct dcache *chunk;
    register long code;
    long chunkOff, chunkLen;
    int forWrite;

    afs_InitReq(&treq, acred);
    vcp = *avcp;
    afs_dp("open %x flags %x\n", vcp, aflags);
    code = afs_VerifyVCache(vcp, &treq);
    if (code) goto done;

    forWrite = ((aflags & (FWRITE | FTRUNC)) ? 1 : 0);
    if (vType(vcp) == VDIR) {
	/* directory: read-only opens, and only with lookup rights */
	if (forWrite) code = EISDIR;
	else if (!afs_AccessOK(vcp, PRSFS_LOOKUP, &treq)) code = EACCES;
    }
    else {
	/* normal file or symlink: write right when writing, read right always */
	if (forWrite && !afs_AccessOK(vcp, PRSFS_WRITE, &treq)) code = EACCES;
	else if (!afs_AccessOK(vcp, PRSFS_READ, &treq)) code = EACCES;
    }
    if (code) goto done;

    if (aflags & FTRUNC) {
	/* a truncating open updates the file's date; this fixes touch */
	ObtainWriteLock(&vcp->lock);
	vcp->m.Date = osi_Time();
	ReleaseWriteLock(&vcp->lock);
    }
    if (forWrite && vcp->m.DataVersion == 0) {
	/* we just created this file, so make sure that we eventually store
	    something back, otherwise salvager will delete the fragment. */
	ObtainWriteLock(&vcp->lock);
	chunk = afs_GetDCache(vcp, 0, &treq, &chunkOff, &chunkLen, 0);
	if (chunk) {
	    afs_indexFlags[chunk->index] |= IFDataMod;
	    afs_PutDCache(chunk);
	}
	ReleaseWriteLock(&vcp->lock);
    }

    /* account for the new opener */
    ObtainReadLock(&vcp->lock);
    if (forWrite) vcp->execsOrWriters++;
    vcp->opens++;
    ReleaseReadLock(&vcp->lock);
done:
    return afs_CheckCode(code, &treq);
}

/*
 * afs_closex: file-ops close routine that releases the flocks held on a
 * particular file descriptor.  Sun doesn't pass file descriptor information
 * through to the vnode layer, and yet we must unlock flocked files on the
 * *appropriate* (not first, as in System V) close call.  Thus this code.
 *
 * How does this code get invoked?  afs_xflock plugs the afs_fileops
 * structure into any AFS file when it gets flocked.  N.B.: intercepting the
 * close syscall doesn't trap aborts or exit system calls.
 *
 * Returns the code from vno_close().
 */
afs_closex(afd)
    register struct file *afd; {
    struct vrequest treq;
    register struct vcache *vcp;
    long lockBits;
    long code;

    /* setup the credentials */
    afs_InitReq(&treq, u.u_cred);

    /* vcp ends up non-null only for an AFS vnode */
    vcp = (struct vcache *) 0;
    if (afd->f_type == DTYPE_VNODE) {
	vcp = (struct vcache *) afd->f_data;
	if (vcp->v.v_op != afs_ops) vcp = (struct vcache *) 0;
    }
    if (!vcp) {
	/* not ours: just let the regular close code work */
	code = vno_close(afd);
	return code;
    }

    /* keep the vnode alive across the close so the unlock can still use it */
    VN_HOLD((struct vnode *) vcp);
    lockBits = afd->f_flag & (FSHLOCK | FEXLOCK);
    afd->f_flag &= ~(FSHLOCK | FEXLOCK);
    code = vno_close(afd);
    if (lockBits) HandleFlock(vcp, afd, LOCK_UN, &treq);
    VN_RELE((struct vnode *) vcp);
    return code;	/* return code from vnode layer */
}

/*
 * afs_close: handle any closing cleanup stuff.
 *
 *	avc	cache entry being closed
 *	aflags	open flags of the descriptor; the special value -1 marks a
 *		core dump and is treated as FWRITE
 *	acred	caller's credentials
 *
 * The opens count is always decremented.  For a writing close the modified
 * data is stored back, either directly (when afs_BBusy() is true and this is
 * not a core dump) or by queueing a BOP_STORE request and waiting for it.
 * Store failures are reported to the user with uprintf.  The result is
 * passed through afs_CheckCode().
 */
afs_close(avc, aflags, acred)
    register struct vcache *avc;
    long aflags;
    struct ucred *acred; {
    register long code;
    register struct brequest *tb;
    struct vrequest treq;
    int coredump;

    afs_dp("close file %x flags %x\n", avc, aflags);
    if (aflags == -1) {
	/* special flags means core dump */
	aflags = FWRITE;
	coredump = 1;
    }
    else coredump = 0;
    afs_InitReq(&treq, acred);
    ObtainReadLock(&avc->lock);	/* so write locker never sees opens change */
    avc->opens--;
    ReleaseReadLock(&avc->lock);
    code = 0;
    if (aflags & (FWRITE | FTRUNC)) {
	register struct dcache *tdc;
	/* don't do coredumps locally due to stack space limitations at VN_RELE time */
	if (afs_BBusy()	&& !coredump) {
	    /* do it yourself if daemons are all busy */
	    ObtainWriteLock(&avc->lock);
	    avc->execsOrWriters--;
	    if (avc->execsOrWriters == 0) {
		/* last writer is gone: put the file back */
		tdc = afs_FindDCache(avc, 0);
		if (tdc) {
		    /* the store runs under a shared lock, then we go back to a write lock */
		    ConvertWToSLock(&avc->lock);
		    code = afs_StoreDCache(avc, tdc, &treq);
		    UpgradeSToWLock(&avc->lock);
		    tdc->f.states &= ~DWriting;
		    tdc->f.states |= DEntryMod;
		    if (code) {
			/* failed to store, invalidate bad cache info */
			avc->states &= ~CStatd;
			afs_indexFlags[tdc->index] &= ~IFDataMod;
			tdc->f.versionNo = -1;
		    }
		    afs_PutDCache(tdc);
		}
		else afs_dp("warning, no file on close\n");
	    }
	    ReleaseWriteLock(&avc->lock);
	}
	else {
	    /* at least one daemon is idle, so ask it to do the store.
		Also, note that queued routine BStore unlocks this entry */
	    ObtainSharedLock(&avc->lock);
	    tb = afs_BQueue(BOP_STORE, avc, 1, acred->cr_uid);
	    /* sleep waiting for the store to start, then retrieve error code */
	    while ((tb->flags & BUVALID) == 0) {
		tb->flags |= BUWAIT;
		osi_Sleep(tb);
	    }
	    code = tb->code;
	    afs_BRelease(tb);
	}

	/* VNOVNODE is "acceptable" error code from close, since
	    may happen when deleting a file on another machine while
	    it is open here. */
	if (code == VNOVNODE)
	    code = 0;
	
	/* some codes merit specific complaint */
	if (code < 0) {
	    uprintf("afs: failed to store file (network problems)\n");
	}
	else if (code == ENOSPC) {
	    uprintf("afs: failed to store file (partition full)\n");
	}
	else if (code == EDQUOT) {
	    uprintf("afs: failed to store file (over quota)\n");
	}
	else if (code != 0) uprintf("afs: failed to store file (%d)\n", code);

	/* finally, we flush any text pages lying around here; check coredump flag
	  just in case, to minimize stack usage in VN_RELE */
	if ((avc->states & CDataMod) && !coredump) {
	    avc->states	&= ~CDataMod;	/* do this first, in case of race */
	    osi_FlushText(avc);
	}
    }
    return afs_CheckCode(code, &treq);
}

/*
 * afs_write: called on writes (from afs_rdwr when arw is UIO_WRITE).
 *
 *	avc	cache entry being written
 *	auio	describes the user data; its resid and offset are advanced
 *		by the number of bytes actually transferred
 *	aio	I/O flags; IO_APPEND makes the write start at the file length
 *	acred	caller's credentials
 *
 * The data is copied chunk by chunk into the local cache files under the
 * entry's write lock.  Each chunk touched is marked IFDataMod so that it is
 * kept until stored, and (if not already being written) DWriting|DEntryMod.
 * Returns afs_CheckCode() of 0, EIO when a chunk could not be obtained, or
 * the error from the cache file write.
 */
afs_write(avc, auio, aio, acred)
    register struct vcache *avc;
    struct uio *auio;
    int aio;
    struct ucred *acred; {
    long totalLength;
    long transferLength;
    long filePos;
    long startDate;
    register struct dcache *tdc;
    long offset, len, error;
    struct uio tuio;
    struct osi_file *tfile;
    register long code;
    struct vrequest treq;

    if (avc->execsOrWriters <= 0) {
	/* bozos at Sun don't open core files for writing */
	afs_dp("opening core file\n");
	avc->states |= CCore;
	avc->opens++;		/* make file look open */
	avc->execsOrWriters++;
    }
    startDate = osi_Time();
    afs_InitReq(&treq, acred);
    /* set up the transfer bookkeeping and a private uio for the cache file I/O */
    totalLength = auio->afsio_resid;
    filePos = auio->afsio_offset;
    error = 0;
    transferLength = 0;
    tuio.afsio_iov = auio->afsio_iov;
    tuio.afsio_iovcnt = auio->afsio_iovcnt;
    tuio.afsio_seg = auio->afsio_seg;
#ifdef AFS_UIOFMODE
    tuio.afsio_fmode = auio->afsio_fmode;
#endif
    afs_dp("W");
    afs_CheckSize(totalLength>>10);	/* totalLength bounds the amount we'll grow this file */
    ObtainWriteLock(&avc->lock);
    if (aio & IO_APPEND) {
	/* append mode, start it at the right spot */
	filePos = auio->afsio_offset = avc->m.Length;
    }
    avc->m.Date	= startDate;	/* avoid counting lock-waiting time in file date (for ranlib) */
    while (totalLength > 0) {
	/* get the cache entry for the chunk holding filePos */
	tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 0);
	if (!tdc) {
	    error = EIO;
	    break;
	}
	afs_indexFlags[tdc->index] |= IFDataMod;    /* so it doesn't disappear */
	/* the parentheses matter: == binds tighter than &, so without them
	    the test was always false and the flags were never set */
	if ((tdc->f.states & DWriting) == 0) {
	    /* don't mark entry as mod if we don't have to */
	    tdc->f.states |= (DEntryMod | DWriting);
	}
	tfile = osi_UFSOpen(&cacheDev, tdc->f.inode);
	if (!tfile) panic("afswrite open");
	len = totalLength;	/* write this amount by default */
	if ((1<<avc->chunkShift) < len + offset) {
	    /* it won't all fit in this chunk, so write as much
		as will fit */
	    len = (1<<avc->chunkShift) - offset;
	    afs_dp("write doesn't all fit, chunksize is %d, newlen %d\n", 1<<avc->chunkShift, len);
	}
	/* mung uio structure to be right for this transfer */
	tuio.afsio_resid = len;
	tuio.afsio_offset = offset;
	code = VOP_RDWR(tfile->vnode, &tuio, UIO_WRITE, 0, &osi_cred);
	if (code) {
	    error = code;
	    tdc->f.versionNo = -1;	/* bad data */
	    osi_Truncate(tfile,	0);	/* fake truncate the segment */
	    afs_AdjustSize(tdc,	0);	/* sets f.chunkSize to 0 */
	    afs_indexFlags[tdc->index] &= ~IFDataMod;	/* don't bother storing back */
	    afs_PutDCache(tdc);
	    osi_Close(tfile);
	    break;
	}
	/* otherwise we've written some, fixup length, etc and continue with next seg */
	len = len - tuio.afsio_resid; /* compute amount really transferred */
	/* compute new file size */
	if (offset + len > tdc->f.chunkBytes)
	    afs_AdjustSize(tdc, offset+len);
	totalLength -= len;
	transferLength += len;
	filePos += len;
	if (filePos > avc->m.Length)
	    avc->m.Length = filePos;
	osi_Close(tfile);
	afs_PutDCache(tdc);
    }
    ReleaseWriteLock(&avc->lock);
    /* report progress back to the caller's uio */
    auio->afsio_resid -= transferLength;
    auio->afsio_offset += transferLength;
    return afs_CheckCode(error, &treq);
}

/*
 * afs_rdwr: the read/write entry point.
 *
 *	avc	cache entry being read or written
 *	auio	describes the transfer; resid and offset are advanced by the
 *		number of bytes actually moved
 *	arw	UIO_WRITE sends the whole request to afs_write(); anything
 *		else is handled here as a read
 *	aio	I/O flags, only passed on to afs_write()
 *	acred	caller's credentials
 *
 * Reads are served chunk by chunk from the local cache files under the
 * entry's read lock, stopping at the file length.  When afs_BBusy() is false
 * a fetch is handed to a background daemon and data is consumed as it
 * arrives; otherwise (or if that path does not yield current data) the chunk
 * is obtained with a type 1 afs_GetDCache() call.  Returns afs_CheckCode()
 * of 0, EIO, or the error from the cache file read.
 */
afs_rdwr(avc, auio, arw, aio, acred)
    register struct vcache *avc;
    struct uio *auio;
    enum uio_rw arw;
    int aio;
    struct ucred *acred; {
    long totalLength;
    long transferLength;
    long filePos;
    register struct dcache *tdc;
    long offset, len, error;
    struct uio tuio;
    struct osi_file *tfile;
    register long code;
    struct vrequest treq;

    if (arw == UIO_WRITE) return afs_write(avc, auio, aio, acred);
#ifdef AFS_GETDIRHACK
    /* for systems that sometimes call read on dirs, intercept appropriately */
    if (vType(avc) == VDIR) {
	auio->afsio_offset >>= 4;   /* avoid block truncation effects in seekdir */
	return afs_readdir(avc, auio, acred);
    }
#endif
    if (avc->v.v_type == VREG)
	avc->states |= CDataMod;	/* data has been read since last text flush */
    afs_dp("R");
    afs_InitReq(&treq, acred);
    /* otherwise we read */
    totalLength = auio->afsio_resid;
    filePos = auio->afsio_offset;
    error = 0;
    transferLength = 0;
    tuio.afsio_iov = auio->afsio_iov;
    tuio.afsio_iovcnt = auio->afsio_iovcnt;
    tuio.afsio_seg = auio->afsio_seg;
#ifdef AFS_UIOFMODE
    tuio.afsio_fmode = auio->afsio_fmode;
#endif
    ObtainReadLock(&avc->lock);
    while (totalLength > 0) {
	/* read all of the cached info */
	if (filePos >= avc->m.Length) break;	/* all done */
	/* a tricky question: does the presence of the DFetching flag
	    mean that we're fetching the latest version of the file?  No.
	    The server could update the file as soon as the fetch responsible for
	    the setting of the DFetching flag completes.
	    
	    However, the presence of the DFetching flag (visible under a read lock
	    since it is set and cleared only under a write lock) means that we're fetching
	    as good a version as was known to this client at the time of the last call to
	    afs_VerifyVCache, since the latter updates the stat cache's m.DataVersion field
	    under a write lock, and from the time that the DFetching flag goes on (before
	    the fetch starts), to the time it goes off (after the fetch completes), afs_GetDCache
	    keeps at least a read lock (actually it keeps an S lock) on the cache entry.
	    
	    This means that if the DFetching flag is set, we can use that data for any reads
	    that must come from the current version of the file (current == m.DataVersion).
	     
	    Another way of looking at this same point is this: if we're fetching some
	    data and then try do an afs_VerifyVCache, the VerifyVCache operation will
	    not complete until after the DFetching flag is turned off and the dcache entry's
	     f.versionNo field is updated.
	     
	     Note, by the way, that if DFetching is set, m.DataVersion > f.versionNo (the
	     latter is not updated until after the fetch completes).
	*/
	tdc = (struct dcache *) 0;
	if (!afs_BBusy()) {
	    /* create dc entry; offset and len are *not* set in a type 2 request */
	    tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 2);
	    if (tdc) {
		/* see if we need better data */
		if (avc->m.DataVersion != tdc->f.versionNo) {
		    /* now see if data is already coming in */
		    if (!(tdc->f.states & DFetching)) {
			/* data not yet moving, start bkg daemon to get some */
			tdc->f.states |= DFetchReq;
			afs_BQueue(BOP_FETCH, avc, 0, filePos, (char *) tdc);
			/* wait, with the read lock dropped, until DFetchReq is cleared */
			while (tdc->f.states & DFetchReq) {
			    afs_dp("waiting for fetch ack %x\n", tdc);
			    /* don't need waiting flag on this one */
			    ReleaseReadLock(&avc->lock);
			    osi_Sleep(&tdc->validPos);
			    ObtainReadLock(&avc->lock);
			}
		    }
		    /* data is now streaming in.  Wait for some interesting stuff. */
		    while ((tdc->f.states & DFetching) && tdc->validPos <= filePos) {
			afs_dp("waiting for data %x\n", avc);
			/* too early: wait for DFetching flag to vanish, or data to appear */
			tdc->f.states |= DWaiting;
			ReleaseReadLock(&avc->lock);
			osi_Sleep(&tdc->validPos);
			ObtainReadLock(&avc->lock);
		    }
		    /* fetching flag gone or data is here */
		    if (tdc->f.states & DFetching) {
			/* still fetching, some new data is here: compute length and offset */
			/* NOTE(review): a fixed shift of 10 is used here while afs_write sizes
			    chunks with avc->chunkShift -- confirm the units of f.chunk */
			offset = filePos - (tdc->f.chunk << 10);
			len = tdc->validPos - filePos;
		    }
		    else {
			/* no longer fetching, verify data version (avoid new GetDCache call) */
			if (avc->m.DataVersion == tdc->f.versionNo) {
			    offset = filePos - (tdc->f.chunk << 10);
			    len = tdc->f.chunkBytes - offset;
			}
			else {
			    /* don't have current data, so get it below */
			    afs_PutDCache(tdc);
			    tdc = (struct dcache *) 0;
			}
		    }
		}
		else {
		    /* data is current */
		    offset = filePos - (tdc->f.chunk << 10);
		    len = tdc->f.chunkBytes - offset;
		}
	    }
	}
	if (!tdc) {
	    /* fall back to a type 1 request, made without holding the read lock */
	    ReleaseReadLock(&avc->lock);
	    tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 1);
	    ObtainReadLock(&avc->lock);
	}
	if (!tdc) {
	    error = EIO;
	    break;
	}
	if (len	<= 0) {	/* better safe than sorry */
	    afs_PutDCache(tdc);
	    break;
	}
	tfile = osi_UFSOpen(&cacheDev, tdc->f.inode);
	if (!tfile) panic("afsread open");
	if (len	> totalLength) len = totalLength;   /* will read len bytes */
	/* mung uio structure to be right for this transfer */
	tuio.afsio_resid = len;
	tuio.afsio_offset = offset;
	code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, &osi_cred);
	if (code) {
	    error = code;
	    afs_PutDCache(tdc);
	    osi_Close(tfile);
	    break;
	}
	/* otherwise we've read some, fixup length, etc and continue with next seg */
	len = len - tuio.afsio_resid; /* compute amount really transferred */
	totalLength -= len;
	transferLength += len;
	filePos += len;
	osi_Close(tfile);
	afs_PutDCache(tdc);
    }
    ReleaseReadLock(&avc->lock);
    /* report progress back to the caller's uio */
    auio->afsio_resid -= transferLength;
    auio->afsio_offset += transferLength;
    return afs_CheckCode(error, &treq);
}

/*
 * afs_CopyOutAttrs: copy out attributes from a cache entry into a vattr.
 *
 * Ownership, mode, link count, length and date come from the entry's cached
 * status; gid, fsid, rdev and block size are fixed values.  The node id is
 * built from the vnode and volume numbers of the entry's fid, except for an
 * entry with mvstat == 2, where the fid of the volume's mount point is used
 * (or 0 if the volume cannot be found).  Always returns 0.
 */
afs_CopyOutAttrs(avc, attrs)
    register struct vattr *attrs;
    register struct vcache *avc; {
    register struct volume *vol;

    attrs->va_type = vType(avc);
    attrs->va_mode = avc->m.Mode;
    attrs->va_uid = avc->m.Owner;
    attrs->va_gid = 32767;	/* not really, means this is a vice file */
    attrs->va_fsid = 1;

    if (avc->mvstat != 2) {
	attrs->va_nodeid = avc->fid.Fid.Vnode + (avc->fid.Fid.Volume << 16);
    }
    else {
	/* report the mount point's vnode instead of our own */
	vol = afs_GetVolume(&avc->fid, 0);
	if (!vol) {
	    attrs->va_nodeid = 0;
	}
	else {
	    attrs->va_nodeid = vol->mtpoint.Fid.Vnode + (vol->mtpoint.Fid.Volume << 16);
	    afs_PutVolume(vol);
	}
    }

    attrs->va_nlink = avc->m.LinkCount;
    attrs->va_size = avc->m.Length;
    attrs->va_blocksize = 8192;

    /* one cached date serves as access, modify and change time */
    attrs->va_ctime.tv_sec = avc->m.Date;
    attrs->va_mtime.tv_sec = attrs->va_ctime.tv_sec;
    attrs->va_atime.tv_sec = attrs->va_mtime.tv_sec;
    attrs->va_ctime.tv_usec = 0;
    attrs->va_mtime.tv_usec = attrs->va_ctime.tv_usec;
    attrs->va_atime.tv_usec = attrs->va_mtime.tv_usec;

    attrs->va_rdev = 1;
    /* size rounded up to 1K units, times two; an empty file reports 1 */
    if (attrs->va_size)
	attrs->va_blocks = ((attrs->va_size + 1023)>>10) << 1;
    else
	attrs->va_blocks = 1;
    return 0;
}

/*
 * afs_getattr: vnode getattr operation.  Revalidates the cache entry and, if
 * that succeeds, fills *attrs from it with afs_CopyOutAttrs().  The result
 * is passed through afs_CheckCode().
 */
afs_getattr(avc, attrs, acred)
    register struct vcache *avc;
    register struct vattr *attrs;
    struct ucred *acred; {
    struct vrequest treq;
    register long code;

    afs_dp("getattr %x\n", avc);
    afs_InitReq(&treq, acred);
    code = afs_VerifyVCache(avc, &treq);
    if (!code)
	code = afs_CopyOutAttrs(avc, attrs);
    return afs_CheckCode(code, &treq);
}

/*
 * afs_VAttrToVS: merge useful properties into a status block.
 *
 * For each of mode, owner, length and date, the value from the vattr is used
 * unless it is the "not set" value (all ones), in which case the value
 * already cached in avc is used.  Always returns 0.
 */
afs_VAttrToVS(av, avc, avs)
    register struct vattr *av;
    register struct ViceStatus *avs;
    register struct vcache *avc; {
    if (av->va_mode == ((unsigned short) -1))
	avs->Mode = avc->m.Mode;
    else
	avs->Mode = (av->va_mode & 0xffff);

    if (av->va_uid == -1)
	avs->Owner = avc->m.Owner;
    else
	avs->Owner = av->va_uid;

    if (av->va_size == -1)
	avs->Length = avc->m.Length;
    else
	avs->Length = av->va_size;

    if (av->va_mtime.tv_sec == -1)
	avs->Date = avc->m.Date;
    else
	avs->Date = av->va_mtime.tv_sec;
    return 0;
}

/*
 * afs_setattr: vnode setattr operation.
 *
 *	avc	cache entry whose attributes are being changed
 *	attrs	requested attributes; fields left at -1 are not changed
 *	acred	caller's credentials
 *
 * Fails with EROFS on an entry marked CRO, and with EACCES when a size
 * change is requested without PRSFS_WRITE.  A size change truncates the
 * cached chunk locally and either leaves the store to the eventual close
 * (file open for writing) or stores the data back immediately.  The new
 * status is then written with afs_WriteVCache(); on any error the cached
 * status is invalidated.  The result is passed through afs_CheckCode().
 */
afs_setattr(avc, attrs, acred)
    register struct vcache *avc;
    register struct vattr *attrs;
    struct ucred *acred; {
    struct vrequest treq;
    register struct dcache *tdc;
    struct ViceStatus tvstat;
    register long code;
    long offset, len;
    struct osi_file *tfile;

    afs_dp("setattr %x\n", avc);
    /* must come before any jump to done, which hands treq to afs_CheckCode */
    afs_InitReq(&treq, acred);
    if (avc->states & CRO) {
	code=EROFS;
	goto done;
    }
    /* if file size has changed, we need write access, otherwise (e.g. chmod) give it
	a shot; if it fails, we'll discard the status info */
    if (attrs->va_size != -1 && !afs_AccessOK(avc, PRSFS_WRITE, &treq)) {
	code = EACCES;
	goto done;
    }
    afs_VAttrToVS(attrs, avc, &tvstat);	/* interpret request */
    code = 0;
    if (attrs->va_size != -1) {
	ObtainWriteLock(&avc->lock);
	/* don't set "setlocks" parm in getdcache, since we already have lock.
	    Flag 2 means that we don't care about actually fetching the data,
	    since we're going to truncate the file immediately anyway. */
	tvstat.Date = avc->m.Date = osi_Time();   /* truncate modifies file */
	if (attrs->va_size == 0)
	    tdc = afs_GetDCache(avc, 0, &treq, &offset, &len, 2);
	else
	    tdc = afs_GetDCache(avc, 0, &treq, &offset, &len, 0);
	if (tdc) {
	    if (avc->m.Length > attrs->va_size) {
		avc->m.Length = attrs->va_size;
	    }
	    /* shrink the cached file */
	    if (tdc->f.chunkBytes > attrs->va_size) {
		afs_dp("truncating file %x locally\n", avc);
		tfile = osi_UFSOpen(&cacheDev, tdc->f.inode);
		if (tfile) {
		    afs_AdjustSize(tdc, attrs->va_size);
		    osi_Truncate(tfile, attrs->va_size);
		    osi_Close(tfile);
		}
	    }
	    /* now put the file back, if it is not open for writing */
	    if (avc->execsOrWriters > 0) {
		/* file is open for writing, close will store back */
		tdc->f.states |= (DWriting | DEntryMod);
		afs_indexFlags[tdc->index] |= IFDataMod;
	    }
	    else {
		/* file is not open now, so send back now */
		ConvertWToSLock(&avc->lock);
		code = afs_StoreDCache(avc, tdc, &treq);
		UpgradeSToWLock(&avc->lock);
	    }
	    afs_PutDCache(tdc);
	    /* purge page cache again */
	    if (avc->states & CDataMod) {
		avc->states &= ~CDataMod;	/* do this first, in case of race */
		osi_FlushText(avc);
	    }
	}
	ReleaseWriteLock(&avc->lock);
    }
    if (code == 0) {
	ObtainSharedLock(&avc->lock);	/* lock entry */
	code = afs_WriteVCache(avc, &tvstat, &treq);    /* send request */
	ReleaseSharedLock(&avc->lock);	/* release lock */
    }
    if (code) avc->states &= ~CStatd;	    /* error?  erase any changes we made to vcache entry */
done:
    return afs_CheckCode(code, &treq);
}

/*
 * afs_access: vnode access operation.
 *
 *	avc	cache entry being checked
 *	amode	requested access: any combination of VREAD, VWRITE, VEXEC
 *	acred	caller's credentials
 *
 * Returns 0 when every requested kind of access is allowed.  Write access
 * to an entry marked CRO yields EROFS directly; any other denial yields
 * afs_CheckCode(EACCES).
 *
 * Directories: VEXEC and VREAD both need PRSFS_LOOKUP, VWRITE needs either
 * PRSFS_INSERT or PRSFS_DELETE.  Other files: VEXEC needs PRSFS_READ and the
 * owner-execute mode bit (0100), VWRITE needs PRSFS_WRITE, VREAD needs
 * PRSFS_READ.
 */
afs_access(avc, amode, acred)
    register struct vcache *avc;
    register long amode;
    struct ucred *acred; {
    struct vrequest treq;
    register long code;

    afs_dp("access %x mode %x\n", avc, amode);
    afs_InitReq(&treq, acred);

    code = afs_VerifyVCache(avc, &treq);
    if (code) return afs_CheckCode(code, &treq);

    /* if we're looking for write access and we have a read-only file system, report it */
    if ((amode & VWRITE) && (avc->states & CRO)) {
	return EROFS;
    }

    if (vType(avc) == VDIR) {
	if ((amode & VEXEC) && !afs_AccessOK(avc, PRSFS_LOOKUP, &treq))
	    goto denied;
	/* being able to either insert or delete counts as write access */
	if ((amode & VWRITE) && !afs_AccessOK(avc, PRSFS_INSERT, &treq)
	    && !afs_AccessOK(avc, PRSFS_DELETE, &treq))
	    goto denied;
	if ((amode & VREAD) && !afs_AccessOK(avc, PRSFS_LOOKUP, &treq))
	    goto denied;
    }
    else {
	if (amode & VEXEC) {
	    if (!afs_AccessOK(avc, PRSFS_READ, &treq)) goto denied;
	    /* readable, but the execute bit must be on as well */
	    if ((avc->m.Mode & 0100) == 0) goto denied;
	}
	if ((amode & VWRITE) && !afs_AccessOK(avc, PRSFS_WRITE, &treq))
	    goto denied;
	if ((amode & VREAD) && !afs_AccessOK(avc, PRSFS_READ, &treq))
	    goto denied;
    }
    return 0;		/* access is ok */

denied:
    return afs_CheckCode(EACCES, &treq);		/* failure code */
}

/*
 * EvalMountPoint: call under write lock; evaluate the mvid field from a
 * mount point.
 *
 *	avc	cache entry of the mount point
 *	advc	the containing directory
 *	areq	request block passed to the lookup helpers
 *
 * The mount point's link text has the form <type char><volume> or
 * <type char><cell>:<volume>.  The named volume is looked up and avc->mvid
 * is set to the fid (vnode 1, unique 1) of that volume's root; the volume's
 * mtpoint and dotdot back pointers are set to avc and advc.
 *
 * Returns 0 on success, the error from afs_HandleLink, ENODEV for an unknown
 * cell, ENOENT when the volume cannot be found, or ELOOP for a backup volume
 * mounted inside an entry marked CRO.
 */
EvalMountPoint(avc, advc, areq)
    register struct vcache *avc;
    struct vcache *advc;	    /* the containing dir */
    register struct vrequest *areq; {
    register long code;
    register struct volume *tvp;
    struct VenusFid tfid;
    struct cell *tcell;
    char *cpos;
    char type;

    if (avc->mvid && (avc->states & CMValid)) return 0;	/* done while racing */
    code = afs_HandleLink(avc, areq);
    if (code) return code;
    /* now link name is in avc->linkData, get appropriate volume info */
    type = avc->linkData[0];
    cpos = index(&avc->linkData[1], ':');
    if (cpos) {
	/* parse cellular mt point: temporarily split the text at the colon */
	*cpos = 0;
	code = 0;
	tcell = afs_GetCellByName(&avc->linkData[1]);
	if (tcell) {
	    tvp = afs_GetVolumeByName(cpos+1, tcell->cell, 1, areq);
	}
	else {
	    code = ENODEV;
	}
	*cpos =	':';	    /* put it back */
	if (code) return code;
    }
    else {
	/* no cell given: use the cell of the mount point itself */
	tvp = afs_GetVolumeByName(&avc->linkData[1], avc->fid.Cell, 1, areq);
    }
    if (!tvp) {
	return ENOENT;
    }
    /* don't allocate mvid field until we're sure we have something to put in */
    if (avc->states & CRO) {
	if (tvp->states	& VBackup) {	/* watch for backups within backups */
	    afs_PutVolume(tvp);
	    return ELOOP;
	}
	if (type == '#' && tvp->roVol) {
	    /* there is a readonly volume, and we should map to it */
	    tfid.Fid.Volume = tvp->roVol;	/* remember volume we really want */
	    tfid.Cell = tvp->cell;
	    afs_PutVolume(tvp); /* release the old volume */
	    tvp = afs_GetVolume(&tfid, areq); /* get the new one */
	    if (!tvp) {
		return ENOENT;    /* oops, can't do it */
	    }
	}
    }
    if (avc->mvid == 0)
	avc->mvid = (struct VenusFid *) osi_Alloc(sizeof(struct VenusFid));
    /* the target is the root of the volume */
    avc->mvid->Cell = tvp->cell;
    avc->mvid->Fid.Volume = tvp->volume;
    avc->mvid->Fid.Vnode = 1;
    avc->mvid->Fid.Unique = 1;
    avc->states |= CMValid;
    tvp->mtpoint = avc->fid;		/* setup back pointer to mtpoint */
    tvp->dotdot = advc->fid;
    afs_PutVolume(tvp);
    return 0;
}
    
    
/*
 * ENameOK: decide whether aname is an acceptable entry name.
 *
 * Returns 0 for a name that starts with "@cputype", "@sys" or "@hostname"
 * (prefix match), and for a name in which any character after an optional
 * leading '@' has its high bit set.  Returns 1 otherwise.
 */
ENameOK(aname)
    register char *aname; {
    register char *cp;

    cp = aname;
    if (*cp == '@') {
	cp++;
	/* the @-keywords are reserved */
	if (strncmp(cp, "cputype", 7) == 0
	    || strncmp(cp, "sys", 3) == 0
	    || strncmp(cp, "hostname", 8) == 0)
	    return 0;
    }
    /* check remainder of name for bogus chars */
    for (; *cp; cp++) {
	if (*cp & 0x80) return 0;
    }
    return 1;
}

/*
 * HandleAtName: expand a leading @-keyword in aname into aresult.
 *
 * "@sys" and "@cputype" are replaced by a string chosen at compile time from
 * the sun/mc68020, ibm032 and vax macros, followed by whatever came after
 * the keyword in aname.  "@hostname" is replaced by the global hostname; the
 * text after the keyword is not appended in that case.  Any other name is
 * copied unchanged.  The caller supplies a buffer large enough for the
 * result.  If none of the platform macros is defined, aresult is left
 * untouched for "@sys" and "@cputype".
 */
HandleAtName(aname, aresult)
    register char *aname;
    register char *aresult; {
    register char *rest;	/* text following the recognised keyword */

    if (strncmp(aname, "@sys", 4) == 0) {
	rest = aname + 4;
#ifdef	sun
#ifdef	mc68020
	strcpy(aresult, "sun3_35");
#else
	strcpy(aresult, "sun2_35");
#endif
	strcpy(aresult+7, rest);
#endif	/* sun */
#ifdef	ibm032
	strcpy(aresult, "rt_r3");
	strcpy(aresult+5, rest);
#endif
#ifdef	vax
	strcpy(aresult, "vax_22");
	strcpy(aresult+6, rest);
#endif	/* vax */
    }
    else if (strncmp(aname, "@cputype", 8) == 0) {
	rest = aname + 8;
#ifdef	sun
#ifdef	mc68020
	strcpy(aresult, "sun3");
#else
	strcpy(aresult, "sun2");
#endif
	strcpy(aresult+4, rest);
#endif	/* sun */
#ifdef	ibm032
	strcpy(aresult, "ibm032");
	strcpy(aresult+6, rest);
#endif
#ifdef	vax
	strcpy(aresult, "vax2");
	strcpy(aresult+4, rest);
#endif
    }
    else if (strncmp(aname, "@hostname", 9) == 0) {
	strcpy(aresult,hostname);
    }
    else {
	strcpy(aresult, aname);
    }
}

/*
 * afs_lookup -- look up the name aname in directory adp and hand back a
 * cache entry for the result.
 *
 * adp	    directory to search
 * aname    name to look for; a name starting with '@' is first expanded
 *	    by HandleAtName into a temporary buffer
 * avcp	    out: cache entry for the name found; written only on the
 *	    paths that end with code == 0
 * acred    credentials used to initialize the request
 *
 * If the entry found is a mount point (mvstat == 1) the mount point is
 * evaluated if necessary and the cache entry of its target is returned
 * instead.
 *
 * Returns afs_CheckCode(code, &treq), where code is 0 on success or one
 * of ENOTTY, EACCES, EIO, ENOENT, or an error from afs_VerifyVCache or
 * dir_Lookup.
 */
afs_lookup(adp, aname, avcp, acred)
    register struct vcache *adp, **avcp;
    char *aname;
    struct ucred *acred; {
    struct vrequest treq;
    char *tname;		/* name actually searched for; == aname unless '@' expansion was done */
    register struct vcache *tvc;
    struct vcache *uvc;		/* holds the mount point entry while its target is fetched */
    register long code;
    int evalFlag;		/* 1 if the mount point was (re)evaluated by this call */
    register struct dcache *tdc;
    long dirOffset, dirLen, theDir;
    struct VenusFid tfid;

    /* lookup the name aname in the appropriate dir, and return a cache entry
      on the resulting fid */
    afs_dp("lookup dir %x, name %s\n", adp, aname);
    afs_InitReq(&treq, acred);
    if (aname[0] == '@') {
	/* expand the '@' name into a scratch buffer; released at done: */
	tname = (char *) osi_AllocSendSpace();
	HandleAtName(aname, tname);
    }
    else {
	tname = aname;
    }
    code = afs_VerifyVCache(adp, &treq);
    if (code) goto done;
    /* watch for ".." in a volume root */
    if (adp->mvstat == 2 && tname[0] == '.' && tname[1] == '.' && tname[2] == 0) {
	/* looking up ".." in root via special hacks */
	if (adp->mvid == (struct VenusFid *) 0 || adp->mvid->Fid.Volume == 0) {
	    /* no usable back pointer recorded for this root */
	    afs_dp("afs: punting on '..' call for %x\n", adp);
	    code = ENOTTY;
	    goto done;
	}
	/* otherwise we have the fid here, so we use it */
	tvc = afs_GetVCache(adp->mvid, &treq);
	if (!tvc) {
	    code = ENOENT;
	    goto done;
	}
	else {
	    *avcp = tvc;
	    code = 0;
	    goto done;
	}
    }
    /* now check the access */
    if (!afs_AccessOK(adp, PRSFS_LOOKUP, &treq)) {
	code = EACCES;
	goto done;
    }
    /* now we have to lookup the next fid */
    tdc = afs_GetDCache(adp, 0, &treq, &dirOffset, &dirLen, 1);
    if (!tdc) {
	code = EIO;
	goto done;
    }
    /* now we will just call dir package with appropriate inode.
      Dirs are always fetched in their entirety for now */
    ObtainReadLock(&adp->lock);
    theDir = tdc->f.inode;	/* dir_Lookup is given the address of this local copy */
    code = dir_Lookup(&theDir, tname, &tfid.Fid);
    ReleaseReadLock(&adp->lock);
    afs_PutDCache(tdc);
    if (code) {
	goto done;
    }
    /* new fid has same cell and volume */
    tfid.Cell = adp->fid.Cell;
    tfid.Fid.Volume = adp->fid.Fid.Volume;
    afs_dp("lookup fid is %x.%x.%x.%x\n", tfid.Cell, tfid.Fid.Volume, tfid.Fid.Vnode, tfid.Fid.Unique);
    /* now get the status info */
    tvc = afs_GetVCache(&tfid, &treq);
    if (tvc) {
	/* remember which directory this entry was found in */
	tvc->parentVnode = adp->fid.Fid.Vnode;
	tvc->parentUnique = adp->fid.Fid.Unique;
	if (tvc->mvstat == 1) {
	    afs_dp("crossing mt point %x\n", tvc);
	    /* a mt point, possibly unevaluated */
	    if (tvc->mvid == (struct VenusFid *) 0 || !(tvc->states & CMValid)) {
		afs_dp("trying to eval mt point\n");
		ObtainWriteLock(&tvc->lock);
		code = EvalMountPoint(tvc, adp, &treq);
		if (code) afs_dp("evalmountpoint error %d\n", code);
		ReleaseWriteLock(&tvc->lock);
		/* set even if the evaluation failed; the mvid test below decides what happens next */
		evalFlag = 1;
	    }
	    else evalFlag = 0;
	    /* next, we want to continue using the target of the mt point */
	    if (tvc->mvid) {
		/* now lookup target, to set .. pointer */
		afs_dp("mount point is to %x.%x.%x.%x\n", tvc->fid.Cell, tvc->fid.Fid.Volume, tvc->fid.Fid.Vnode, tvc->fid.Fid.Unique);
		uvc = tvc;	/* remember for later */
		tvc = afs_GetVCache(tvc->mvid, &treq);
		afs_PutVCache(uvc); /* we're done with it */
		if (!tvc) {
		    code = ENOENT;
		    goto done;
		}
		/* now, if we came via a new mt pt (say because of a new release of a R/O volume), we must reevaluate the .. ptr to point back to the appropriate place */
		if (evalFlag) {
		    ObtainWriteLock(&tvc->lock);
		    if (tvc->mvid == (struct VenusFid *) 0) {
			tvc->mvid = (struct VenusFid *) osi_Alloc(sizeof(struct VenusFid));
		    }
		    /* setup backpointer */
		    *tvc->mvid = adp->fid;
		    ReleaseWriteLock(&tvc->lock);
		}
	    }
	    else {
		/* mount point has no target fid: give the entry back and fail */
		afs_dp("failed to find mt pt\n");
		afs_PutVCache(tvc);
		code = ENOENT;
		goto done;
	    }
	}
	*avcp = tvc;
	afs_dp("lookup returning ce %x\n", tvc);
	code = 0;
    }
    else code = ENOENT;
done:
    /* put the network buffer back, if need be */
    if (tname != aname) osi_FreeSendSpace(tname);
    if (code == 0) {
	/* record the name as the caller supplied it (before any '@' expansion) */
	afs_AddMarinerName(aname, tvc);
    }
    return afs_CheckCode(code, &treq);
}

/*
 * afs_create -- create the file aname in directory adp, or reuse an
 * existing file of that name when aexcl is NONEXCL.
 *
 * adp	    directory in which to create the entry
 * aname    name of the new entry; EINVAL if ENameOK rejects it
 * attrs    requested attributes (va_mode and va_size are read); on
 *	    success the attributes of the resulting file are copied back
 * aexcl    NONEXCL permits reuse of an existing file; any other value
 *	    turns an existing file into an EEXIST error
 * amode    only appears in the debugging trace in this routine
 * avcp	    out: cache entry for the file, valid when 0 is returned
 * acred    credentials used to initialize the request
 *
 * Returns afs_CheckCode(code, &treq).
 */
afs_create(adp, aname, attrs, aexcl, amode, avcp, acred)
    register struct vcache *adp;
    char *aname;
    struct vattr *attrs;
    enum vcexcl aexcl;
    int amode;
    struct vcache **avcp;
    struct ucred *acred; {
    long origCBs;
    struct vrequest treq;
    register long code;
    register struct conn *tc;
    struct ViceFid bidFid;
    struct VenusFid newFid;
    struct ViceStatus vstat, dstat;
#ifdef	NINTERFACE
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus OutFidStatus, OutDirStatus;
    struct AFSCallBack CallBack;
    long now;
#endif
    register struct dcache *tdc;
    long offset, len, host;
    register struct vcache *tvc;

    afs_dp("creating file %s in %x with mode %x\n", aname, adp, amode);
    /* the request must be initialized before the first "goto done",
	since the exit path hands &treq to afs_CheckCode */
    afs_InitReq(&treq, acred);
    if (!ENameOK(aname)) {
	code = EINVAL;
	goto done;
    }
    bidFid.Volume = 0;
    code = afs_VerifyVCache(adp, &treq);
    if (code) goto done;

    tdc = afs_GetDCache(adp, 0, &treq, &offset, &len, 1);
    ObtainWriteLock(&adp->lock);
    if (tdc) {
	/* see if file already exists.  If it does, we only set the size attributes (to handle O_TRUNC) */
	code = dir_Lookup(&tdc->f.inode, aname, &newFid.Fid);
	if (code == 0) {
	    afs_PutDCache(tdc);
	    ReleaseWriteLock(&adp->lock);
	    if (aexcl != NONEXCL) {
		code = EEXIST;
		goto done;
	    }
	    /* found the file, so use it */
	    newFid.Cell = adp->fid.Cell;
	    newFid.Fid.Volume = adp->fid.Fid.Volume;
	    tvc = afs_GetVCache(&newFid, &treq);
	    if (tvc) {
		len = attrs->va_size;	/* only do the truncate */
		if (len != 0xffffffff) {
		    /* do a truncate */
		    vattr_null(attrs);
		    attrs->va_size = len;
		    tvc->execsOrWriters++;	/* pretend we are writing, to avoid store */
		    code = afs_setattr(tvc, attrs, acred);
		    tvc->execsOrWriters--;
		    if (code) {
			afs_PutVCache(tvc);
			goto done;
		    }
		}
		*avcp = tvc;
	    }
	    else code = ENOENT;
	    /* make sure vrefCount bumped only if code == 0 */
	    goto done;
	}
    }

    origCBs = afs_missedCBs;	/* if changes, we don't really have a callback */
    vstat.Date = osi_Time();
    vstat.Owner = 0;
    vstat.Mode = attrs->va_mode & 0xffff;   /* only care about protection bits */
    /* why do we hold the afs_xvcache lock for the entire create operation?
	Everywhere else we obtain a callback, we first hold the appropriate cache
	entry, so that we can detect race conditions when a break callback message
	hits simultaneously with the callback-obtaining fetch (or whatever) call.
	However, this is impossible with create, since we don't know the fid of the new file
	until the operation finishes.  We rely on the afs_missedCBs field to tell us if a break
	callback message arrived for this file before we created the entry, but unless
	we hold the afs_xvcache entry, someone could create and delete the entry
	before we see it.  The result would be that we would think we have a callback, but the
	break callback message could have been applied to the ephemeral cache entry.
	
	Thus we hold the lock (afs_xvcache) that prevents others from creating or deleting
	new cache entries.  We only need it in R mode during the create, but need it in
	W mode when we actually create the entry below.  We should really use S mode if
	this is a contention hot-spot. */
    ObtainWriteLock(&afs_xvcache);
    do {
	tc = afs_Conn(&adp->fid, &treq);
	if (tc) {
	    host = tc->server->host;	    /* remember for callback processing */
#ifdef	NINTERFACE
	    ViceStToAFSStoreSt(&vstat, &InStatus);
	    now = osi_Time();
	    code = AFS_CreateFile(tc->id, (struct AFSFid *) &adp->fid.Fid, aname, &InStatus, (struct AFSFid *) &newFid.Fid, &OutFidStatus, &OutDirStatus, &CallBack);
	    AFSFetchStToViceSt(&OutFidStatus, &vstat);
	    AFSFetchStToViceSt(&OutDirStatus, &dstat);
	    AFSCallBackStToViceSt(&CallBack, &vstat);
	    vstat.CallBackTime += now;
#else
	    code = RViceCreate(tc->id, &adp->fid.Fid, &bidFid, aname, &vstat, &newFid.Fid, &dstat);
#endif
	}
	else code = -1;
    } while(afs_Analyze(tc, code, &adp->fid, &treq));
    if (code == EEXIST && aexcl == NONEXCL) {
	/* if we get an EEXIST in nonexcl mode, just do a lookup */
	if (tdc) afs_PutDCache(tdc);
	ReleaseWriteLock(&adp->lock);
	ReleaseWriteLock(&afs_xvcache);
	code = afs_lookup(adp, aname, avcp, acred);
	goto done;
    }
    if (code) {
	ReleaseWriteLock(&adp->lock);
	ReleaseWriteLock(&afs_xvcache);
	if (tdc) afs_PutDCache(tdc);
	goto done;
    }
    /* otherwise, we should see if we can make the change to the dir locally */
    if (tdc) {
	/* we have the thing in the cache */
	if (afs_LocalHero(adp, tdc, &dstat, 1)) {
	    /* we can do it locally */
	    code = dir_Create(&tdc->f.inode, aname, &newFid.Fid);
	    if (code) {
		tdc->f.versionNo = -1;	/* surprise error -- use invalid value */
		/* DEntryMod set by local hero */
	    }
	}
	afs_PutDCache(tdc);
    }
    newFid.Cell = adp->fid.Cell;
    newFid.Fid.Volume = adp->fid.Fid.Volume;
    ReleaseWriteLock(&adp->lock);
    /* If the RViceCreate worked (newFid has a new fid to use), then we shouldn't have
	the entry in the vcache, since the afs_xvcache lock has been held to prevent the
	creation of new entries since the file has been created.  Thus we can ignore the possibility
	that the cache entry already exists. */
    tvc = afs_NewVCache(&newFid, host);
    if (tvc) {
	/* add the callback in */
	if (origCBs == afs_missedCBs) {
	    tvc->states	|= CStatd;  /* we've fake entire thing, so don't stat */
	    tvc->cbExpires = vstat.CallBackTime;
	}
	else tvc->callback = 0;
	afs_ProcessVStat(tvc, &vstat);
	*avcp = tvc;
	code = 0;
    }
    else code = ENOENT;
    ReleaseWriteLock(&afs_xvcache);
done:
    if (code == 0) {
	afs_AddMarinerName(aname, *avcp);
	/* return the new status in vattr (barf) */
	afs_CopyOutAttrs(*avcp, attrs);
    }
    return afs_CheckCode(code, &treq);
}

/*
 * afs_LocalHero -- decide whether a directory change just made on the
 * server can also be applied to our cached copy of the directory.
 *
 * avc	    stat cache entry of the directory
 * adc	    data cache entry holding the directory contents
 * astat    status returned for the directory by the operation
 * aincr    amount the data version is expected to have grown by
 *
 * The DataVersion, Length and Date from astat are always stored in avc,
 * and adc is always flagged DEntryMod.  If the returned DataVersion is
 * exactly adc's old version plus aincr, adc is stamped with the new
 * version and 1 is returned (do the operation locally).  Otherwise adc's
 * version is set to -1 and 0 is returned.
 *
 * This routine must be called with the stat cache entry write-locked.
 */
afs_LocalHero(avc, adc, astat, aincr)
    register struct vcache *avc;
    register ViceStatus *astat;
    register struct dcache *adc;
    register int aincr; {
    register long newVersion;
    int tracked;

    newVersion = astat->DataVersion;

    /* whatever happens, the server's numbers are the truth for the stat entry */
    avc->m.DataVersion = newVersion;
    avc->m.Length = astat->Length;
    avc->m.Date = astat->Date;
    adc->f.states |= DEntryMod;

    /* were we exactly aincr behind the version the server reports? */
    tracked = (newVersion == adc->f.versionNo + aincr);
    if (tracked)
	adc->f.versionNo = newVersion;
    else
	adc->f.versionNo = -1;
    return tracked;
}

/*
 * afs_remove -- remove the entry aname from directory adp.
 *
 * adp	    directory containing the entry
 * aname    name of the entry to remove
 * acred    credentials used to initialize the request
 *
 * After the server has removed the entry, the cached copy of the
 * directory is updated in place when afs_LocalHero allows it.  If the
 * fid of the removed file could be determined from the cached
 * directory, any cache entry for that file has CStatd cleared and is
 * handed to afs_TryToSmush.
 *
 * Returns afs_CheckCode(code, &treq) if verification or the remove call
 * fails, 0 otherwise.
 */
afs_remove(adp, aname, acred)
    register struct vcache *adp;
    char *aname;
    struct ucred *acred; {
    struct vrequest treq;
    struct ViceStatus dstat;
    register struct dcache *tdc;
    register struct vcache *tvc;	/* cache entry of the removed file, if any */
    struct VenusFid unlinkFid;
    register long code;
    register struct conn *tc;
    long offset, len;
#ifdef	NINTERFACE
    struct AFSFetchStatus OutDirStatus;
#endif

    afs_dp("remove dir %x, name %s\n", adp, aname);
    afs_InitReq(&treq, acred);
    code = afs_VerifyVCache(adp, &treq);
    if (code) return afs_CheckCode(code, &treq);
    tdc	= afs_GetDCache(adp, 0,	&treq, &offset,	&len, 1);  /* test for error below */
    ObtainWriteLock(&adp->lock);
    do {
	tc = afs_Conn(&adp->fid, &treq);
	if (tc) {
#ifdef	NINTERFACE
	    code = AFS_RemoveFile(tc->id, (struct AFSFid *) &adp->fid.Fid, aname, &OutDirStatus);
	    AFSFetchStToViceSt(&OutDirStatus, &dstat);	    
#else
	    code = RViceRemove(tc->id, &adp->fid.Fid, aname, &dstat);
#endif
	}
	else code = -1;
    } while (afs_Analyze(tc, code, &adp->fid, &treq));
    if (code) {
	if (tdc) afs_PutDCache(tdc);
	ReleaseWriteLock(&adp->lock);
	return afs_CheckCode(code, &treq);
    }
    /* Vnode == 0 means "fid of the removed file is not known" */
    unlinkFid.Fid.Vnode = 0;
    if (tdc) {
	/* we have the thing in the cache */
	if (afs_LocalHero(adp, tdc, &dstat, 1)) {
	    /* we can do it locally */
	    code = dir_Lookup(&tdc->f.inode, aname, &unlinkFid.Fid);
	    if (code) unlinkFid.Fid.Vnode = 0;
	    else {
		unlinkFid.Cell = adp->fid.Cell;
		unlinkFid.Fid.Volume = adp->fid.Fid.Volume;
	    }
	    code = dir_Delete(&tdc->f.inode, aname);
	    if (code) tdc->f.versionNo = -1;	/* surprise error -- invalid value */
	    /* DEntryMod set by local hero */
	}
	afs_PutDCache(tdc);	/* drop ref count */
    }
    ReleaseWriteLock(&adp->lock);

    /* If we never learned the fid, the rest of unlinkFid is uninitialized,
	so there is nothing meaningful to search the cache for. */
    if (unlinkFid.Fid.Vnode == 0) return 0;

    /* now, get vnode for unlinked dude, and see if we should force it from cache.
	Note that we call FindVCache instead of GetVCache since if the file's
	really gone, we won't be able to fetch the status info anyway.  */
    ObtainReadLock(&afs_xvcache);
    tvc = afs_FindVCache(&unlinkFid);
    ReleaseReadLock(&afs_xvcache);
    if (tvc) {
	ObtainWriteLock(&tvc->lock);
	tvc->states &= ~CStatd;	/* link count, ctime are wrong; we don't have correct value */
	afs_TryToSmush(tvc);
	ReleaseWriteLock(&tvc->lock);
	afs_PutVCache(tvc);
    }
    return 0;
}

afs_link(avc, adp, aname, acred)
    register struct vcache *avc;
    register struct vcache *adp;
    char *aname;
    struct ucred *acred; {
    struct vrequest treq;
    struct ViceStatus dstat, ustat;
    register struct dcache *tdc;
    register long code;
    register struct conn *tc;
    long offset, len;
#ifdef	NINTERFACE
    struct AFSFetchStatus OutFidStatus, OutDirStatus;
#endif

    afs_dp("hard link %x in dir %x with name %s\n", avc, adp, aname);
    /* create a hard link; new entry is aname in dir adp */
    afs_InitReq(&treq, acred);
    if (avc->fid.Cell != adp->fid.Cell || avc->fid.Fid.Volume != adp->fid.Fid.Volume) {
	code = EXDEV;
	goto done;
    }
    code = afs_VerifyVCache(adp, &treq);
    if (code) goto done;
    tdc	= afs_GetDCache(adp, 0,	&treq, &offset,	&len, 1);  /* test for error below */
    ObtainWriteLock(&adp->lock);
    do {
	tc = afs_Conn(&adp->fid, &treq);
	if (tc) {
#ifdef	NINTERFACE
	    code = AFS_Link(tc->id, (struct AFSFid *) &adp->fid.Fid, aname, (struct AFSFid *) &avc->fid.Fid, &OutFidStatus, &OutDirStatus);
	    AFSFetchStToViceSt(&OutFidStatus, &ustat);
	    AFSFetchStToViceSt(&OutDirStatus, &dstat);
#else
	    code = RViceLink(tc->id, &adp->fid.Fid, aname, &avc->fid.Fid, &ustat, &dstat);
#endif
	}
	else code = -1;
    } while (afs_Analyze(tc, code, &adp->fid, &treq));
    if (code) {
	if (tdc) afs_PutDCache(tdc);
	ReleaseWriteLock(&adp->lock);
	goto done;
    }
    if (tdc) {
	/* we have the thing in the cache */
	if (afs_LocalHero(adp, tdc, &dstat, 1)) {
	    /* we can do it locally */
	    code = dir_Create(&tdc->f.inode, aname, &avc->fid.Fid);
	    if (code) tdc->f.versionNo = -1;	/* surprise error -- invalid value */
	}
	afs_PutDCache(tdc);	/* drop ref count */
    }
    ReleaseWriteLock(&adp->lock);
    ObtainWriteLock(&avc->lock);    /* correct link count */
    /* we could lock both dir and file; since we get the new fid status back, you'd think
	we could put it in the cache status entry at that point.  Note that if we don't lock the
	file over the rpc call, we have no guarantee that the status info returned in ustat
	is the most recent to store in the file's cache entry */
    avc->states	&= ~CStatd;	/* don't really know new link count */
    ReleaseWriteLock(&avc->lock);
    code = 0;
done:
    return afs_CheckCode(code, &treq);
}

/*
 * afs_rename -- rename entry aname1 of directory aodp to aname2 in
 * directory andp.
 *
 * aodp	    directory currently holding the entry
 * aname1   current name
 * andp	    directory that is to hold the entry afterwards
 * aname2   new name
 * acred    credentials used to initialize the request
 *
 * Both directories must be in the same cell and volume (else EXDEV).
 * After a successful server call the cached directories are updated in
 * place when afs_LocalHero allows it.  If that local update found an
 * existing target (which the server unlinks), the target's cache entry
 * is invalidated; if the renamed object is a directory moved between
 * two directories, its cached data is marked as having an unknown
 * version so that its ".." entry is not trusted.
 *
 * Returns afs_CheckCode applied to the result of the server call (or to
 * the earlier verification / EXDEV error).
 */
afs_rename(aodp, aname1, andp, aname2, acred)
    register struct vcache *aodp, *andp;
    char *aname1, *aname2;
    struct ucred *acred; {
    struct vrequest treq;
    register struct conn *tc;
    register long code;
    long returnCode;
    int oneDir, doLocally;
    int haveFileFid;		/* 1 once fileFid.Fid was filled in by dir_Lookup */
    struct ViceStatus dstat1, dstat2;
    struct VenusFid unlinkFid, fileFid;
    struct vcache *tvc;
    struct dcache *tdc1, *tdc2;
    long offset, len;
#ifdef	NINTERFACE
    struct AFSFetchStatus OutOldDirStatus, OutNewDirStatus;
#endif
    
    afs_InitReq(&treq, acred);

    afs_dp("rename d1 %x name %s to d2 %x name %s\n", aodp, aname1, andp, aname2);
    /* verify the latest versions of the stat cache entries */
    code = afs_VerifyVCache(aodp, &treq);
    if (code) goto done;
    code = afs_VerifyVCache(andp, &treq);
    if (code) goto done;
    
    /* lock in appropriate order, after some checks */
    if (aodp->fid.Cell != andp->fid.Cell || aodp->fid.Fid.Volume != andp->fid.Fid.Volume) {
	code = EXDEV;
	goto done;
    }

    /* Neither fid is known yet.  These are tested after the locks are
	dropped, on every path, so they must be set before anything can fail. */
    unlinkFid.Fid.Vnode = 0;
    haveFileFid = 0;

    oneDir = 0;
    if (andp->fid.Fid.Unique == aodp->fid.Fid.Unique) {
	ObtainWriteLock(&andp->lock);
	oneDir = 1;	    /* only one dude locked */
    }
    else if (andp->fid.Fid.Unique < aodp->fid.Fid.Unique) {
	ObtainWriteLock(&andp->lock);	/* lock smaller one first */
	ObtainWriteLock(&aodp->lock);
    }
    else {
	ObtainWriteLock(&aodp->lock);	/* lock smaller one first */
	ObtainWriteLock(&andp->lock);
    }
    
    /* locks are now set, proceed to do the real work */
    do {
	tc = afs_Conn(&aodp->fid, &treq);
#ifdef	NINTERFACE
	if (tc) {
	    code = AFS_Rename(tc->id, (struct AFSFid *) &aodp->fid.Fid, aname1, (struct AFSFid *) &andp->fid.Fid, aname2, &OutOldDirStatus, &OutNewDirStatus);
	    AFSFetchStToViceSt(&OutOldDirStatus, &dstat1);
	    AFSFetchStToViceSt(&OutNewDirStatus, &dstat2);
	} else code = -1;

#else
	if (tc) code = RViceRename(tc->id, &aodp->fid.Fid, aname1, &andp->fid.Fid, aname2, &dstat1, &dstat2);
	else code = -1;
#endif
    } while (afs_Analyze(tc, code, &andp->fid, &treq));
    returnCode = code;	    /* remember for later */
    
    /* Now we try to do things locally.  This is really loathsome code. */
    if (code == 0) {
	tdc1 = afs_GetDCache(aodp, 0, &treq, &offset, &len, 0);
	if (!oneDir) tdc2 = afs_GetDCache(andp, 0, &treq, &offset, &len, 0);
	else tdc2 = tdc1;
	if (tdc1 && tdc2) {
	    /* see if version numbers increased properly */
	    doLocally = 0;
	    if (oneDir) {
		/* number increases by 1 for whole rename operation */
		if (afs_LocalHero(aodp, tdc1, &dstat1, 1)) {
		    doLocally = 1;
		}
	    }
	    else {
		/* two separate dirs, each increasing by 1 */
		if (afs_LocalHero(aodp, tdc1, &dstat1, 1) && afs_LocalHero(andp, tdc2, &dstat2, 1))
		    doLocally = 1;
		else {
		    /* local hero might not have invalidated both */
		    tdc1->f.versionNo = -1;
		    tdc2->f.versionNo = -1;
		}
	    }
	    /* now really do the work */
	    if (doLocally) {
		/* first lookup the fid of the dude we're moving */
		code = dir_Lookup(&tdc1->f.inode, aname1, &fileFid.Fid);
		if (code == 0) {
		    haveFileFid = 1;
		    /* delete the source */
		    code = dir_Delete(&tdc1->f.inode, aname1);
		    if (code) tdc1->f.versionNo = -1;   /* mysterious failure */
		    /* first see if target is there */
		    code = dir_Lookup(&tdc2->f.inode, aname2, &unlinkFid.Fid);
		    if (code == 0) {
			/* target already exists, and will be unlinked by server */
			code = dir_Delete(&tdc2->f.inode, aname2);
			if (code) tdc2->f.versionNo = -1;	/* oops */
		    }
		    else unlinkFid.Fid.Vnode = 0;	/* no target; don't trust what the lookup left behind */
		    code = dir_Create(&tdc2->f.inode, aname2, &fileFid.Fid);
		    if (code) tdc2->f.versionNo = -1;
		}
		else {
		    /* the server renamed a name our copy doesn't have, so
			the cached directories can't be current */
		    tdc1->f.versionNo = -1;
		    tdc2->f.versionNo = -1;
		}
	    }
	}
	if (tdc1) afs_PutDCache(tdc1);
	if ((!oneDir) && tdc2) afs_PutDCache(tdc2);

	/* update dir link counts; dstat1 and dstat2 only hold meaningful
	    values when the server call succeeded */
	aodp->m.LinkCount = dstat1.LinkCount;
	if (!oneDir)
	    andp->m.LinkCount = dstat2.LinkCount;
    }

    /* release locks */
    ReleaseWriteLock(&aodp->lock);
    if (!oneDir) ReleaseWriteLock(&andp->lock);
    
    /* now, some more details.  if unlinkFid.Fid.Vnode then we should decrement
	the link count on this file.  Note that if fileFid is a dir, then we don't
	have to invalidate its ".." entry, since its DataVersion # should have
	changed. However, interface is not good enough to tell us the
	*file*'s new DataVersion, so we're stuck.  Our hack: mark
	the data as having an "unknown" version (effectively discarding the ".."
	entry) */
    if (unlinkFid.Fid.Vnode) {
	unlinkFid.Fid.Volume = aodp->fid.Fid.Volume;
	unlinkFid.Cell = aodp->fid.Cell;
	tvc = afs_GetVCache(&unlinkFid, &treq);
	if (tvc) {
	    ObtainWriteLock(&tvc->lock);
	    tvc->states	&= ~CStatd; /* don't have old name's file's new status */
	    /* if this was last guy (probably) discard from cache.  Never hurts
		to do this, even if this isn't the last link: at worst we'll fetch
		the file again. TryToSmush won't get rid of the file if it is
		important (e.g. modified or locked) */
	    if (tvc->m.LinkCount == 1) {
		/* try to discard from cache to save space */
		afs_TryToSmush(tvc);
	    }
	    ReleaseWriteLock(&tvc->lock);
	    afs_PutVCache(tvc);
	}
    }

    /* now handle ".." invalidation; only possible when the local update
	told us which object was moved */
    if (!oneDir && haveFileFid) {
	fileFid.Fid.Volume = aodp->fid.Fid.Volume;
	fileFid.Cell = aodp->fid.Cell;
	tvc = afs_GetVCache(&fileFid, &treq);
	if (tvc) {
	    if (vType(tvc) == VDIR) {
		tdc1 = afs_FindDCache(tvc, 0);
		if (tdc1) {
		    tdc1->f.versionNo = -1;	/* mark it as unknown */
		    tdc1->f.states |= DEntryMod;
		    afs_PutDCache(tdc1);	/* put it back */
		}
	    }
	    /* give the entry back whether or not it was a directory */
	    afs_PutVCache(tvc);
	}
    }
    code = returnCode;
done:
    return afs_CheckCode(code, &treq);
}

/*
 * afs_mkdir -- create the directory aname inside directory adp.
 *
 * adp	    parent directory
 * aname    name of the new directory; EINVAL if ENameOK rejects it
 * attrs    requested attributes; only va_mode is read
 * avcp	    out: cache entry for the new directory, set when 0 is returned
 * acred    credentials used to initialize the request
 *
 * After the server call succeeds, the cached copy of the parent is
 * updated in place when afs_LocalHero allows it and the parent's link
 * count is taken from the returned directory status.
 *
 * Returns afs_CheckCode(code, &treq).
 */
afs_mkdir(adp, aname, attrs, avcp, acred)
    register struct vcache *adp;
    register struct vcache **avcp;
    char *aname;
    struct vattr *attrs;
    struct ucred *acred; {
    struct vrequest treq;
    register long code;
    register struct conn *tc;
    struct VenusFid newFid;
    struct ViceStatus vstat, dstat;
    register struct dcache *tdc;
    long offset, len;
    register struct vcache *tvc;
#ifdef	NINTERFACE
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus OutFidStatus, OutDirStatus;
    struct AFSCallBack CallBack;
    long now;
#endif

    afs_dp("mkdir %s in %x\n", aname, adp);
    /* the request must be initialized before the first "goto done",
	since the exit path hands &treq to afs_CheckCode */
    afs_InitReq(&treq, acred);
    if(!ENameOK(aname)) {
	code = EINVAL;
	goto done;
    }
    code = afs_VerifyVCache(adp, &treq);
    if (code) goto done;
    vstat.Date = osi_Time();
    vstat.Owner = 0;
    vstat.Mode = attrs->va_mode & 0xffff;   /* only care about protection bits */
    /* a null tdc is tolerated; it is tested before each use below */
    tdc = afs_GetDCache(adp, 0, &treq, &offset, &len, 1);
    ObtainWriteLock(&adp->lock);
    do {
	tc = afs_Conn(&adp->fid, &treq);
	if (tc) {
#ifdef	NINTERFACE
	    ViceStToAFSStoreSt(&vstat, &InStatus);
	    now = osi_Time();
	    code = AFS_MakeDir(tc->id, (struct AFSFid *) &adp->fid.Fid, aname, &InStatus, (struct AFSFid *) &newFid.Fid, &OutFidStatus, &OutDirStatus, &CallBack);
	    AFSFetchStToViceSt(&OutFidStatus, &vstat);
	    AFSFetchStToViceSt(&OutDirStatus, &dstat);
	    AFSCallBackStToViceSt(&CallBack, &vstat);
	    vstat.CallBackTime += now;
/* DON'T forget to Set the callback value... */
#else
	    code = RViceMakeDir(tc->id, &adp->fid.Fid, aname, &vstat, &newFid.Fid, &dstat);
#endif
	}
	else code = -1;
    } while(afs_Analyze(tc, code, &adp->fid, &treq));
    if (code) {
	ReleaseWriteLock(&adp->lock);
	if (tdc) afs_PutDCache(tdc);
	goto done;
    }
    /* otherwise, we should see if we can make the change to the dir locally */
    if (tdc) {
	/* we have the thing in the cache */
	if (afs_LocalHero(adp, tdc, &dstat, 1)) {
	    /* we can do it locally */
	    code = dir_Create(&tdc->f.inode, aname, &newFid.Fid);
	    if (code) tdc->f.versionNo = -1;	/* surprise error -- use invalid value */
	}
	afs_PutDCache(tdc);
    }
    adp->m.LinkCount = dstat.LinkCount;
    newFid.Cell = adp->fid.Cell;
    newFid.Fid.Volume = adp->fid.Fid.Volume;
    ReleaseWriteLock(&adp->lock);
    /* now we're done with parent dir, create the real dir's cache entry */
    tvc = afs_GetVCache(&newFid, &treq);
    if (tvc) {
	code = 0;
	*avcp = tvc;
    }
    else code = ENOENT;
done:
    return afs_CheckCode(code, &treq);
}

afs_rmdir(adp, aname, acred)
    register struct vcache *adp;
    char *aname;
    struct ucred *acred; {
    struct vrequest treq;
    struct ViceStatus dstat;
    register struct dcache *tdc;
    register long code;
    register struct conn *tc;
    long offset, len;
#ifdef	NINTERFACE
    struct AFSFetchStatus OutDirStatus;
#endif

    afs_dp("rmdir of %s from %x\n", aname, adp);
    afs_InitReq(&treq, acred);
    code = afs_VerifyVCache(adp, &treq);
    if (code) goto done;
    tdc	= afs_GetDCache(adp, 0,	&treq, &offset,	&len, 1);	/* test for error below */
    ObtainWriteLock(&adp->lock);
    do {
	tc = afs_Conn(&adp->fid, &treq);
	if (tc) {
#ifdef	NINTERFACE
	    code = AFS_RemoveDir(tc->id, (struct AFSFid *) &adp->fid.Fid, aname, &OutDirStatus);
	    AFSFetchStToViceSt(&OutDirStatus, &dstat);
#else
	    code = RViceRemoveDir(tc->id, &adp->fid.Fid, aname, &dstat);
#endif
	}
	else code = -1;
    } while (afs_Analyze(tc, code, &adp->fid, &treq));
    if (code) {
	if (tdc) afs_PutDCache(tdc);
	ReleaseWriteLock(&adp->lock);
	goto done;
    }
    if (tdc) {
	/* we have the thing in the cache */
	if (afs_LocalHero(adp, tdc, &dstat, 1)) {
	    /* we can do it locally */
	    code = dir_Delete(&tdc->f.inode, aname);
	    if (code) tdc->f.versionNo = -1;	/* surprise error -- invalid value */
	}
	afs_PutDCache(tdc);	/* drop ref count */
    }
    ReleaseWriteLock(&adp->lock);
    /* don't worry about link count since dirs can not be hardlinked */
    code = 0;
done:
    return afs_CheckCode(code, &treq);
}

/* BlobScan advances a blob number to the next blob that holds a
    directory entry.

    Starting at ablob, it skips the header blobs at the front of each
    page (the first page's header occupies blobs 0 through DHE; every
    other page's header is its blob 0) and any blob whose bit is clear
    in the page header's freebitmap, moving on to following pages as
    needed.

    Returns the number of the first in-use blob at or after ablob, or 0
    when dir_GetBlob cannot supply a page header (past end of file).
*/
static BlobScan(afile, ablob)
    long *afile;
    register long ablob; {
    register struct PageHeader *pageHdr;
    register long slot;		/* page-relative index of blob being examined */
    long pageStart;		/* absolute blob number of the page's first blob */

    for (;;) {
	pageStart = ablob & ~(EPP-1);
	pageHdr = (struct PageHeader *) dir_GetBlob(afile, pageStart);
	if (pageHdr == (struct PageHeader *) 0)
	    return 0;			/* ran off the end of the directory */

	/* step over the header blobs at the front of the page */
	slot = ablob - pageStart;
	if (pageStart == 0) {
	    /* first dir page has the extra-big header */
	    if (slot < DHE+1) slot = DHE+1;
	}
	else {
	    if (slot == 0) slot = 1;
	}

	/* walk forward until a blob marked in the bitmap turns up */
	while (slot < EPP && !(pageHdr->freebitmap[slot>>3] & (1<<(slot&7))))
	    slot++;

	DRelease(pageHdr, 0);
	if (slot != EPP)
	    return slot+pageStart;
	/* nothing in use in the rest of this page; try the next one */
	ablob = pageStart + EPP;
    }
    /* never get here */
}

/*
 * Read directory entries.
 * There are some weird things to look out for here.  The uio_offset
 * field is either 0 or it is the offset returned from a previous
 * readdir.  It is an opaque value used by the server to find the
 * correct directory block to read.  The byte count must be at least
 * vtoblksz(vp) bytes.  The count field is the number of blocks to
 * read on the server.  This is advisory only, the server may return
 * only one block's worth of entries.  Entries may be compressed on
 * the server.
 *
 * This routine encodes knowledge of Vice dirs.
 */

/*
 * afs_readdir -- return the next entry of directory avc to the caller.
 *
 * avc	    directory being read
 * auio	    describes the caller's buffer; its offset is the opaque
 *	    position (blob number << 5) at which to resume, 0 meaning
 *	    the start of the directory
 * acred    credentials used to initialize the request
 *
 * At most one entry is delivered per call, as a struct direct whose
 * d_reclen is 512; the residual count is adjusted as if 512 bytes had
 * been transferred, and the offset is advanced past the entry.  When
 * no further entry exists nothing is transferred and 0 is returned.
 *
 * Returns afs_CheckCode(code, &treq).
 */
afs_readdir(avc, auio, acred)
    register struct vcache *avc;
    struct uio *auio;
    struct ucred *acred; {
    struct vrequest treq;
    register struct dcache *tdc;
    long us;
    register long code;
    long offset, len, nextBlob;
    struct DirEntry *tde;
    struct direct dirEntry;	    /* big, contains 255 byte entry name */
    long origOffset;
    /* opaque value is pointer into a vice dir; use bit map to decide
	if the entries are in use.  Always assumed to be valid.  0 is
	special, means start of a new dir.  Long inode, followed by
	short reclen and short namelen.  Namelen does not include
	the null byte.  Followed by null-terminated string.
    */
    afs_InitReq(&treq, acred);	    /* setup request structure */
    /* update the cache entry */
    code = afs_VerifyVCache(avc, &treq);
    if (code) goto done;
    /* get a reference to the entire directory */
    tdc = afs_GetDCache(avc, 0, &treq, &offset, &len, 1);
    if (!tdc) {
	code = ENOENT;
	goto done;
    }
    ObtainReadLock(&avc->lock);
    origOffset = auio->afsio_offset;
    nextBlob = origOffset >> 5;		/* entry we want */
    /* scan for the next interesting entry */
    us = BlobScan(&tdc->f.inode, nextBlob);	/* scan for in-use blob */
    if (us == 0) {
	/* no more entries: report success with nothing transferred */
	afs_PutDCache(tdc);
	ReleaseReadLock(&avc->lock);
	code = 0;
	goto done;
    }
    /* otherwise point at this blob */
    tde = (struct DirEntry *) dir_GetBlob(&tdc->f.inode, us);
    if (tde == (struct DirEntry *) 0) {
	/* BlobScan may hand back an out-of-range blob at end of file */
	afs_PutDCache(tdc);
	ReleaseReadLock(&avc->lock);
	code = 0;
	goto done;
    }
    /* copy the name structure out to user space */
    /* volume << 16 + vnode is the d_fileno field */
    dirEntry.d_fileno = (avc->fid.Fid.Volume << 16) + ntohl(tde->fid.vnode);
    dirEntry.d_reclen = 512;
    /* The whole of dirEntry is handed to the caller below, so every byte
	of d_name must be defined.  strncpy zero-fills the unused tail and
	also keeps an over-long name from running off the end of the array. */
    strncpy(dirEntry.d_name, tde->name, sizeof(dirEntry.d_name) - 1);
    dirEntry.d_name[sizeof(dirEntry.d_name) - 1] = 0;
    dirEntry.d_namlen = strlen(dirEntry.d_name);	/* don't count terminating null */
    code = uiomove(&dirEntry, sizeof(dirEntry), UIO_READ, auio);
    auio->afsio_resid -= (512 - sizeof(dirEntry));    /* pretend we read 512 bytes */
    auio->afsio_offset = (us + dir_NameBlobs(tde->name)) << 5;	/* skip entry */
    DRelease(tde, 0);
    afs_PutDCache(tdc);
    ReleaseReadLock(&avc->lock);
done:
    return afs_CheckCode(code, &treq);
}

/*
 * Create a symbolic link (or mount point) called aname in directory adp,
 * whose contents are the string atargetName.
 *
 *  adp		directory in which the new entry is made
 *  aname	name of the new directory entry
 *  attrs	not referenced by this routine
 *  atargetName	text of the link
 *  acred	credentials used to build the request
 *
 * The link is created on the file server first; if that succeeds the cached
 * copy of the directory is updated when possible and a vcache entry holding
 * the link text is installed for the new fid.
 *
 * Returns the result of afs_CheckCode() applied to the final status (0 on
 * the success path).
 */
afs_symlink(adp, aname, attrs, atargetName, acred)
    register struct vcache *adp;
    register char *atargetName;
    char *aname;
    struct vattr *attrs;
    struct ucred *acred; {
    struct vrequest treq;
    register long code;
    register struct conn *tc;
    struct VenusFid newFid;
    struct ViceStatus vstat, dstat;
    register struct dcache *tdc;
    long offset, len, alen, host;
    register struct vcache *tvc;
#ifdef	NINTERFACE
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus OutFidStatus, OutDirStatus;
#endif

    afs_dp("symlink dir %s in %x\n", aname, adp);
    afs_InitReq(&treq, acred);
    code = afs_VerifyVCache(adp, &treq);
    if (code) return afs_CheckCode(code, &treq);
    vstat.Date = osi_Time();
    vstat.Owner = 0;
    alen = strlen(atargetName);	    /* we want it to include the null */
    if (*atargetName == '#' || *atargetName == '%' || *atargetName == '$')
	vstat.Mode = 0644;	/* mt pt: null from "." at end */
    else {
	vstat.Mode = 0755;
	alen++;	    /* add in the null */
    }
#ifdef	NINTERFACE
    /* Build the store status exactly once, from the values we chose above.
	It must not be rebuilt inside the retry loop, because vstat is reused
	there to receive the server's reply. */
    ViceStToAFSStoreSt(&vstat, &InStatus);
#endif
    tdc = afs_GetDCache(adp, 0, &treq, &offset, &len, 1);
    ObtainWriteLock(&adp->lock);
    ObtainWriteLock(&afs_xvcache);  /* prevent others from creating this entry */
    do {
	tc = afs_Conn(&adp->fid, &treq);
	if (tc) {
	    host = tc->server->host;
#ifdef	NINTERFACE
	    code = AFS_Symlink(tc->id, (struct AFSFid *) &adp->fid.Fid, aname, atargetName, &InStatus, (struct AFSFid *) &newFid.Fid, &OutFidStatus, &OutDirStatus);
	    if (code == 0) {
		/* only a successful call has filled in the output status */
		AFSFetchStToViceSt(&OutFidStatus, &vstat);
		AFSFetchStToViceSt(&OutDirStatus, &dstat);
	    }
#else
	    code = RViceSymLink(tc->id, &adp->fid.Fid, aname, atargetName, &vstat, &newFid.Fid, &dstat);
#endif
	}
	else code = -1;
    } while(afs_Analyze(tc, code, &adp->fid, &treq));
    if (code) {
	ReleaseWriteLock(&adp->lock);
	ReleaseWriteLock(&afs_xvcache);
	if (tdc) afs_PutDCache(tdc);
	goto done;
    }
    /* otherwise, we should see if we can make the change to the dir locally */
    if (tdc) {
	/* we have the thing in the cache */
	if (afs_LocalHero(adp, tdc, &dstat, 1)) {
	    /* we can do it locally */
	    code = dir_Create(&tdc->f.inode, aname, &newFid.Fid);
	    if (code) tdc->f.versionNo = -1;	/* surprise error -- use invalid value */
	}
	afs_PutDCache(tdc);
    }
    /* the server only returned the Fid part; cell and volume are the parent's */
    newFid.Cell = adp->fid.Cell;
    newFid.Fid.Volume = adp->fid.Fid.Volume;
    ReleaseWriteLock(&adp->lock);

    /* now we're done with parent dir, create the link's entry.  Note that no one can get a pointer
	to the new cache entry until we release the xvcache lock. */
    tvc = afs_NewVCache(&newFid, host);
    tvc->cbExpires = 0x7fffffff;	/* never expires, they can't change */
    afs_ProcessVStat(tvc, &vstat);
    tvc->states |= CStatd;		/* have valid info */
    if (!tvc->linkData) {
	/* cache the link text.  For a mount point alen excludes the null, so
	    the final character of the target is replaced by the terminator. */
	tvc->linkData = (char *) osi_Alloc(alen);
	strncpy(tvc->linkData, atargetName, alen-1);
	tvc->linkData[alen-1] = 0;
    }
    ReleaseWriteLock(&afs_xvcache);
    afs_PutVCache(tvc);
    code = 0;	/* a local directory update failure is not reported to the caller */
done:
    return afs_CheckCode(code, &treq);
}

/* call under write-lock to read link into memory
 *
 * If avc has no cached link text yet, read the link's contents from the
 * cache file and store a null-terminated copy in avc->linkData.
 *
 * Returns 0 on success (including when the text was already cached), EIO if
 * the data can not be obtained or read or the link is empty where a
 * terminator can not be placed, and EFAULT if the link is longer than 1024
 * bytes.
 */
afs_HandleLink(avc, areq)
    register struct vcache *avc;
    struct vrequest *areq; {
    register struct dcache *tdc;
    register char *tp;
    struct osi_file *tfile;
    long offset, len, alen;
    register long code;

    /* two different formats, one for links protected 644, have a "." at the end
	of the file name, which we turn into a null.  Others, protected 755,
	we add a null to the end of */
    if (!avc->linkData) {
	tdc = afs_GetDCache(avc, 0, areq, &offset, &len, 0);
	if (!tdc) {
	    return EIO;
	}
	/* otherwise we have the data loaded, go for it */
	if (len > 1024) {
	    afs_PutDCache(tdc);
	    return EFAULT;
	}
	if (avc->m.Mode	& 0111)	alen = len+1;	/* regular link */
	else alen = len;			/* mt point */
	if (alen <= 0) {
	    /* e.g. a zero-length mount point: there is no byte to turn into
		the terminator, and tp[alen-1] below would land outside the
		allocation. */
	    afs_PutDCache(tdc);
	    return EIO;
	}
	tfile = osi_UFSOpen(&cacheDev, tdc->f.inode);
	if (!tfile) {
	    afs_PutDCache(tdc);
	    return EIO;
	}
	tp = osi_Alloc(alen);	/* make room for terminating null */
	code = osi_Read(tfile, tp, len);
	tp[alen-1] = 0;
	osi_Close(tfile);
	afs_PutDCache(tdc);
	if (code != len) {
	    /* short read or error: discard the partial copy */
	    osi_Free(tp, alen);
	    return EIO;
	}
	avc->linkData = tp;
    }
    return 0;
}

/*
 * Copy the text of symbolic link avc out through auio.
 *
 * Fails with EINVAL if avc is not a symbolic link, and with EIO if no link
 * text is available after afs_HandleLink has run.  The result is passed
 * through afs_CheckCode before being returned.
 */
afs_readlink(avc, auio, acred)
    register struct vcache *avc;
    struct uio *auio;
    struct ucred *acred; {
    struct vrequest treq;
    register char *linkText;
    register long code;

    afs_dp("readlink %x\n", avc);
    afs_InitReq(&treq, acred);
    code = afs_VerifyVCache(avc, &treq);
    if (code == 0) {
	if (vType(avc) != VLNK) {
	    code = EINVAL;
	}
	else {
	    /* write lock: afs_HandleLink may fill in avc->linkData */
	    ObtainWriteLock(&avc->lock);
	    code = afs_HandleLink(avc, &treq);
	    if (code == 0) {
		linkText = avc->linkData;
		if (linkText) {
		    /* hand the text (without its null) to the caller */
		    code = uiomove(linkText, strlen(linkText), UIO_READ, auio);
		}
		else {
		    code = EIO;
		}
	    }
	    ReleaseWriteLock(&avc->lock);
	}
    }
    return afs_CheckCode(code, &treq);
}

/*
 * Push the cached data of avc back to the server if the file currently has
 * writers (execsOrWriters > 0).  Only the cache entry found for offset 0 is
 * stored.  Returns the store status filtered through afs_CheckCode, or 0
 * when there was nothing to do.
 */
afs_fsync(avc, acred)
    register struct vcache *avc;
    struct ucred *acred; {
    struct vrequest treq;
    register struct dcache *chunk;
    register long code;

    afs_dp("fsync file %x\n", avc);
    afs_InitReq(&treq, acred);
    code = 0;
    ObtainSharedLock(&avc->lock);
    if (avc->execsOrWriters > 0) {
	/* put the file back */
	chunk = afs_FindDCache(avc, 0);
	if (!chunk) {
	    afs_dp("warning, no file on fsync\n");
	}
	else {
	    code = afs_StoreDCache(avc, chunk, &treq);
	    afs_PutDCache(chunk);
	}
    }
    ReleaseSharedLock(&avc->lock);
    return afs_CheckCode(code, &treq);
}

/*
 * Called when the last reference to the vnode goes away.  Clears the
 * "data modified" state and, if a core store was postponed (CCore), performs
 * it now.  Always returns 0.
 */
afs_inactive(avc, acred)
    register struct vcache *avc;
    struct ucred *acred; {

    /* nobody references the vnode any more, not even core page structures,
	so the flag recording a possible outstanding core page reference can
	be dropped. */
    avc->states &= ~CDataMod;

    if ((avc->states & CCore) == 0)
	return 0;	    /* lru should do everything for us */

    /* barf: hold a temporary reference while the delayed store runs */
    avc->vrefCount++;
    avc->states &= ~CCore;
    afs_dp("doing delayed core store\n");
    afs_close(avc, -1, acred);  /* special flags field means core dump */
    avc->vrefCount--;
    return 0;
}

/*
 * Block map routine.  The vnode maps onto itself; logical block abn is
 * converted to DEV_BSIZE units assuming 8192-byte logical blocks.  Either
 * output pointer may be null, in which case it is skipped.  Always returns 0.
 */
afs_bmap(avc, abn, anvp, anbn)
    register struct vcache *avc;
    long abn, *anbn;
    struct vcache **anvp; {
    if (anvp != 0) {
	*anvp = avc;
    }
    if (anbn != 0) {
	/* in 512 byte units */
	*anbn = abn * (8192 / DEV_BSIZE);
    }
    return 0;
}

/* basic strategy routine, assuming stuff mapped in. */
afs_ustrategy(abp)
    register struct buf *abp; {
    register long code;
    struct uio tuio;
    struct iovec tiovec[1];

    afs_dp("strategy %x\n", abp->b_vp);
    if ((abp->b_flags & B_READ) == B_READ) {
	/* read b_bcount bytes into kernel address b_un.b_addr starting
	    at byte DEV_BSIZE * b_blkno.  Bzero anything we can't read,
	    and finally call iodone(abp).  File is in abp->b_vp.  Credentials
	    are from u area??
	*/
	tuio.afsio_iov = tiovec;
	tuio.afsio_iovcnt = 1;
	tuio.afsio_offset = DEV_BSIZE * abp->b_blkno;
	tuio.afsio_seg = AFS_UIOSYS;
#ifdef AFS_UIOFMODE
	tuio.afsio_fmode = 0;
#endif
	tuio.afsio_resid = abp->b_bcount;
	tiovec[0].iov_base = abp->b_un.b_addr;
	tiovec[0].iov_len = abp->b_bcount;
	/* are user's credentials valid here?  probably, but this
	     sure seems like the wrong things to do. */
	code = afs_rdwr((struct vcache *) abp->b_vp, &tuio, UIO_READ, 0, u.u_cred);
	if (code == 0) {
	    if (tuio.afsio_resid > 0)
		bzero(abp->b_un.b_addr + abp->b_bcount - tuio.afsio_resid, tuio.afsio_resid);
	}
	iodone(abp);
    }
    else {
	/* we shouldn't get called for writing until mmap is implemented. */
	panic("afs_strategy write");
    }
    return code;
}

/* real strategy: passes afs_ustrategy and the buffer to osi_MapStrategy and
    returns whatever that yields. */
afs_strategy (abp)
    register struct buf *abp; {
    register int result;

    result = osi_MapStrategy(afs_ustrategy, abp);
    return result;
}

/* avc must be held, its parent must be unlocked.  Returns true only if all of
    the requested access bits in arights are granted (every check below has
    the form (mask & access & arights) == arights).  Also returns true when
    the parent of a non-directory is unknown, and false when the parent's
    cache entry or the server status can not be obtained. */
afs_AccessOK (avc, arights, areq)
    register struct vcache *avc;
    struct vrequest *areq;
    long arights; {
    register struct vcache *tvc;
#ifdef	NINTERFACE
    struct AFSFetchStatus OutStatus;
    struct AFSCallBack CallBack;
#else
    BBS dummybs, OptionalData;
    struct BD BDesc;		/* Unused, prevent xdr coredumps */
    ViceFid bidfid;
#endif
    ViceStatus tstat;
    struct VenusFid aclfid;
    register long i, mask;
    long code;
    int canLockFid;
    struct conn *newconn;

    /*
    This routine checks that the access bits in a directory permit a particular access to
    work.  It makes use of the fact that GetVStat clears the access information before
    it stats a directory, and that GetVStat was recently called on the directory in question, either, if
    this is a file, in the next few lines, or, if this is a directory, when the first
    parameter to this function was computed.

    Note that avc's ref count is assumed to be bumped.  It should not be otherwise locked.  Its parent is
    assumed to be unlocked.
    */

    /* This next used to use different mode bits for owner and others. */
    mask = (avc->m.Mode & 0700) >> 6;

    if (vType(avc) == VDIR) {
	/* a directory is checked against its own access information */
        tvc = avc;
        aclfid = avc->fid;
	mask = -1;	/* ignore mode bits for dirs */
	canLockFid = 0;	/* means aclfid is same as avc */
    }
    else {
	/* possible race: new cache entry between IWalk (setting parent field) and here.
	    In this case, avc->parentVnode is 0, and we don't know who our parent really
	    is.  This will get fixed when we fix the stat interface to return parent info.  For now,
	    venus doesn't block these operations; they'll fail later. */
	if (avc->parentVnode == 0) return 1;		/* fail operation later */
	/* anything else is checked against its parent directory, which lives
	    in the same cell and volume */
	aclfid.Cell = avc->fid.Cell;
        aclfid.Fid.Volume = avc->fid.Fid.Volume;
        aclfid.Fid.Vnode = avc->parentVnode;
        aclfid.Fid.Unique = avc->parentUnique;
        tvc = afs_GetVCache(&aclfid, areq);
	if (tvc == 0) return 0;
	/* translate the owner mode bits into an access mask via fileModeMap */
        mask = fileModeMap[mask];
	canLockFid = 1;	/* means new fid */
    }


    /* first try the access rights recorded in anyAccess */
    if ((mask & arights & tvc->anyAccess) == arights) {
	if (canLockFid) afs_PutVCache(tvc);
	return 1;
    }

    /* next look for this user in the small per-entry cache of access rights */
    for(i=0;i<CPSIZE;i++) {
        if (areq->uid == tvc->randomUid[i]) {
	    if (canLockFid) afs_PutVCache(tvc);
	    return ((mask & arights & tvc->randomAccess[i]) == arights);
	}
    }

    /* If we make it here, we just don't know the answer. */
#ifndef	NINTERFACE
    bidfid.Volume = 0;
    dummybs.SeqLen = 0;
    dummybs.MaxSeqLen = 0;
    dummybs.SeqBody = (char *) 1;		/* prevent xdr from allocating space */
    OptionalData.MaxSeqLen = 0;
    OptionalData.SeqLen = 0;
    OptionalData.SeqBody = (char *) 1;	/* prevent xdr malloc */
#endif
    /* ask the server for the status of aclfid; afs_Analyze decides whether
	to try again */
    do {
        newconn = afs_Conn(&aclfid, areq);
#ifdef	NINTERFACE
	if (newconn){
	    code = AFS_FetchStatus(newconn->id, (struct AFSFid *) &aclfid.Fid, &OutStatus, &CallBack);
	    AFSFetchStToViceSt(&OutStatus, &tstat);	
	    AFSCallBackStToViceSt(&CallBack, &tstat);
	} else code = -1;

#else
        if (newconn) code = RViceFetchP(newconn->id, &aclfid.Fid, &bidfid,
	    FetchNoData, &BDesc, &OptionalData, &dummybs, &tstat);
        else code = -1;
#endif
    } while (afs_Analyze(newconn, code, &aclfid, areq));
    if (code != 0) {
	if (canLockFid) afs_PutVCache(tvc);
	return 0;
    }
    /* remember the answer: slide the cached entries down one slot (losing the
	one in slot 0) and put this user's rights in the last slot */
    for(i=0; i<CPSIZE-1; i++) {
        tvc->randomUid[i] = tvc->randomUid[i+1];
        tvc->randomAccess[i] = tvc->randomAccess[i+1];
    }
    tvc->randomUid[CPSIZE-1] = areq->uid;
    tvc->randomAccess[CPSIZE-1] = tstat.MyAccess;
    if (canLockFid) afs_PutVCache(tvc);
    return ((mask & tstat.MyAccess & arights) == arights);
}

#ifdef	NINTERFACE

/*
 * Build an AFSStoreStatus from a ViceStatus.  Only the date, owner and mode
 * fields are considered; a field whose value is zero is left unset, and the
 * Mask records which fields were copied.
 */
ViceStToAFSStoreSt(ViceStat, AFSStoreStat)
struct ViceStatus * ViceStat;
struct AFSStoreStatus * AFSStoreStat;
{
    register long fieldsSet;

    fieldsSet = 0;
    if (ViceStat->Mode != 0) {
	fieldsSet |= AFS_SETMODE;
	AFSStoreStat->UnixModeBits = ViceStat->Mode;
    }
    if (ViceStat->Owner != 0) {
	fieldsSet |= AFS_SETOWNER;
	AFSStoreStat->Owner = ViceStat->Owner;
    }
    if (ViceStat->Date != 0) {
	fieldsSet |= AFS_SETMODTIME;
	AFSStoreStat->ClientModTime = ViceStat->Date;
    }
    AFSStoreStat->Mask = fieldsSet;
}
	

/*
 * Copy the fields of an AFSFetchStatus into the corresponding fields of a
 * ViceStatus.  Fields of the ViceStatus that have no counterpart here are
 * left untouched.
 */
AFSFetchStToViceSt(AFSFetchStat, ViceStat)
struct AFSFetchStatus * AFSFetchStat;
struct ViceStatus * ViceStat;
{
    /* identity and shape of the object */
    ViceStat->InterfaceVersion = AFSFetchStat->InterfaceVersion;
    ViceStat->VnodeType = AFSFetchStat->FileType;
    ViceStat->LinkCount = AFSFetchStat->LinkCount;
    ViceStat->Length = AFSFetchStat->Length;
    ViceStat->DataVersion = AFSFetchStat->DataVersion;
    ViceStat->Date = AFSFetchStat->ClientModTime;

    /* ownership and protection */
    ViceStat->Author = AFSFetchStat->Author;
    ViceStat->Owner = AFSFetchStat->Owner;
    ViceStat->Mode = AFSFetchStat->UnixModeBits;

    /* access rights reported for the caller and for anonymous users */
    ViceStat->MyAccess = AFSFetchStat->CallerAccess;
    ViceStat->AnyAccess = AFSFetchStat->AnonymousAccess;
}

/*
 * Record the expiration time of a callback in a ViceStatus; no other field
 * of either structure is read or written.
 */
AFSCallBackStToViceSt(CallBack, ViceStat)
struct AFSCallBack * CallBack;
struct ViceStatus * ViceStat;
{
    ViceStat->CallBackTime =
	CallBack->ExpirationTime;
}

#endif	NINTERFACE
