#	START NEW ARIX SCCS HEADER
#
#	@(#) cas.s: version 25.1 created on 11/27/91 at 15:01:47
#
#	Copyright (c) 1990 by Arix Corporation
#	All Rights Reserved
#
#	ident	"@(#)cas.s	25.1	11/27/91 Copyright (c) 1990 by Arix Corporation"
#
#	END NEW ARIX SCCS HEADER
#
	ident	"@(#)cas.s	25.1	11/27/91 Copyright (c) 1990 by Arix Corporation"
# cas.s -- spin locks and atomic operations

ident	"@(#)uts/ml/M68040:cas.s	23.2"

	global	spin_lock, spin_unlock
	global	exit_short_cr_no_spl, restore_spin_lock
	global	atom_and, atom_or, atom_set, atom_and_or, atom_ref_clear_valid
	global	atom_add, atom_sub, atom_inc, atom_dec, atom_inc_nz
	global	atom_or_byte, atom_and_byte
	global	atom_or_short, atom_and_short
	global	atom_tset_byte
	global	add_chain, remove_chain, remove_end
	global	cas_long
	global	dizzy_lock, dizzy_unlock

# S P I N   L O C K S
#
# spin_lock(&lock)
#	critical code goes here
# spin_unlock(&lock)
#

# DEBUG JPC: change cas to do a long word compare to check 040 problem
#		add link/unlk for fun
#
# spin_lock(&lock)
#	Raise the interrupt mask to level 7 and take the lock with a long-word
#	cas.  While the lock is held by someone else the routine puts the
#	original sr back and polls until the lock looks free, then retries.
#
#	Lock layout as used by this routine (byte offsets from the pointer):
#	   0	lock word.  Value stored on acquisition:
#		  bits 31-16	sr at entry, low 4 bits cleared, own_lock_id
#				or-ed in
#		  bits 15-8	own_lock_id
#		  bits 7-0	zero
#		The lock is treated as free when bits 31-16 are zero.
#	   4	return pc of the caller that holds the lock
#	   8	number of acquisitions that succeeded on the first cas
#	  12	number of acquisitions that had to spin first
#	  16	accumulated count of polling-loop iterations
#	  20	copy of the value stored into the lock word
#
#	own_spin_lock_cnt is incremented on every acquisition.
#	Registers: d0, d1, a0, a1 are used as scratch; d2 is saved/restored.
#	On return the interrupt mask is still at level 7.
spin_lock:
	mov.l	4(%sp), %a0		# get spin_lock pointer in a0
	link	%fp,&0
	mov.l	%d2, -(%sp)		# save d2
	clr.l	%d1
	mov.b	own_lock_id, %d1	# get lock id
	clr.l	%d2
	mov.w	%sr, %d2		# save psw
	bfclr	%d2{&28:&4}		# we don't care about the bottom 4 bits
	or.l	%d1, %d2		# of the psw, so set them to lock id
	mov.l	%d2, %d0		# set up update value
	lsl.l	&8, %d0
	or.l	%d1, %d0
	lsl.l	&8, %d0			# d0 = (psw|id) << 16 | id << 8
	or.w	&0x0700, %sr		# spl7
	mov.l	(%a0),%d1		# compare value
	bfclr	%d1{&0:&16}		# expect upper word zero, i.e. lock free
	cas.l	%d1, %d0, (%a0)
	bne.b	L%enter_fail_init_cnt	# lock word differed: someone holds it
#
#	We now have the lock and can stash any debug stuff we want
#
#	mov.b	own_lock_id, 2(%a0)
	mov.l	8(%sp), 4(%a0)		# save the pc of whoever locked it
					# (8(%sp) skips saved d2 and saved fp)
	add.l	&1, 8(%a0)		# inc non-collision count
	add.l	&1, own_spin_lock_cnt
	mov.l	%d0, 20(%a0)		# save original lock value
	mov.l	(%sp)+, %d2		# restore d2
	unlk	%fp
	rts

L%enter_fail_init_cnt:
	mov.l	&0, %a1			# for counting how long we wait

L%enter_fail:

#
#	SINGLE PROCESSOR VERSION SHOULD NEVER FAIL HERE
#
#	jmp 	1
#
#	END OF SINGLE PROCESSOR

	mov.w	%d2, %sr		# back to the entry interrupt mask
					# while we poll
L%enter_loop:
	add.l	&1, %a1			# increment wait count
	tst.w	(%a0)			# upper word of lock still non-zero?
	bne.b	L%enter_loop

L%spin_lock_try_again:
	or.w	&0x0700, %sr		# spl7
	mov.l	(%a0),%d1		# compare value
	bfclr	%d1{&0:&16}
	cas.l	%d1, %d0, (%a0)
	bne.b	L%enter_fail		# lost the race, restore sr and poll again
#
#	We now have the lock and can stash any debug stuff we want
#
##	mov.b	own_lock_id, 2(%a0)
	mov.l	8(%sp), 4(%a0)		# save the pc of whoever locked it
	add.l	&1, 12(%a0)		# inc collision count
	add.l	&1, own_spin_lock_cnt
	mov.l	%a1, %d1
	add.l	%d1, 16(%a0)		# inc wait count
	mov.l	%d0, 20(%a0)		# save original lock value
	mov.l	(%sp)+, %d2		# restore d2
	unlk	%fp
	rts

#############################

# spin_unlock(&lock)
#	Release a lock taken by spin_lock and resume the caller with the sr
#	that is stored in the upper word of the lock.
#
# spin_unlock has been modified to do an rte, instead an rts.
# this is to force a pending interrupt to be taken after
# the rte (thus in the calling routine), instead of in the
# middle of spin_unlock.  this allows the kernel profiler somewhat 
# clearer view.
#
#	Error exits (none of them return to the caller):
#	  L%spu_e1	upper word of the lock is zero (lock already free)
#	  L%spu_e3	low word of the lock is not own_lock_id << 8
#	  L%spu_e2	the releasing cas did not succeed
#	Registers: d0, d1, a0, a1 are scratch; d2 is saved/restored.

spin_unlock:
#	earlier rts-based version:
#	mov.l	4(%sp), %a0
#	mov.w	(%a0), %d0
#	beq.b	spin_error		# this lock was already cleared?
#	clr.w	(%a0)
#	mov.w	%d0, %sr
#	rts
	# coming in
	#
	#sp-->	pc high word
	#	pc low word
	#	addr high of spinlock
	#	addr low of spinlock
	#
	mov.l	4(%sp), %a0		# grab spinlock addr
	mov.l	(%sp)+, %a1		# pop pc in %a1
	clr.w	-(%sp)			# clear format/vector info
	mov.l	%a1, -(%sp)		# push pc back on stack
	mov.w	(%a0), -(%sp)		# copy old sr into position
	beq.b	L%spu_e1		# stored sr word is zero: not locked
	link	%fp, &0
	mov.l	%d2, -(%sp)		# push %d2
	clr.l	%d1
	mov.b	own_lock_id, %d1	# set up replace value
	lsl.l	&8, %d1			# shift it into position
	mov.l	(%a0), %d0
	cmp.w	%d0, %d1		# make sure we own the lock
	bne.b	L%spu_e3
	mov.l	%d0, %d2		# save original %d0 in %d2
	cas.l	%d0, %d1, (%a0)		# clear it. 
					# Upper word of d1 is zero, so a
					# successful store frees the lock.
	bne.b	L%spu_e2 # hanna DEBUG 

	# stack once %d2 is popped and the unlk below has run:
	#sp->	old sr
	#	pc high word
	#	pc low word
	#	0
	#	addr high of spinlock
	#	addr low of spinlock
	sub.l	&1, own_spin_lock_cnt
	mov.l	(%sp)+, %d2		# pop %d2
	unlk	%fp
	rte				# reload sr and pc from the frame

#############################

# Fatal error exits shared by the spin lock routines.  None of them returns.
#
#	L%spu_e1	a0 = lock address.  Prints L%spu_str1.
#	L%spu_e3	a0 = lock address, d0 = lock word that was read,
#			d1 = value the low word was expected to match.
#			Prints L%spu_str3.
#	L%spu_e2	a0 = lock address, d0/d1/d2 = registers around the
#			failed cas.  Prints L%spu_str2.
#	spin_error	a0 = lock address.  Prints L%spu_err, then calls
#			backtrace and stop_all_processors and traps to tdb.
#
# a0 is a scratch register in the C calling convention this file relies on
# (only d2 is ever saved here), so printf is free to destroy it.  Each
# handler therefore keeps the lock address on the stack across its own
# printf call so that spin_error reports the real address.

L%spu_e1:
	mov.l	%a0, -(%sp)		# keep lock address across printf
	mov.l	&L%spu_str1, -(%sp)
	jsr	printf
	add.l	&4, %sp
	mov.l	(%sp)+, %a0		# lock address for spin_error
	bra.b	spin_error
L%spu_e3:
	mov.l	%a0, -(%sp)		# keep lock address across printf
	mov.l	%d1, -(%sp)
	mov.l	%d0, -(%sp)
	mov.l	&L%spu_str3, -(%sp)
	jsr	printf
	add.l	&12, %sp
	mov.l	(%sp)+, %a0		# lock address for spin_error
					# fall into spin_error (a short branch
					# to the next instruction would need a
					# zero displacement, which a byte-sized
					# branch cannot encode)

spin_error:
	mov.l	%a0, -(%sp)
	mov.l	&L%spu_err, -(%sp)
	jsr	printf
	add.l	&8, %sp
	jsr	backtrace #hanna DEBUG
	jsr	stop_all_processors # hanna DEBUG
L%spin_error_trap:
	trap	&15			# trap into tdb
	bra.b	L%spin_error_trap	# if tdb resumes us, trap again rather
					# than run into the handler below

L%spu_e2:
#	nop
#	nop
#	mov.l	(%a0), %d1	# re-read the spin_lock (goes into trace)
#	mov.l	0xff780000,%d1
#	bset	&15,%d1		# set bit of private control reg (trigger trace)
#	mov.l	%d1,0xff780000
#	mov.l	(%a0), -(%sp)	# re-read the spin_lock (for printf)
	mov.l	%a0, -(%sp)		# keep lock address across printf
	mov.l	%d2, -(%sp)
	mov.l	%d1, -(%sp)
	mov.l	%d0, -(%sp)
	mov.l	&L%spu_str2, -(%sp)
	jsr	printf
	add.l	&16, %sp
	mov.l	(%sp)+, %a0		# lock address for spin_error
	bra.b	spin_error

# exit_short_cr_no_spl(short_cr_sem)
#	Give up a spin lock but leave the interrupt priority level alone;
#	used by the sleep routines.  The upper 16 bits of the lock word are
#	cleared with a cas and own_spin_lock_cnt is decremented.
#	A failed cas goes to L%spu_e2 (fatal) with d0 = value found in the
#	lock, d1 = value we tried to store, d2 = value we expected.
#
exit_short_cr_no_spl:
	link	%fp, &0
	mov.l	%d2, -(%sp)		# d2 must survive the call
	mov.l	8(%fp), %a0		# a0 = lock pointer (first argument)
	subq.l	&1, own_spin_lock_cnt
	mov.l	(%a0), %d2		# d2 = lock word as we found it
	mov.l	%d2, %d0		# d0 = compare value for the cas
	mov.l	%d2, %d1
	and.l	&0x0000ffff, %d1	# d1 = same word with bits 31-16 zeroed
	cas.l	%d0, %d1, (%a0)		# release
	bne.b	L%spu_e2		# should never fail
	mov.l	(%sp)+, %d2
	unlk	%fp
	rts

# restore_spin_lock(short_cr_sem, saved_psw)
#	After a wakeup, put saved_psw back into the upper 16 bits of the lock
#	word (the low 16 bits are kept) and record the resulting word at
#	offset 20 of the lock.  A failed cas goes to L%spu_e2 (fatal) with
#	d0 = value found, d1 = value we tried to store, d2 = value expected.
#
restore_spin_lock:
	link	%fp, &0
	mov.l	%d2, -(%sp)		# d2 must survive the call
	mov.l	8(%fp), %a0		# a0 = lock pointer
	mov.l	12(%fp), %d2		# d2 = saved_psw argument
	swap	%d2			# psw moves to the upper word
	mov.l	(%a0), %d0		# d0 = compare value (current lock word)
	mov.l	%d0, %d1
	and.l	&0x0000ffff, %d1	# drop whatever is in bits 31-16
	or.l	%d2, %d1		# d1 = new lock word
	mov.l	%d0, %d2		# d2 = expected value, for L%spu_e2
	cas.l	%d0, %d1, (%a0)		# store the new word
	bne.b	L%spu_e2		# should never fail
	mov.l	%d1, 20(%a0)		# remember what we stored
	mov.l	(%sp)+, %d2
	unlk	%fp
	rts

#------------------------------------------------------------------------------
#	dizzy_lock( &lock )
#
#	Queueing lock.  The lock word (offset 0) is the head of a list of
#	lock request elements; zero means free.  The caller's element, whose
#	address is in own_lock_req_ptr, is pushed onto that list with a cas.
#	If the list was empty the caller owns the lock at once; otherwise it
#	polls the long at offset 12 of its own element until that becomes
#	zero (presumably cleared by the unlock side, dizzy_unlock_c -- not
#	visible in this file).
#
#	Lock fields touched here (byte offsets):
#	   0	head of requester list	   4	sr at entry (as a long)
#	  11	owner id byte		  12	return pc of the holder
#	  16	non-collision count	  20	fail count
#	  24	accumulated wait-loop iterations
#	Request element fields: 0 = next element, 12 = spin flag.
#
#	Runs at interrupt mask level 7 from the or.w onward and returns that
#	way.  Increments own_spin_lock_cnt.  Uses d0, d1, a0, a1.
#
dizzy_lock:
	mov.l	4(%sp), %a0		# addr of lock
	clr.l	%d0
	mov.w	%sr, %d0
	mov.l	%d0, %a1		# save sr in %a1
	or.w	&0x0700, %sr		# spl7

	# add lock req to list of requesters
	mov.l	(%a0), %d0		# previous head of list. compare value
	mov.l	own_lock_req_ptr, %d1	# addr of our lock array element.

	mov.l	&1, 12(%d1.l)		# set spin element to 1
dizzy_lock_cas_again:
	mov.l	%d0, (%d1.l)		# update our lock req next_lock_req.
	cas.l	%d0, %d1, (%a0)		# try to grab lock
	bne.b	dizzy_lock_cas_again	# head changed; d0 now holds the new
					# head, relink and retry

	tst.l	%d0			# was the lock previously free?
	bne.b	dizzy_lock_wait		# if no, branch

	clr.l	12(%d1.l)		# clear spin element
	mov.b	own_lock_id, 11(%a0)	# save our id in lock
	addq.l	&1, 16(%a0)		# inc non-collision count in lock
dizzy_lock_got_it:
	mov.l	%a1, 4(%a0)		# save psw in lock
	mov.l	(%sp), 12(%a0)		# save our PC in lock for diagnostics
	addq.l	&1, own_spin_lock_cnt
	rts

dizzy_lock_wait:
	clr.l	%d0			# wait counter
dizzy_lock_spin:
	addq.l	&1, %d0
	tst.l	12(%d1.l)		# test spin element
	bne.b	dizzy_lock_spin

ifdef(`ASDEBUG',
`	tst.l	(%a0)			# ASSERT lock is still locked
	beq	spin_error')
ifdef(`ASDEBUG',
`	mov.b	own_lock_id, %d1
	cmp.b	%d1, 11(%a0)		# ASSERT lock_owner == own_lock_id
	bne	spin_error')

	addq.l	&1, 20(%a0)		# inc fail count in lock
	add.l	%d0, 24(%a0)		# add to wait count in lock
	bra.b	dizzy_lock_got_it

#------------------------------------------------------------------------------
#	dizzy_unlock( &lock )
#
# Try the case where no one is waiting for the lock first. If we transition
# from locked with no one waiting to free we are done. Else call dizzy_unlock_c
#
# The stack has been modified to do an rte, instead of an rts. This is to force
# a pending interrupt to be taken after the rte (thus in the calling routine),
# instead of in the middle of dizzy_unlock.  This allows the kernel profiler
# somewhat clearer view.
#
# Decrements own_spin_lock_cnt.  The sr that the rte reloads is the low word
# of the long stored at offset 4 of the lock.  Uses d0, d1, a0, a1.

dizzy_unlock:
	mov.l	4(%sp), %a0		# addr of lock

ifdef(`ASDEBUG',
`	mov.b	own_lock_id, %d0
	cmp.b	%d0, 11(%a0)		# ASSERT lock_owner == own_lock_id
	bne	spin_error')

	sub.l	&1, own_spin_lock_cnt
ifdef(`ASDEBUG',
	`bcs	spin_error		# ASSERT own_spin_lock_cnt was > 0 ')

	mov.l	4(%a0), %a1		# save psw stored in lock
	mov.l	(%a0), %d0		# compare value.
ifdef(`ASDEBUG',
	`beq	spin_error		# ASSERT is lock locked ')

	# coming in				going out
	#					sp-->	old psw
	#						pc high word
	#sp-->	pc high word				pc low word	
	#	pc low word				0
	#	addr high of lock			addr high of lock
	#	addr low of lock			addr low of lock
	#
	mov.l	(%sp)+, %d1		# pop pc into %d1
	clr.w	-(%sp)			# clear format/vector info
	mov.l	%d1, -(%sp)		# push pc back on stack
	mov.w	%a1, -(%sp)		# push psw on stack

	cmp.l	%d0, own_lock_req_ptr	# anyone (besides us) on list, waiting?
	bne.b	dizzy_unlock_the_hard_way	# if yes, branch
	clr.l	%d1			# update value
	cas.l	%d0, %d1, (%a0)		# try to free lock w/ no waiters
	bne.b	dizzy_unlock_the_hard_way	# if failure, branch
	rte

dizzy_unlock_the_hard_way:
	mov.l	%a0, -(%sp)		# push addr of lock arg
	jsr	dizzy_unlock_c		# C routine handles the waiter case
	addq.l	&4, %sp
	rte

#
# A T O M I C    A R I T H M E T I C    O P E R A T I O N S
#
# ulong
# atom_and(&ulong, and_val)
#	Atomically replaces the ulong with (ulong & and_val).
#	d0 returns the value the ulong held just before the update.
#

atom_and:
	mov.l	4(%sp), %a0		# a0 = address of the ulong
	mov.l	(%a0), %d0		# d0 = first guess at its current value
L%atom_and_retry:
	mov.l	8(%sp), %d1		# d1 = and_val
	and.l	%d0, %d1		# d1 = value we want to store
	cas.l	%d0, %d1, (%a0)		# store if unchanged, else d0 = current
	bne.b	L%atom_and_retry
	rts

# ulong
# atom_or(&ulong, or_val)
#	Atomically replaces the ulong with (ulong | or_val).
#	d0 returns the value the ulong held just before the update.
#
atom_or:
	mov.l	4(%sp), %a0		# a0 = address of the ulong
	mov.l	(%a0), %d0		# d0 = first guess at its current value
L%atom_or_retry:
	mov.l	8(%sp), %d1		# d1 = or_val
	or.l	%d0, %d1		# d1 = value we want to store
	cas.l	%d0, %d1, (%a0)		# store if unchanged, else d0 = current
	bne.b	L%atom_or_retry
	rts

# ulong
# atom_add(&ulong, add_val)
#	Atomically replaces the ulong with (ulong + add_val).
#	d0 returns the value the ulong held just before the update.
#

atom_add:
	mov.l	4(%sp), %a0		# a0 = address of the ulong
	mov.l	(%a0), %d0		# d0 = first guess at its current value
L%atom_add_retry:
	mov.l	%d0, %d1
	add.l	8(%sp), %d1		# d1 = current + add_val
	cas.l	%d0, %d1, (%a0)		# store if unchanged, else d0 = current
	bne.b	L%atom_add_retry
	rts

# ulong
# atom_sub(&ulong, sub_val)
#	Atomically replaces the ulong with (ulong - sub_val).
#	d0 returns the value the ulong held just before the update.
#

atom_sub:
	mov.l	4(%sp), %a0		# a0 = address of the ulong
	mov.l	(%a0), %d0		# d0 = first guess at its current value
L%atom_sub_retry:
	mov.l	%d0, %d1
	sub.l	8(%sp), %d1		# d1 = current - sub_val
	cas.l	%d0, %d1, (%a0)		# store if unchanged, else d0 = current
	bne.b	L%atom_sub_retry
	rts

# ulong
# atom_inc(&ulong)
#	Atomically adds 1 to the ulong.
#	d0 returns the value the ulong held just before the update.
#

atom_inc:
	mov.l	4(%sp), %a0		# a0 = address of the ulong
	mov.l	(%a0), %d0		# d0 = first guess at its current value
L%atom_inc_retry:
	mov.l	&1, %d1
	add.l	%d0, %d1		# d1 = current + 1
	cas.l	%d0, %d1, (%a0)		# store if unchanged, else d0 = current
	bne.b	L%atom_inc_retry
	rts


# ulong
# atom_dec(&ulong)
#	Atomically subtracts 1 from the ulong.
#	d0 returns the value the ulong held just before the update.
#

atom_dec:
	mov.l	4(%sp), %a0		# a0 = address of the ulong
	mov.l	(%a0), %d0		# d0 = first guess at its current value
L%atom_dec_retry:
	mov.l	%d0, %d1
	subq.l	&1, %d1			# d1 = current - 1
	cas.l	%d0, %d1, (%a0)		# store if unchanged, else d0 = current
	bne.b	L%atom_dec_retry
	rts

# ulong
# atom_inc_nz(&long)
#	Atomically adds 1 to the long, but only while it is non-zero.
#	Unlike atom_inc, d0 returns the NEW value; it returns 0 (and nothing
#	is stored) when the long was found to be zero.

atom_inc_nz:
	mov.l	4(%sp), %a0		# a0 = address of the long
	mov.l	(%a0), %d1		# d1 = first guess at its current value
L%atom_inc_nz_retry:
	tst.l	%d1
	beq.b	L%atom_inc_nz_zero	# zero: leave it alone
	mov.l	%d1, %d0
	addq.l	&1, %d0			# d0 = current + 1
	cas.l	%d1, %d0, (%a0)		# store if unchanged, else d1 = current
	bne.b	L%atom_inc_nz_retry
	rts
L%atom_inc_nz_zero:
	clr.l	%d0			# report zero
	rts

# unchar
# atom_or_byte(addr, or_val)
# unchar *addr;
# unchar or_val;
#
#	Atomic form of:
#		unchar retval = *addr;
#		*addr |= or_val;
#		return(retval);
#
#	d0 returns the previous byte, zero-extended to a long.
#	atom_and_byte is the same routine with an AND in place of the OR.


atom_or_byte:
	mov.l	4(%sp), %a0		# a0 = addr
	mov.l	&0, %d0			# upper 24 bits of the result stay zero
	mov.b	(%a0), %d0		# d0 = first guess at the current byte
L%atom_or_byte_retry:
	mov.l	8(%sp), %d1		# d1 = or_val
	or.b	%d0, %d1		# d1 = byte we want to store
	cas.b	%d0, %d1, (%a0)		# store if unchanged, else d0 = current
	bne.b	L%atom_or_byte_retry
	rts				# d0 = previous byte

# atom_and_byte(addr, and_val)
#	Atomically replaces the byte at addr with (byte & and_val).
#	d0 returns the previous byte, zero-extended to a long.
atom_and_byte:
	mov.l	4(%sp), %a0		# a0 = addr
	mov.l	&0, %d0			# upper 24 bits of the result stay zero
	mov.b	(%a0), %d0		# d0 = first guess at the current byte
L%atom_and_byte_retry:
	mov.l	8(%sp), %d1		# d1 = and_val
	and.b	%d0, %d1		# d1 = byte we want to store
	cas.b	%d0, %d1, (%a0)		# store if unchanged, else d0 = current
	bne.b	L%atom_and_byte_retry
	rts				# d0 = previous byte

# ushort
# atom_or_short(addr, or_val)
# unshort *addr;
# unshort or_val;
#
#	Atomic form of:
#		unshort retval = *addr;
#		*addr |= or_val;
#		return(retval);
#
#	d0 returns the previous short, zero-extended to a long.
#	atom_and_short is the same routine with an AND in place of the OR.

atom_or_short:
	mov.l	4(%sp), %a0		# a0 = addr
	mov.l	&0, %d0			# upper 16 bits of the result stay zero
	mov.w	(%a0), %d0		# d0 = first guess at the current short
L%atom_or_short_retry:
	mov.l	8(%sp), %d1		# d1 = or_val
	or.w	%d0, %d1		# d1 = short we want to store
	cas.w	%d0, %d1, (%a0)		# store if unchanged, else d0 = current
	bne.b	L%atom_or_short_retry
	rts				# d0 = previous short

# atom_and_short(addr, and_val)
#	Atomically replaces the short at addr with (short & and_val).
#	d0 returns the previous short, zero-extended to a long.
atom_and_short:
	mov.l	4(%sp), %a0		# a0 = addr
	mov.l	&0, %d0			# upper 16 bits of the result stay zero
	mov.w	(%a0), %d0		# d0 = first guess at the current short
L%atom_and_short_retry:
	mov.l	8(%sp), %d1		# d1 = and_val
	and.w	%d0, %d1		# d1 = short we want to store
	cas.w	%d0, %d1, (%a0)		# store if unchanged, else d0 = current
	bne.b	L%atom_and_short_retry
	rts				# d0 = previous short
# ulong
# atom_set(&ulong, val)
#	Atomically stores val into the ulong.
#	d0 returns the value the ulong held just before the store.
#

atom_set:
	mov.l	4(%sp), %a0		# a0 = address of the ulong
	mov.l	(%a0), %d0		# d0 = first guess at its current value
	mov.l	8(%sp), %d1		# d1 = val
L%atom_set_retry:
	cas.l	%d0, %d1, (%a0)		# store if unchanged, else d0 = current
	bne.b	L%atom_set_retry
	rts

# ulong
# atom_and_or(&ulong, and_val, or_val)
#	Atomically replaces the ulong with ((ulong & and_val) | or_val).
#	d0 returns the value the ulong held just before the update.
#

atom_and_or:
	mov.l	4(%sp), %a0		# a0 = address of the ulong
	mov.l	(%a0), %d0		# d0 = first guess at its current value
L%atom_and_or_retry:
	mov.l	8(%sp), %d1		# d1 = and_val
	and.l	%d0, %d1		# d1 = current & and_val
	or.l	12(%sp), %d1		# d1 |= or_val
	cas.l	%d0, %d1, (%a0)		# store if unchanged, else d0 = current
	bne.b	L%atom_and_or_retry
	rts

# pde_t
# atom_ref_clear_valid(&pde)
#	Clears bit 0 (valid) of the pde, but only if bit 0 is set and bit 3
#	(referenced) is clear at the moment of the store.
#	Returns 0 when the valid bit was cleared, 1 when it was not
#	(referenced bit found set, or valid bit found already clear).
#

atom_ref_clear_valid:
	mov.l	4(%sp), %a0		# a0 = address of the pde
	mov.l	(%a0), %d0		# d0 = pde as it is now
	bclr	&3, %d0			# expected value: referenced clear ...
	bset	&0, %d0			# ... and valid set
L%atom_ref_clva_retry:
	mov.l	%d0, %d1
	bclr	&0, %d1			# new value: expected with valid cleared
	cas.l	%d0, %d1, (%a0)
	beq.b	ret0			# stored: success
					# cas failed, d0 = what the pde really holds
	btst	&3, %d0			# referenced bit set?
	bne.b	L%atom_ref_clva_fail	# yes, give up
	btst	&0, %d0			# valid bit still set?
	bne.b	L%atom_ref_clva_retry	# yes: some other bit changed, retry
L%atom_ref_clva_fail:
	mov.l	&1, %d0			# non-zero: valid bit was not cleared
	rts
ret0:
	clr.l	%d0
	rts

#
# UPKERN support
#
#	typedef struct {
#		ushort	up_cnt;		/* reference count of lock */
#		ushort	up_pm_id;	/* pm_id of processor that has lock */
#	} upkern_t;
# 
# Note that the upkern is dealt with as a single 32 bit quantity by these
# routines, as well as two 16 bit quantities.

global	upkern_inc, upkern_dec, upkern_try_inc, upkern_try_lock
global	upkern_waiting_inc

#
# upkern_inc()
#	Atomically adds 1 to upkern.up_cnt (the short at offset 0 of upkern)
#	and returns upkern.up_pm_id (the short at offset 2), zero-extended,
#	as read after the increment succeeded.
#
#	d0 = up_cnt value we expect, d1 = up_cnt value we want to store

upkern_inc:
	mov.l	&upkern, %a0	# a0 = &upkern
	mov.w	(%a0), %d0	# first guess at up_cnt
L%upk_inc_retry:
	mov.w	%d0, %d1
	addq.w	&1, %d1		# d1 = up_cnt + 1
	cas.w	%d0, %d1, (%a0) # store if up_cnt is unchanged,
				# otherwise d0 = current up_cnt
	bne.b	L%upk_inc_retry
	mov.l	&0, %d0		# upper half of the result is zero
	mov.w	2(%a0), %d0	# result = up_pm_id
	rts

# upkern_waiting_inc()
#
#	Same as upkern_inc, except that the value stored into up_cnt also
#	has up_wait_bit_short or-ed in.  Returns the short at offset 2 of
#	upkern, zero-extended.

upkern_waiting_inc:
	mov.l	&upkern, %a0	# a0 = &upkern
	mov.w	(%a0), %d0	# first guess at up_cnt
L%upkw_inc_retry:
	mov.w	%d0, %d1
	addq.w	&1, %d1		# d1 = up_cnt + 1 ...
	ori.w	&up_wait_bit_short, %d1	# ... with the waiting bit set
	cas.w	%d0, %d1, (%a0) # store if up_cnt is unchanged,
				# otherwise d0 = current up_cnt
	bne.b	L%upkw_inc_retry
	mov.l	&0, %d0		# upper half of the result is zero
	mov.w	2(%a0), %d0	# result = short at offset 2
	rts


# upkern_dec:
#
#	Operates on upkern as one 32 bit quantity.
#
#	    old = upkern
#	again:
#	    if (old == own_upkern_dec_val)	/* cnt 2, waiting set, slot me */
#		atomically: upkern = next_upkern_pm_id + up_cnt_long
#		on failure goto again
#		upkern_waiting_handler()
#	    else
#		atomically: upkern = old - up_cnt_long
#		on failure goto again
#
#	A failed cas leaves the current value of upkern in old (d0).

upkern_dec:
	mov.l	&upkern, %a0		# a0 = &upkern
	mov.l	(%a0), %d0		# d0 = old
L%upk_dec_again:
	cmp.l	%d0, own_upkern_dec_val	# up_cnt == 2, waiting_set, slot == me
	beq.b	L%upk_dec_handoff	# yes, hand the upkern on

	mov.l	%d0, %d1		# ordinary decrement
	sub.l	&up_cnt_long, %d1	# new.up_cnt--
	cas.l	%d0, %d1, (%a0)
	bne.b	L%upk_dec_again		# upkern changed under us
	rts

L%upk_dec_handoff:
	mov.l	next_upkern_pm_id, %d1	# new slot
	add.l	&up_cnt_long, %d1	# up_cnt = 1, waiting clear
	cas.l	%d0, %d1, (%a0)
	bne.b	L%upk_dec_again		# upkern changed under us
	jsr	upkern_waiting_handler	# upkern has been moved
	rts


# uint
# upkern_try_inc()
# upkern_try_lock()	(second name for the same entry point)
#
#	Returns 1 if the upkern was free and has now been acquired, or if
#	this pm already owned it and up_cnt has been incremented.
#	Returns 0 if another pm owns it.
#
#	upkern is handled as one long here: its low word is up_pm_id and
#	up_cnt_long is the amount that bumps up_cnt by one.


upkern_try_inc:
upkern_try_lock:

	mov.l	&upkern, %a0		# a0 = &upkern

L%upk_try_free:				# first assume the upkern is free
	mov.l	o_upkern_init_val, %d1	# value to store when we acquire it
	mov.l	&0, %d0
	mov.w	2(%a0), %d0		# expected: up_cnt 0, current up_pm_id
	cas.l	%d0, %d1, (%a0)
	bne.b	L%upk_try_busy
L%upk_try_ok:
	mov.l	&1, %d0			# success
	rts

L%upk_try_busy:				# in use; d0 = actual upkern value
	cmp.w	%d0, %d1		# is the owner our pm_id?
	bne.b	L%upk_try_fail		# no, another pm has it
	mov.l	%d0, %d1
	add.l	&up_cnt_long, %d1	# we own it: try to bump up_cnt
	cas.l	%d0, %d1, (%a0)
	beq.b	L%upk_try_ok
	bra.b	L%upk_try_free		# upkern changed under us (inc or dec
					# by another pm), start over

L%upk_try_fail:
	clr.l	%d0			# failure
	rts


# atom_tset_byte(&target, val)
#	Byte-sized test-and-set done with a single cas:
#
#	START ATOMIC SEQUENCE
#	r = *target;
#	if (r == 0)
#		*target = val;
#	END ATOMIC SEQUENCE
#	return(r);
#
#	r comes back in d0, zero-extended to a long.
#

atom_tset_byte:
	mov.l	&0, %d0			# compare value 0; also clears the
					# upper 24 bits of the result
	mov.l	8(%sp), %d1		# d1 = val
	mov.l	4(%sp), %a0		# a0 = &target
	cas.b	%d0, %d1, (%a0)		# on mismatch d0 = byte found in target
	rts

#Ravi: we are not using the following two routines.  In future if 
#there is any need, take out the comment.
#atom_tset:
#	mov.l	4(%sp), %a0		# load &target into a0
#	mov.l	8(%sp), %d1		# load val into d1
#	clr.l	%d0			# set d0 to 0
#	cas.l	%d0, %d1, (%a0)
#	rts
#
#atom_tset_short:
#	mov.l	4(%sp), %a0		# load &target into a0
#	mov.l	8(%sp), %d1		# load val into d1
#	clr.l	%d0			# set d0 to 0
#	cas.w	%d0, %d1, (%a0)
#	rts

#-------------------------------------------------------------------------------
# add_chain( &headptr, &newelement, &newelement_link_ptr )
#	Pushes newelement on the front of a singly linked chain: the old head
#	is written through newelement_link_ptr and the head pointer is then
#	switched to newelement with a cas, retrying until the cas succeeds.
#	d0 holds the previous head on return.

add_chain:
	mov.l	12(%sp),%a0	# a0 = where the new element keeps its link
	mov.l	4(%sp),%a1	# a1 = &head pointer
	mov.l	8(%sp),%d1	# d1 = address of the new element
	mov.l	(%a1),%d0	# d0 = first guess at the current head
L%add_chain_retry:
	mov.l	%d0,(%a0)	# new element links to the head we expect
	cas.l	%d0,%d1,(%a1)	# head = new element if head is unchanged
	bne.b	L%add_chain_retry	# head moved, d0 = new head; relink
	rts

#-------------------------------------------------------------------------------
# remove_chain( &headptr )
#	Detaches the whole chain: the head pointer is atomically replaced
#	by 0 and the old head (first element) is returned in d0.
#	Returns 0 without storing anything if the chain was already empty.

remove_chain:
	mov.l	4(%sp),%a0	# a0 = &head pointer
	mov.l	(%a0),%d0	# d0 = current head
	beq.b	L%remove_chain_out	# empty chain, return null
	mov.l	&0,%d1		# new head value
L%remove_chain_retry:
	cas.l	%d0,%d1,(%a0)	# head = 0 if head is unchanged
	bne.b	L%remove_chain_retry	# head moved, d0 = new head
L%remove_chain_out:
	rts			# d0 = first element of the removed chain

#-------------------------------------------------------------------------------
# remove_end( &ptr to last element, ptr to last element )
#	One cas attempt: if the tail pointer still equals the given element
#	it is set to 0 and the element pointer is returned; otherwise
#	nothing is stored and 0 is returned.

remove_end:
	mov.l	8(%sp),%d0	# d0 = element we expect to be last
	mov.l	4(%sp),%a0	# a0 = &tail pointer
	clr.l	%d1		# new tail pointer
	cas.l	%d0,%d1,(%a0)
	beq.b	remove_end_ok	# stored; d0 still holds the element
	clr.l	%d0		# tail was something else: report failure
remove_end_ok:
	rts

# cas_long(address, old_val, new_val)
#
#	/* Start atomic sequence */
#	if (*address != old_val)
#		return(0);
#
#	*address = new_val
#	/* End atomic sequence */
#	return(1);

cas_long:
	mov.l	4(%sp), %a0		# a0 = address
	mov.l	8(%sp), %d0		# d0 = old_val
	mov.l	12(%sp), %d1		# d1 = new_val
	cas.l	%d0, %d1, (%a0)
	beq.b	L%cas_long_stored
	clr.l	%d0			# *address was not old_val
	rts
L%cas_long_stored:
	mov.l	&1, %d0			# new_val is in place
	rts

#------------------------------------------------------------------------
#		    4(%sp),   8(%sp),   12(%sp),  16(%sp)
# atom_set_err_lock(lock_loc, lock_val, test_val, num_tries)
#  char * lock_loc;
#  char lock_val, test_val;
#  int num_tries;
#
#	do {
#		START ATOMIC SEQUENCE
#		if (*lock_loc == test_val) {
#			*lock_loc = lock_val;
#			return(test_val);	/* got the lock */
#		}
#		END ATOMIC SEQUENCE
#	} while (--num_tries != 0);
#	return(value last seen in *lock_loc);	/* gave up */
#
# Used specifically by level_seven() and buserr() routines to set a
# coordinating lock for reporting and clearing unrecoverable errors.
# kind of a spin lock with a time limit.
# We're going down anyway, speed is not important.
#
# The result is in d0: the low byte is the byte found in *lock_loc, the
# upper 24 bits are those of the test_val argument.  The num_tries stack
# slot is counted down in place.
#
# The cas must go through an address register.  Using 4(%sp) directly as
# the cas operand would compare and overwrite the first byte of the stack
# slot that holds the lock_loc pointer, never the lock byte itself.
#
# NOTE(review): on timeout the lock is NOT stored, the caller only gets the
# current value back.  Confirm that level_seven() and buserr() expect this.
#------------------------------------------------------------------------
	global	atom_set_err_lock
atom_set_err_lock:
	mov.l	4(%sp), %a0		# a0 = lock_loc
	mov.l	8(%sp), %d1		# lock_val
asel_loop:
	mov.l	12(%sp), %d0		# test_val
	cas.b	%d0, %d1, (%a0)		# *lock_loc == test_val ? store lock_val
					#   : d0.b = *lock_loc
	beq.b	asel_out		# stored, we have the lock
	sub.l	&1, 16(%sp)		# num_tries--
	bne.b	asel_loop		# tries left, go again
asel_out:
	rts				# current *lock_loc in %d0

#------------------------------------------------------------------------
# Error strings for spin_unlock
#
# NUL-terminated printf format strings, spelled out one character constant
# at a time; 10 is a newline.  Each label comment shows the text the bytes
# encode.
#------------------------------------------------------------------------

L%spu_str1:		# "\nLock already free!"
	byte	10,'L,'o,'c,'k,' ,'a,'l,'r,'e,'a,'d,'y,' ,'f,'r,'e,'e,'!,0

L%spu_str2:		# "\nUnlock cas failed! d0=%x d1=%x d2=%x"
	byte	10,'U,'n,'l,'o,'c,'k,' ,'c,'a,'s,' ,'f,'a,'i,'l,'e,'d,'!
	byte	' ,'d,'0,'=,'%,'x,' ,'d,'1,'=,'%,'x,' ,'d,'2,'=,'%,'x,0

L%spu_str3:		# "\nLock stolen! d0=%x d1=%x"
	byte	10,'L,'o,'c,'k,' ,'s,'t,'o,'l,'e,'n,'!
	byte	' ,'d,'0,'=,'%,'x,' ,'d,'1,'=,'%,'x,0

L%spu_err:		# " -- Fatal error in spin_unlock(%x)\n"
	byte	' ,'-,'-,' ,'F,'a,'t,'a,'l,' ,'e,'r,'r,'o,'r,' ,'i,'n
	byte	' ,'s,'p,'i,'n,'_,'u,'n,'l,'o,'c,'k,'(,'%,'x,'),10,0
