# include "DEFS.h"

/************************************************************************/
/*									*/
/*  Single precision square root function				*/
/*  Copyright 1983 by							*/
/*									*/
/*  U  S   S o f t w a r e    C o r p o r a t i o n			*/
/*									*/
/*									*/
/************************************************************************/

/*  Take square root of the single precision floating point value */
/*  on the top of the stack. */

/*  Use the Newton iteration technique to compute the square root. */

/*      X(n+1) = (X(n) + Z/X(n)) / 2 */

/*  The two*s exponent is scaled to restrict the solution domain to 1.0 */
/*  through 4.0.  A linear approximation to the square root is used to */
/*  produce a first guess with greater than 4 bits of accuracy.  Three */
/*  successive iterations are performed in registers to obtain accuracy */
/*  of about 30 bits.  The final iteration is performed in the floating */
/*  point domain. */

FBIAS = 127

	.globl	asqrt
.text

asqrt:				/* Local sqrt for asin() */
	link	fp,#-40
	moveml	#0x3cfc,sp@	/* Save registers */
	jbr	FPSQ5

ENTRY(r_sqrt)
	link	fp,#-40
	moveml	#0x3cfc,sp@	/* Save registers */
	jbsr	FPFADJ

	bvs	FFNANR		/* J/ NaN arg -> NaN result */
	bmi	FFNANR		/* J/ neg arg -> NaN result */
	bcs	FFPINR		/* J/ +INF arg -> +INF result */
	beq	FFZERR		/* J/ 0.0 arg -> 0.0 result */

FPSQ5:
	swap	d0
	movw	d0,d1		/* Get S/E/M word */

	subw	#128*FBIAS,d1	/* Extract argument's two's exp */
	clrb	d1		/* Make it a factor of two */
	subw	d1,d0		/* Scale arg. range to 4.0 > arg' >= 1.0 */
	swap	d0
	asrw	#8,d1		/* Square root of scaled two power */
	movb	d1,d5		/* Save two's exp of result */

	movl	d0,d1		/* Create fixed point integer for approx */
	lsll	#8,d1		/* Produce arg' * 2^30 in d1 */
	bset	#31,d1		/* Set implicit bit */
	beq	FPSQ10		/* J/ arg' >= 2.0 */

	lsrl	#1,d1		/* Adjust D1 */

FPSQ10:	movw	#42720-65536,d2	/* D2 = 0.325926 * 2^17 */
	swap	d1		/* D1.W = arg' * 2^14 */
	mulu	d1,d2		/* D2 = arg' * 0.325926 * 2^31 */
	swap	d2
	addw	#23616,d2	/* + 0.7207 * 2^15 - to 4+ bits */
	subxw	d3,d3
	orw	d3,d2		/* Top out approximation at 1.99997 */

	swap	d1
	lsrl	#1,d1		/* Arg' * 2^29 in d1 (prevent overflow) */

	movl	d1,d3		/* Copy into D3 */
	divu	d2,d3		/* Arg'/X0 * 2^14 in d3 */
	lsrw	#1,d2
	addw	d3,d2		/* X1 in D2 - to 8 bits */

	movl	d1,d3		/* Second in-register iteration */
	divu	d2,d3
	lsrw	#1,d2
	addw	d3,d2		/* X2 in D2 - to 16 bits */

	movl	d1,d3
	divu	d2,d3
	movw	d3,d4
	clrw	d3
	swap	d4
	divu	d2,d3
	movw	d3,d4		/* 32 bit division result */
	swap	d2
	clrw	d2
	lsrl	#1,d2
	addl	d4,d2		/* X3 in D2 - to 24+ bits */

	moveq	#0x7F,d1
	addl	d1,d2		/* Round value in D2 to 24 bits */
	roll	#1,d2		/* Strip implicit bit */
	addb	d5,d1		/* Rebias exponent */
	movb	d1,d2		/* Put exponent in d2 */
	rorl	#8,d2		/* Position FP value */
	lsrl	#1,d2		/* Force sign bit to 0 */
	movl	d2,d0
	moveml	sp@,#0x3cfc	/* Restore registers */
	unlk	fp
	rts

	.data
