/* availexpr.c */
/*
 * HCR Confidential
 *
 * These computer programs are the confidential, proprietary property
 * of HCR (Human Computing Resources Corporation, 10 St. Mary Street,
 * Toronto, Ontario, Canada), and may not be disclosed except with the
 * prior written agreement of HCR.
 *
 * Copyright (c) 1984, 1985, 1986 Human Computing Resources Corporation
 * All Rights Reserved
 */

#ifndef lint
static char *rcsid = "@(#) (Gould) $Header: availexpr.c,v 5.5 89/05/12 12:49:16 pcc Rel-3_0 $";
/* static char ID[] = "@(#)availexpr.c	15.3	of 86/09/29"; */
#endif

/*
 *	Available Expressions.
 *	See Aho & Ullman, Principles of Compiler Design, section 14.2
 *
 */

# include <assert.h>
# include <activity.h>
# include <availexpr.h>
# include <blocks.h>
# include <bool.h>
# include <cost.h>
# include <dag.h>
# include <daghash.h>
# include <dagsymbol.h>
# include <erroro.h>
# include <flow.h>
# include <identifier.h>
# include <livedead.h>
# include <longset.h>
# include <option.h>
# include <storage.h>
# include <temp.h>

/*
 * Export
 */

/* Debug level for this pass, as tested in this file:
 *	> 0	print the hashed DAG of each block (HashBlock)
 *	> 1	print Gen/Kill/Comp for each block (PAEInfo)
 *	> 2	also print In/Out for each block (PAEBlock)
 *	> 3	trace identifier/global dependencies as they are recorded
 */
int AvExprdebug;

AvailIndex AELastIdentifier;	/* Index of last identifier we will look at.
				 * (It changes under our feet.)
				 * Snapshot of MaxIdentifier taken at the
				 * start of AvailableExpressions(); indices
				 * above it denote expressions.
				 */
int NumAvailableExpressions;	/* Set from NextAvail after hashing; used as
				 * the size of every set and of AEChain.
				 */
DAG_Node *AEChain = NULL;	/* head of lists of available expressions:
				 * AEChain[i] starts the av_expr_link chain
				 * of DAG nodes whose AEIndex is i
				 */

/*
 * Private
 */

# define NotHashed	NoId	/* This expression not hashed - must not be
				 * a valid identifier number
				 */

/* Hand out the next unused available expression index */
# define NewAEIndex() (NextAvail++)

/* Next index NewAEIndex() will return; starts at AELastIdentifier + 1 */
static AvailIndex NextAvail;
/* Highest expression index already processed by BlockDependencies() */
static AvailIndex LastAESeen;

/* See the "Storage Allocation" comment above AvailAllocStorage() for the
 * meaning of these three.
 */
static LongSet Universe;
static LongSet GlobalExpressions;
static LongSet *IDExpressions;	/* indexed by identifier number */

static BasicBlock GBlock;		/* Tunnel to GenDly() */
					/* (set by ComputeGen, also read by
					 * GenNode)
					 */

/*
 * Forward
 *
 * Old-style (non-prototype) declarations of the file-local routines that
 * are referenced before their definitions appear.
 */

static void	AvailHash();
static void	HashBlock();
static void	AvailAllocStorage();
static void	AvailFreeStorage();
static void	DataFlowEquations();
static Boolean  GenNode();
static void	ComputeGen();
static void	MakeDepend();
static void	UMULDepend();
static void	BlockDependencies();
static void	PAEInfo();
static void	GenDef();
static void	Done();
static void	GenDly();

/*
 * Entry point: work out the available expressions for every reachable
 * basic block of the flow graph.
 *
 * The passes run in this order:
 * 1) AvailHash: hash the DAG nodes so that each distinct expression is
 *	given its own index.  Expression indices are numbered upward from
 *	AELastIdentifier + 1; smaller indices are identifier numbers.
 * 2) AvailAllocStorage: allocate the sets and chains, whose size depends
 *	on how many indices step 1 handed out.
 * 3) BlockDependencies over all blocks, then ComputeGen over all blocks,
 *	to obtain Gen and Kill.
 * 4) DataFlowEquations: solve for In and Out.
 */

void
AvailableExpressions()
{
	int n;

	AELastIdentifier = MaxIdentifier;
	NextAvail = AELastIdentifier + 1;

	AvailHash();
	AvailAllocStorage();

	/* BlockDependencies() has not yet seen any expression index */
	LastAESeen = AELastIdentifier;

	n = 0;
	while( n < NumReachableNodes )
	{
		BlockDependencies(FlowGraph[DFN[n]].block);
		n++;
	}

	/* Kept as a separate pass: GenDef() consults the per-identifier
	 * sets that the loop above fills in across all blocks.
	 */
	n = 0;
	while( n < NumReachableNodes )
	{
		ComputeGen(FlowGraph[DFN[n]].block);
		n++;
	}

	DataFlowEquations();

	if( AvExprdebug > 1 )
	{
		PAEInfo();
	}

	AvailFreeStorage();
}

/*
 * Hash all DAG nodes of every reachable block.
 *
 * The blocks are visited in DFN order starting with DFN[0].  This must be
 * exactly the order used by the loop over BlockDependencies() in
 * AvailableExpressions(): that routine asserts that new expression indices
 * turn up in strictly increasing order, and both it and ComputeGen() read
 * d->AEIndex on the nodes of every block, the first one included.
 */

static void
AvailHash()
{
	int i;

	InitHash();
	for( i = 0; i < NumReachableNodes; i++ )
		HashBlock(FlowGraph[DFN[i]].block);
}

/*
 * Hashing for a single basic block "b".
 *
 * Walks the DAG nodes of the block along their "next" links and, for each
 * node, fills in two fields:
 *	AEIndex - the available expression index of the node, or NotHashed
 *	hash    - the cookie under which parents of this node are hashed
 * If AvExprdebug > 0 and the block has a DAG, the hashed graph is printed.
 */

static void
HashBlock(b)
	BasicBlock b;
{
	DAG_Node d;
	DAG_Node dhash;		/* matching node found in the hash table */

	for( d = b->Dag; d != NULL; d = d->next )
	{

		/* First, assign an available expression index to this
		 * DAG node:
		 * 1) If the node is not one that we would ordinarily hash,
		 *	or (for now) the node may not be evaluated,
		 *	assign it an index of NotHashed
		 * 2) If the node is one that we usually hash, try to find
		 *	it in the hash table.  If we succeed, the expression
		 *	is computed elsewhere, and has already been assigned
		 *	an index that we can use.
		 *	If we do not find this node in the hash table, then
		 *	assign it a new index.
		 *
		 * The available expression index for a leaf is defined to be
		 * its leaf id.
		 */

		if( d->in_cond != NotConditional )
		{
			d->AEIndex = NotHashed;
		}
		else
		switch(d->op)
		{
			/* These cannot possibly return something that we can
			 * do anything with
			 */

		case STASG:
		case STCALL:
		case UNARY STCALL:
		case CM:
		case COLON:
		case STARG:
			d->AEIndex = NotHashed;
			break;

			/* These cannot be used directly.  */

		case ASSIGN:
		case CALL:
		case UNARY CALL:
		case FORTCALL:
		case UNARY FORTCALL:
			d->AEIndex = NotHashed;
			break;

			/* Fetches through pointers are a bit of a problem.
			 * If we can find another tree with the same pointer
			 * we might still have different information about
			 * what is referenced by the two fetches.  (This
			 * can happen due to things like
			 *	*(a = &b)
			 * and later
			 *	*a
			 * If this turns out to be the case, we will not call
			 * the two the same subexpression.  This may be
			 * overly conservative and could be relaxed some day.
			 *
			 * Even if we have no idea where the fetch comes from
			 * we are conservative enough about calls and pointer
			 * stores that we can still look for common
			 * subexpressions, PROVIDED global things are not
			 * volatile.
			 *
			 * As coded, the fetch is hashed only when all of
			 * these hold: it is a fetch, its operand is hashed,
			 * d->indirect is known, and GoodGlobals is set.
			 */
		case UNARY MUL:
			if( d->is_fetch && d->u.in.left->hash != NotHashed &&
			    d->indirect != NoId && GoodGlobals )
			{
				dhash = UnaryHashLook(d->op, d->type,
					d->u.in.left, d->u.tn.rval);

				if( dhash != NULL )
				{
					/* Same pointer expression, but only
					 * the same available expression if
					 * both fetch from the same place.
					 */
					if(d->indirect != dhash->indirect)
						d->AEIndex = NotHashed;
					else
						d->AEIndex = dhash->AEIndex;
				}
				else
				{
					d->AEIndex = NewAEIndex();
					EnterHash(d);
				}
			}
			else
				d->AEIndex = NotHashed;
			break;

			/* Floating constants have their own lookup and
			 * entry routines, keyed on type and value.
			 */
		case FCON:
			dhash = FconHashLook( d->type, d->u.fpn.dval );
			if( dhash != NULL )
				d->AEIndex = dhash->AEIndex;
			else
			{
				d->AEIndex = NewAEIndex();
				(void) EnterFHash( d, d->u.fpn.dval );
			}
			break;

		default:
			switch(optype(d->op))
			{
			case LTYPE:
				d->AEIndex = d->leaf_id;
				break;

			case UTYPE:
				/* An unhashed operand makes the whole
				 * expression unhashable.
				 */
				if( d->u.in.left->hash == NotHashed )
				{
					d->AEIndex = NotHashed;
			/**/		break;
				}
				dhash = UnaryHashLook(d->op, d->type,
						d->u.in.left, d->u.tn.rval);
				if( dhash != NULL )
					d->AEIndex = dhash->AEIndex;
				else
				{
					d->AEIndex = NewAEIndex();
					EnterHash(d);
				}
				break;

			case BITYPE:
				/* Likewise if either operand is unhashed */
				if( d->u.in.left->hash == NotHashed ||
				    d->u.in.right->hash == NotHashed )
				{
					d->AEIndex = NotHashed;
			/**/		break;
				}
				dhash = BinaryHashLook(d->op, d->type,
					   d->u.in.left, d->u.in.right);
				/* Not found: if the operator has COMMFLG set
				 * and ASGFLG clear (by the flag names, a
				 * commutative non-assignment operator), try
				 * again with the operands swapped.
				 */
				if( dhash == NULL &&
				    (dope[d->op]&(ASGFLG|COMMFLG))==COMMFLG)
					dhash = BinaryHashLook(d->op, d->type,
						   d->u.in.right,d->u.in.left);
				if( dhash != NULL )
					d->AEIndex = dhash->AEIndex;
				else
				{
					d->AEIndex = NewAEIndex();
					EnterHash(d);
				}
				break;

			default:
				InternalFault("Bad optype for op %d", d->op);
			}
		}


		/* Now that we have determined the AEIndex for this node,
		 * we need to determine the hash cookie that will be used
		 * in hashing the parents of this node.  If the node has a
		 * legitimate carrier, use this.  If there is no legitimate
		 * carrier, use the expression itself.
		 * If the node is conditional, forget it.
		 *
		 * It is not clear what to do here if the carrier is set
		 * elsewhere.  For now, ignore it if it is transparent.
		 */

		if( d->in_cond != NotConditional )
			d->hash = NotHashed;
		else
		if( d->carrier != NoId && !IsTransparent(d->carrier) )
			d->hash = d->carrier;
		else
			d->hash = d->AEIndex;
	}

	if( AvExprdebug > 0  &&  b->Dag != NULL )
	{
		printf("Hashed Dag for block # %d\n", b->blocknum);
		PrintGraph(b->Dag);
	}
}

/*
 *	Storage Allocation:
 *
 *	AEChain: For each available expression, a pointer to a list of
 *		DAG nodes that generate that expression.
 *	GlobalExpressions: The set of available expressions that might
 *		be hurt by pointer stores and the like.
 *	Universe: The set of all available expressions.
 *	IDExpressions: For each identifier, the set of available expressions
 *		whose value is hurt when the identifier is killed.  Note
 *		that the identifier itself is included in this set in order to
 *		provide a cheap way of determining if the identifier is killed
 *		in any given block (if it is, it will appear in Kill for that
 *		block).
 *
 *	Must be called after hashing, since everything is sized by NextAvail.
 *	Any AEChain left over from an earlier call is released first.
 *
 *	GlobalExpressions and Universe are emptied as soon as they are
 *	created.  The identifier loop below inserts into GlobalExpressions,
 *	so clearing that set afterwards would throw those members away.
 */

static void
AvailAllocStorage()
{

	Identifier id;
	LongSet AESet;

	if( AEChain != NULL )
	{
		/* Still the size recorded by the previous call */
		DecreaseSpace(s_AvailExpr,
			NumAvailableExpressions * sizeof(DAG_Node));
		free(AEChain);
	}


	NumAvailableExpressions = NextAvail;

	GlobalExpressions = CreateSet(NumAvailableExpressions);
	NullSet(GlobalExpressions);

	Universe = CreateSet(NumAvailableExpressions);
	NullSet(Universe);

	IDExpressions = GetArray(s_AvailExpr, AELastIdentifier+1, LongSet);
	CheckStorage(IDExpressions, "Allocating Avail. Exprs. for ids, n = %d+1", AELastIdentifier);

	AEChain = GetArray(s_AvailExpr, NumAvailableExpressions, DAG_Node);
	CheckStorage(AEChain, "Allocating Avail. Exprs., n = %d", NumAvailableExpressions);

	for( id = FirstId; id <= AELastIdentifier; id++ )
	{
		/* Each identifier starts out hurting only itself */
		IDExpressions[id] = AESet = CreateSet(NumAvailableExpressions);
		NullSet(AESet);
		Insert((int)id, AESet);

		/* NAMEs and addressed identifiers can be hurt by calls and
		 * anonymous pointer stores.
		 */
		if( IdOp(id) == NAME  ||  WasAddressed(id) )
			Insert((int)id, GlobalExpressions);
	}
}

/*
 * Release the working storage obtained by AvailAllocStorage(): the two
 * global sets, the per-identifier sets, and the array that holds them.
 * AEChain is not released here.
 */
static void
AvailFreeStorage()
{
	Identifier ident;

	DestroySet(GlobalExpressions);
	DestroySet(Universe);

	ident = FirstId;
	while( ident <= AELastIdentifier )
	{
		DestroySet(IDExpressions[ident]);
		ident++;
	}

	free(IDExpressions);
	DecreaseSpace(s_AvailExpr, (AELastIdentifier+1)*sizeof(LongSet));
}

/*
 * For each available expression that is generated in block "b" and that has
 * not yet been seen, determine the set of identifiers on which the expression
 * depends, and add it to the universe of possible expressions.
 *
 * This routine also does a bit of extra initialization: every node that
 * generates an expression is pushed onto that expression's AEChain list,
 * and every node has its FGindex set from the block.
 */

static void
BlockDependencies(b)
	BasicBlock b;
{
	DAG_Node d;

	d = b->Dag;
	while( d != NULL )
	{
		/* New available expressions are recognized by visiting the
		 * basic blocks in the same order in which they were hashed.
		 * Each new expression then carries an index exactly one
		 * greater than the previous new one.
		 */

		if( d->AEIndex > LastAESeen )
		{
			assert( d->AEIndex - LastAESeen == 1);
			LastAESeen = d->AEIndex;
			Insert((int)d->AEIndex, Universe);
			AEChain[d->AEIndex] = NULL;

			if( d->op == UNARY MUL )
			{
				UMULDepend(d->AEIndex, d);
			}

			switch(optype(d->op))
			{
			case LTYPE:
				/* The only leaf that gets an expression
				 * index of its own is a floating constant.
				 */
				if( d->op != FCON )
					InternalFault("Non-identifier available expression %d on leaf", d->AEIndex);
				break;

			case BITYPE:
				MakeDepend(d->AEIndex, d->u.in.right);
				/* FALLTHROUGH */

			case UTYPE:
				MakeDepend(d->AEIndex, d->u.in.left);
				break;

			default:
				InternalFault("Bad optype for op %d", d->op);
			}
		}

		/* A node that generates an available expression goes on the
		 * front of the list of places where that expression is
		 * generated.
		 */

		if( d->AEIndex > AELastIdentifier )
		{
			d->av_expr_link = AEChain[d->AEIndex];
			AEChain[d->AEIndex] = d;
		}

		/* Leave a pointer back to the basic block in the DAG node.
		 * (This is also done in loop.c, but we do not want to rely
		 * on this.)
		 */
		d->FGindex = b->FGindex;

		d = d->next;
	}
}

/*
 * "d" is a UNARY MUL fetch of a known identifier appearing in available
 * expression "index".  Note that "index" depends on the value of the
 * identifier, and also that it may be hurt by anonymous pointer stores.
 *
 * The identifier in question is d->indirect; "index" is added to that
 * identifier's set and to GlobalExpressions.  The debug trace names
 * d->indirect as well, since that is the dependency actually recorded
 * (d->hash is the node's hash cookie, not the fetched identifier).
 */

static void
UMULDepend(index, d)
	AvailIndex index;
	DAG_Node d;
{
	assert( d->op == UNARY MUL && d->is_fetch && d->indirect != NoId );
	assert(IDExpressions[d->indirect] != NULL);

	Insert((int)index, GlobalExpressions);
	Insert((int)index, IDExpressions[d->indirect]);
	if( AvExprdebug > 3 )
		printf("AE %d depends on id %d and globals\n", index, d->indirect);
}

/*
 * "d" is a DAG node appearing in the computation of the available expression
 * "index".
 *
 * If d->hash is an identifier, then this node is a leaf of the available
 * expression.  The expression is added to the set of expressions killed by
 * that identifier.  If the identifier is a NAME (or something whose address
 * has been taken), the expression is also added to the set of available
 * expressions hurt by pointer stores, calls, etc.
 *
 * If d->hash is not an identifier, then this node is not a leaf of the
 * available expression and we recurse into its operands.
 */

static void
MakeDepend(index, d)
	AvailIndex index;
	DAG_Node d;
{
	if( d->hash > AELastIdentifier )
	{
		/* Interior node: keep descending */

		if( d->op == UNARY MUL && d->is_fetch )
		{
			UMULDepend(index, d);
			MakeDepend(index, d->u.in.left);
			return;
		}

		switch(optype(d->op))
		{
		case LTYPE:
			if( d->op != FCON )
				InternalFault("LTYPE node found in dependencies for available expression %d", index);
			break;

		case BITYPE:
			MakeDepend(index, d->u.in.right);
			/* FALLTHROUGH */

		case UTYPE:
			MakeDepend(index, d->u.in.left);
			break;

		default:
			InternalFault("Bad optype for op %d", d->op);
		}
		return;
	}

	/* Expression ends here: d->hash names an identifier */

	if( IdOp((Identifier)d->hash) == NAME ||
	    WasAddressed((Identifier)d->hash))
	{
		Insert((int)index, GlobalExpressions);
		if( AvExprdebug > 3 )
		{
			printf("AE %d depends on globals\n", index);
		}
	}

	assert(IDExpressions[d->hash] != NULL);
	Insert((int)index, IDExpressions[d->hash]);
	if( AvExprdebug > 3 )
	{
		printf("AE %d depends on id %d\n", index, d->hash);
	}
}

/*
 * For basic block "b", compute
 *  Gen: the set of available expressions generated by this block
 * Kill: the set of available expressions killed in this block
 * Comp: the set of available expressions computed in this block,
 *	conditionally or not
 *
 * The five sets hanging off the block are created the first time through
 * and reused after that.  Gen, Kill and Comp are all built up by
 * insertion in GenNode()/GenDef(), so all three are emptied here first;
 * otherwise Comp would start from whatever the set held before.
 * In and Out are given their starting values by DataFlowEquations().
 */

static void
ComputeGen(b)
	BasicBlock b;
{
	if( b->av.In == NULL )
	{
		b->av.In   = CreateSet(NumAvailableExpressions);
		b->av.Out  = CreateSet(NumAvailableExpressions);
		b->av.Gen  = CreateSet(NumAvailableExpressions);
		b->av.Comp = CreateSet(NumAvailableExpressions);
		b->av.Kill = CreateSet(NumAvailableExpressions);
	}
	NullSet(b->av.Gen);
	NullSet(b->av.Comp);
	NullSet(b->av.Kill);

	/* GenNode() receives only the DAG node; pass the block via GBlock */
	GBlock = b;
	(void) ActivityDag(b->Dag, GenNode);
}

static Boolean
GenNode(d)				/* GBlock is implied */
	DAG_Node d;
{
	AttachedID aid;

	/*
	 * 1) If this node computes an available expression, add it
	 *	to Comp.  If the computation is unconditional, add
	 *	it to Gen.
	 * 2) Figure out which available expressions might be hurt by
	 *	the computation represented by this node.  The order
	 *	of (1) and (2) ought to be interchangeable; this
	 *	order is conservative.
	 * 3) Finally, figure out which avail expressions will be hurt
	 *	by assignments to attached identifiers.  Note that
	 *	this must be done after (1) to ensure that i=i+1 does
	 * 	not get recorded as generating i+1
	 */

	if( d->AEIndex > AELastIdentifier )
	{
		Insert((int)d->AEIndex, GBlock->av.Comp);
		if( d->in_cond == NotConditional )
			Insert((int)d->AEIndex, GBlock->av.Gen);
	}

	switch(d->op)
	{
	case CALL:
	case UNARY CALL:
	case FORTCALL:
	case UNARY FORTCALL:
	case STCALL:
	case UNARY STCALL:
		Difference(GBlock->av.Gen, GBlock->av.Gen, GlobalExpressions);
		Union(GBlock->av.Kill, GBlock->av.Kill, GlobalExpressions);
		break;

	case UNARY MUL:
		if( !d->is_fetch )
		{
			if( d->indirect == NoId )
			{
				Difference(GBlock->av.Gen, GBlock->av.Gen,
					GlobalExpressions);
				Union(GBlock->av.Kill, GBlock->av.Kill,
					GlobalExpressions);
			}
			else
			{
				GenDef(GBlock, d->indirect);
			}
		}
		break;
	}

	for( aid = d->attached; aid != NULL; aid = aid->next )
		GenDef(GBlock, aid->id);

	if( d->delayed != NULL && d->special_delay )
		GenDly(d);

	UpdateActivity(d, Done);

	return False;
}

/*
 * Called once the activity count of "d" has reached zero (passed to
 * UpdateActivity() by GenNode(), and called directly by GenDly()).
 * Performs the delayed store on "d", if there is one and it is not a
 * special delay; GenNode() takes care of the special ones itself.
 */
static void
Done(d)
	DAG_Node d;
{
	assert(d->activity == 0);

	if( !d->delayed || d->special_delay )
		return;

	GenDly(d);
}

/* Do the delayed store that is waiting on leaf "d": record a definition
 * of the leaf's identifier in the current block (GBlock), then take one
 * off the activity count of the node d->delayed.  If that count has now
 * gone to 0 as well, carry on with Done() for that node.
 */

static void
GenDly(d)
	DAG_Node d;
{
	assert(optype(d->op) == LTYPE && d->delayed->activity > 0);

	GenDef(GBlock, d->leaf_id);

	if( --d->delayed->activity == 0 )
	{
		Done(d->delayed);
	}
}

/*	Block "b" contains a simple definition of identifier "id".
 *	Everything in the identifier's set (IDExpressions[id]) is taken
 *	out of Gen and put into Kill for the block.
 */

static void
GenDef(b, id)
	BasicBlock b;
	Identifier id;
{
	LongSet IDSet = IDExpressions[id];

	assert(IDSet != NULL);

	Difference(b->av.Gen, b->av.Gen, IDSet);
	Union(b->av.Kill, b->av.Kill, IDSet);
}

/*
 * Solve the data flow equations for available expressions.
 * See Aho & Ullman fig. 14.6
 *
 * For the block FlowGraph[0]:	In = empty,  Out = Gen
 * For every other block:	In = intersection of Out over the
 *				     reachable predecessors
 *				Out = (In - Kill) + Gen
 * The other blocks start with In = Universe and are swept in DFN order
 * until a complete sweep leaves every In unchanged.
 */

static void
DataFlowEquations()
{
	BasicBlock blk, pred_blk;
	int k;
	LongSet Meet;		/* scratch set for the new value of In */
	Boolean changed;
	FlowIndex node;
	PredNode pred;

	Meet = CreateSet(NumAvailableExpressions);

	/* Starting values: first block */

	blk = FlowGraph[0].block;
	NullSet(blk->av.In);
	Union(blk->av.Out, blk->av.In, blk->av.Gen);

	/* Starting values: all the rest */

	k = 1;
	while( k < NumReachableNodes )
	{
		blk = FlowGraph[DFN[k]].block;
		CopySet(blk->av.In, Universe);
		Difference(blk->av.Out, blk->av.In, blk->av.Kill);
		Union(blk->av.Out, blk->av.Out, blk->av.Gen);
		k++;
	}

	/* Sweep until a whole pass changes nothing.  The first pass is
	 * always made.
	 */

	changed = True;
	while( changed )
	{
		changed = False;

		for( k = 1; k < NumReachableNodes; k++ )
		{
			node = DFN[k];
			blk = FlowGraph[node].block;

			/* Meet over the reachable predecessors */
			CopySet(Meet, Universe);
			pred = FlowGraph[node].preds;
			while( pred != NULL )
			{
				pred_blk = FlowGraph[pred->p].block;
				if( pred_blk->reachable )
				{
					Intersection(Meet, Meet,
						pred_blk->av.Out);
				}
				pred = pred->next;
			}

			if( Not(SetEq(Meet, blk->av.In)) )
			{
				CopySet(blk->av.In, Meet);
				changed = True;
			}

			/* Out is recomputed whether or not In moved */
			Difference(Meet, Meet, blk->av.Kill);
			Union(blk->av.Out, Meet, blk->av.Gen);
		}
	}

	DestroySet(Meet);
}

/*
 * Debugging routines
 */
/*
 * Print the members of set "s" as comma-terminated decimal numbers,
 * indented by two tabs, starting a fresh line once 21 have gone out on
 * the current one.  Prints nothing at all for an empty set.
 */
static void
PAESet(s)
	LongSet s;
{
	int elem;
	int on_line;

	elem = FirstElement(s);
	if( elem == NoElement )
		return;

	printf("\t\t");
	for( on_line = 0; elem != NoElement; elem = NextElement(elem,s) )
	{
		if( on_line > 20 )
		{
			printf("\n\t\t");
			on_line = 0;
		}
		printf("%d,", elem);
		on_line++;
	}

	if( on_line != 0 )
	{
		printf("\n");
	}
}

/*
 * Print the available expression sets of block "b": always Gen, Kill and
 * Comp, plus In and Out when AvExprdebug > 2.
 */
static void
PAEBlock(b)
	BasicBlock b;
{
	/* NOTE(review): "b" is passed to a %o conversion; if BasicBlock is
	 * a pointer type this relies on old-compiler behaviour - confirm.
	 */
	printf("AE Block: %o (#%d)\n", b, b->blocknum);

	printf("\tGen:\n");
	PAESet(b->av.Gen);

	printf("\tKill:\n");
	PAESet(b->av.Kill);

	printf("\tComp:\n");
	PAESet(b->av.Comp);

	if( AvExprdebug <= 2 )
		return;

	printf("\tIn:\n");
	PAESet(b->av.In);

	printf("\tOut:\n");
	PAESet(b->av.Out);
}

/*
 * Print the available expression sets of every reachable block, in DFN
 * order.
 */
static void
PAEInfo()
{
	int k;

	k = 0;
	while( k < NumReachableNodes )
	{
		PAEBlock(FlowGraph[DFN[k]].block);
		k++;
	}
}
