/*---------------------------------------------------
 * file:    checksum.h
 * purpose: Routines for checksums and log-likelihood computation
 * author:  ahollowa@uci.edu
 * date:    1/14/09
 *-------------------------------------------------*/

 #include "mylib.h"
 #include "node.h" 
 #include "graph.h"
 #include "sampler.h"
 #include "alloc.h"
 
 /*
  * log_likelihood -- compute log p(X,P,L), split into four terms.
  *
  * Returns a 4-element vector obtained from dvec(4):
  *   [0] document paths: for every in-use node and each of its feasible
  *       targets, one contribution per count k = 0 .. etot_k[target]-1,
  *       built from alpha, beta, k, etot_k_agg[target] and perm[target].
  *   [1] level assignments: truncated geometric with parameter pi_d[doc],
  *       truncated at path_lengths[doc], summed over all ntot tokens.
  *   [2] unnormalized prior on pi_d: (a-1)*log(pi) + (b-1)*log(1-pi),
  *       summed over all D documents.
  *   [3] word distributions: lgamma terms over the W word counts cp[] of
  *       every in-use node with ztot != 0, with symmetric parameter eta.
  *
  * p, w, Nd, sum_levels, L and docconcept are accepted but never read here
  * (documents are not filtered by docconcept in this variant).
  *
  * NOTE(review): every term is accumulated with +=, so this presumes dvec()
  * hands back a zero-filled vector -- confirm in alloc.h. The caller
  * presumably owns the returned vector.
  */
double * log_likelihood(Graph *graph, int **p, int *path_lengths, int *l, int *d, int *w, int *Nd, int *sum_levels, double *pi_d, int ntot, int D, int L, int W, double alpha, double beta, double eta, double a, double b, int *docconcept){
	double *ll = dvec(4);
	int i, src, idx, doc, word;
	int active = 0;

	/* ---- [0] document paths ---- */
	for (src = 0; src < graph->capacity; src++) {
		if (graph->nodes[src].id != NOT_IN_USE) {
			/* Walk the feasible list from its last entry down to its first. */
			for (idx = graph->nodes[src].num_feasible - 1; idx >= 0; idx--) {
				int node = graph->nodes[src].feasible[idx];
				int pos  = graph->nodes[src].perm[node];
				int N_i;

				for (N_i = 0; N_i < graph->nodes[src].etot_k[node]; N_i++) {
					int N_grt_i = graph->nodes[src].etot_k_agg[node];
					/* Shared normalizer of the "skip" and "choose" factors. */
					double log_norm = log(alpha + beta + N_i + N_grt_i);

					/* Not stopping at any of the pos earlier clusters. */
					ll[0] += pos*(log(beta + N_i + N_grt_i) - log_norm);

					/* Choosing this cluster, already chosen N_i times. */
					ll[0] += log(alpha + N_i) - log_norm;
				}
			}
		}
	}

	/* ---- [1] levels: truncated geometric per token ---- */
	for (i = 0; i < ntot; i++) {
		double stop, miss;
		int depth;

		assert(pi_d[d[i]] > 0);
		assert(pi_d[d[i]] > DBL_MIN);

		stop  = pi_d[d[i]];
		miss  = 1 - stop;
		depth = path_lengths[d[i]];
		ll[1] += l[i]*log(miss) + log(stop) - log((1 - pow(miss, depth+1)));
	}

	/* ---- [2] unnormalized prior on the per-document parameter ---- */
	for (doc = 0; doc < D; doc++) {
		ll[2] += (a-1)*log(pi_d[doc]) + (b-1)*log(1-pi_d[doc]);
	}

	/* ---- [3] word distributions ---- */
	for (src = 0; src < graph->capacity; src++) {
		if (graph->nodes[src].id == NOT_IN_USE || graph->nodes[src].ztot == 0) {
			continue;
		}
		active++;
		for (word = 0; word < W; word++) {
			ll[3] += lgamma(eta + graph->nodes[src].cp[word]);
		}
		ll[3] -= lgamma(W*eta + graph->nodes[src].ztot);
	}
	/* One Dirichlet normalizing constant per node counted above. */
	ll[3] += active*(lgamma(W*eta) - W*lgamma(eta));

	return ll;
}
 
/*
 * log_likelihood_poisson -- compute log p(X,P,L), split into four terms,
 * with level assignments drawn from a truncated Poisson.
 *
 * Returns a 4-element vector obtained from dvec(4):
 *   [0] document paths: for every in-use node and each of its feasible
 *       targets, one contribution per count k = 0 .. etot_k[target]-1.
 *   [1] level assignments: Poisson with rate pi_d[doc], truncated to
 *       0 .. path_lengths[doc], summed over tokens whose document is not
 *       FIXED_PATH.
 *   [2] unnormalized Gamma(shape a, scale b) log-prior on the rate,
 *       (a-1)*log(rate) - rate/b, summed over documents that are not
 *       FIXED_PATH.
 *   [3] word distributions: lgamma terms over the W word counts cp[] of
 *       every in-use node with ztot != 0 and keep_cp_fixed != TRUE.
 *
 * Parameters actually read:
 *   graph        node table (capacity, nodes[].{id, feasible, num_feasible,
 *                perm, etot_k, etot_k_agg, ztot, keep_cp_fixed, cp})
 *   path_lengths per-document truncation point, indexed by document
 *   l, d         per-token level and document id, each of length ntot
 *   pi_d         per-document Poisson rate, length D
 *   docconcept   per-document flag compared against FIXED_PATH
 *   factorial    lookup table; read at l[i] and at path_lengths[doc], so it
 *                must cover both
 *   alpha, beta, eta, a, b   model hyperparameters
 * p, w, Nd, sum_levels and L are accepted but never read here.
 *
 * NOTE(review): every term is accumulated with +=, so this presumes dvec()
 * hands back a zero-filled vector -- confirm in alloc.h. The caller
 * presumably owns the returned vector.
 */
double * log_likelihood_poisson(Graph *graph, int **p, int *path_lengths, int *l, int *d, int *w, int *Nd, int *sum_levels, double *pi_d, int ntot, int D, int L, int W, double alpha, double beta, double eta, double a, double b, int *docconcept, int *factorial){
	int i, j, k, node, N_i, N_grt_i, pos, T = 0;
	double rate;
	double *ll = dvec(4);

	//Probability of document paths
	for(i=0; i < graph->capacity; i++){
		if( graph->nodes[i].id == NOT_IN_USE){ continue; }

		//Iterate through feasible nodes backwards...
		for(j=0; j < graph->nodes[i].num_feasible; j++){

			//The current feasible node and its position
			node = graph->nodes[i].feasible[ (graph->nodes[i].num_feasible - 1) - j];
			pos  = graph->nodes[i].perm[node];

			for(k=0; k < graph->nodes[i].etot_k[node]; k++){
				N_i = k;
				N_grt_i = graph->nodes[i].etot_k_agg[node];

				//The probability of NOT choosing an earlier cluster:
				// one factor per earlier cluster (pos of them), each using beta plus
				// the number of times we went beyond it (etot_k_agg plus the current count k).
				ll[0] += pos*(log(beta + N_i + N_grt_i) - log(alpha + beta + N_i + N_grt_i));

				//The probability of choosing the current cluster NODE:
				// alpha plus the number of times NODE was chosen before, which is k.
				ll[0] += log(alpha + N_i) - log(alpha + beta + N_i + N_grt_i);

				//Both quantities share the normalizing constant log(alpha + beta + N_i + N_grt_i)
			}
		}
	}

	//Probability of levels (truncated Poisson)
	for(i=0; i < ntot; i++){
		if( docconcept[d[i]] == FIXED_PATH){ continue; }
		rate = pi_d[d[i]];
		//A single check suffices: rate > DBL_MIN already implies rate > 0.
		assert(rate > DBL_MIN);
		//log Poisson pmf at l[i], minus the log of the mass on 0..path_length,
		//which equals Gamma(path_length+1, rate) / path_length!
		ll[1] += l[i]*log(rate) - log(factorial[l[i]]) - rate + log(factorial[path_lengths[d[i]]]) - log(gsl_sf_gamma_inc(path_lengths[d[i]]+1, rate));
	}

	//(Unnormalized) Probability of the Poisson rate under its Gamma(a, b) prior
	for(i=0; i < D; i++){
		if( docconcept[i] == FIXED_PATH){continue; }
		//i is a document index here, so pi_d is indexed by i directly. Going
		//through the token->document map d[] would score the wrong documents
		//and read past the end of d[] whenever D > ntot.
		//Gamma kernel in log space: (a-1)*log(x) - x/b.
		ll[2] += (a-1)*log(pi_d[i]) - (pi_d[i]/b);
	}

	//Probability of word distributions (if needed)
	T = 0;
	for(i=0; i < graph->capacity; i++){
		if( graph->nodes[i].id == NOT_IN_USE){ continue; }
		if( graph->nodes[i].ztot == 0 ){ continue; }
		if( graph->nodes[i].keep_cp_fixed == TRUE ){ continue; }
		T++;
		for(j=0; j < W; j++){
			ll[3] += lgamma( eta + graph->nodes[i].cp[j]);
		}
		ll[3] -= lgamma(W*eta + graph->nodes[i].ztot);
	}
	//One Dirichlet normalizing constant per node counted in T
	ll[3] += T*(lgamma(W*eta) - W*lgamma(eta));

	return(ll);
}
 


/*
 * checksum_dwl -- consistency check between two count tables.
 *
 * For every document d in [0, D) and level l in [0, L), sums dwl[d][w][l]
 * over all W words and asserts that the total equals dl[d][l].
 *
 * Returns 1 when every check passes. A mismatch trips assert(), so when
 * assertions are compiled out the function returns 1 unconditionally.
 */
int checksum_dwl(int ***dwl, int **dl, int D, int W, int L){
	int d, l;

	for (d = 0; d < D; ++d) {
		for (l = 0; l < L; ++l) {
			int cnt  = 0;
			int word = 0;

			/* Total for this (document, level) pair across the vocabulary. */
			while (word < W) {
				cnt += dwl[d][word][l];
				++word;
			}
			assert(cnt == dl[d][l]);
		}
	}

	return 1;
}
 
 /*
  * checksum_etot_k_agg -- verify etot_k_agg against etot_k for every node.
  *
  * For each in-use node, etot_k is first totalled over the node's feasible
  * list. The list is then walked in order; after each target's own etot_k is
  * subtracted, the remainder (the counts of the targets later in the list)
  * must equal etot_k_agg[target].
  *
  * A mismatch is printed to stdout and then trips assert(). Returns 1.
  */
 int checksum_etot_k_agg(Graph *graph){
	int i, k, sum, node;

	for (i = 0; i < graph->capacity; i++) {
		if (graph->nodes[i].id == NOT_IN_USE) {
			continue;
		}

		/* Pass 1: total count over all feasible targets of node i. */
		sum = 0;
		k = 0;
		while (k < graph->nodes[i].num_feasible) {
			node = graph->nodes[i].feasible[k];
			sum += graph->nodes[i].etot_k[node];
			k++;
		}

		/* Pass 2: peel off one target at a time and compare the remainder. */
		for (k = 0; k < graph->nodes[i].num_feasible; k++) {
			node = graph->nodes[i].feasible[k];
			sum -= graph->nodes[i].etot_k[node];

			if (sum == graph->nodes[i].etot_k_agg[node]) {
				continue;
			}

			/* Report the offending edge before the assertion fires. */
			printf("from=%d to=%d sum = %d etot=%d\n", i, node, sum, graph->nodes[i].etot_k_agg[node]);
			assert(sum == graph->nodes[i].etot_k_agg[node]);
		}
	}

	return 1;
 }
 
 /*
  * checksum_edge_cnts_agg -- placeholder for an edge_cnts_agg check.
  *
  * No verification is performed: the graph is not inspected and the function
  * reports success (1) unconditionally.
  */
 int checksum_edge_cnts_agg(Graph *graph){
	(void)graph;
	return 1;
 }
 