/***************************************************************************
 *                                                                         *
 *                  (begin: Feb 20 2003)                                   *
 *                                                                         *
 *   Parallel IQPNNI - Important Quartet Puzzle with NNI                   *
 *                                                                         *
 *   Copyright (C) 2005 by Le Sy Vinh, Bui Quang Minh, Arndt von Haeseler  *
 *   Copyright (C) 2003-2004 by Le Sy Vinh, Arndt von Haeseler             *
 *   {vinh,minh}@cs.uni-duesseldorf.de                                     *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#include "iqp.h"
#include <cstring>
#include <math.h>
#include <time.h>

#include "constant.h"
#include "model.h"
#include "ali.h"
#include "utl.h"
#include "outstream.h"
#include "usertree.h"
#include "br.h"
#include "treels.h"
#include "bionj.h"
#include "clusterarr.h"
#include "rate.h"
#include "prediction.h"
#include "rannum.h"
#include "urtreepar.h"
#include "urtreepararray.h"
#include "interface.h"

// Upper bound on the number of rounds of the parameter optimization loop
// in IQP::optPam().
int MAX_IT_ALL_PAM_OPT = 10;



using namespace std;
// Defined in another translation unit. IQP::escapeLocalOpt() starts a fresh
// search when it is 0 and otherwise reloads the results of a previous run.
extern int isContinuous;

#ifdef PARALLEL
// Receive buffer for the stop message of the master (see the MPI_Irecv posted
// in IQP::escapeLocalOpt()); the master sends the value 1 to inform a slave
// that the optimization has come to an end.
int stop_signal = 0;
// Request handle of the pending non-blocking receive into stop_signal.
MPI_Request stop_request = MPI_REQUEST_NULL;
#endif

//--------------------------------------------------------------------
/**
	Initialise the search object before it is run on data: the number of
	sequences reported by the global alignment is cached in nSeq_.
*/
void IQP::init ()
{
	nSeq_ = alignment.getNSeq();
}


/**
	one step in the optimization phase (try to escape from a local optimum):
	a subset of the external nodes is removed from the tree and reinserted
	again, afterwards the log likelihood of the resulting tree is recomputed
@param orderNdNoLs random order of the sequence numbers, used when reinserting
	the removed sequences; its first entry is used to re-create the rooted tree
@param tree the tree to be perturbed, on return the new local optimal tree
@param bestTree best tree of the whole process so far
@param bestClusterArr cluster array of best tree
@return log likelihood of the new local optimal tree; in the parallel version
	0 is returned when the stop signal arrived before the likelihood was recomputed
*/
//=================================================================
double IQP::escape (Vec<int> &orderNdNoLs, UrTree &tree, UrTree &bestTree, ClusterArr &bestClusterArr) {
	// toDelete_[i] is 1 if sequence i is to be removed from the tree, 0 otherwise
	int *toDelete_ = new int[nSeq_];
	int seqNo_;

	if (major_step_test) {
		// test mode: no random numbers, just mark the sequences below probDel_ * nSeq_
		for (seqNo_ = 0; seqNo_ < nSeq_; seqNo_ ++)
			toDelete_[seqNo_] = (seqNo_ < probDel_ * nSeq_) ? 1 : 0;
	} else {
		// one random draw per sequence decides whether it is marked
		// (presumably getRanInt(100) is uniform on 0..99, so the chance is probDel_)
		for (seqNo_ = 0; seqNo_ < nSeq_; seqNo_ ++) {
			int ranNum_ = RanNum::getRanInt(100);
			toDelete_[seqNo_] = (ranNum_ < probDel_ * 100) ? 1 : 0;
		}
	}

	// remove the marked external nodes from the tree
	int nSeqDeled_ = 0;
	for (seqNo_ = 0; seqNo_ < nSeq_; seqNo_ ++)
		if (toDelete_[seqNo_] == 1) {

			tree.delExNd (seqNo_);
			nSeqDeled_ ++;

			// only two sequences are left in the tree: stop deleting and
			// unmark the remaining sequences so that they are not reinserted
			if (nSeqDeled_ + 2 == nSeq_) {
				for (int count_ = seqNo_ + 1; count_ < nSeq_; count_ ++)
					toDelete_[count_] = 0;
				break;
			}

		}

	// reinsert the deleted external nodes to the tree, using the random order from orderNdNoLs
	for (int count_ = 0; count_ < nSeq_; count_ ++) {
		seqNo_ = orderNdNoLs[count_];
		if (toDelete_[seqNo_] == 1)
			tree.insertNd (seqNo_, nRep_);
	}

	// the array was allocated with new[], so it has to be released with delete[]
	delete [] toDelete_;

	// create the rooted tree
	tree.reCreateRootedTree (orderNdNoLs[0]);

#ifdef PARALLEL
	// the master asked us to stop: skip the expensive likelihood computation
	if (stopSignalCome())
		return 0;
#endif

	double logLi_ = tree.reCmpLogLi (COMPLETE, bestTree, bestClusterArr, 1);
	return logLi_;
}

//****************************************************************************************************************************************************************
/**
	optimize the parameters: the rate parameters (gamma shape, proportion of
	invariable sites) and the model parameters are optimized in turn, followed
	by a branch length optimization of opt_urtree. This is repeated at most
	MAX_IT_ALL_PAM_OPT times and stops earlier as soon as no parameter changed
	or the log likelihood did not move by at least EPS_MODEL_FUNC_ERROR.
*/
void IQP::optPam () {
	UrTree bestTree_;
	bestTree_.getOptUrTree ();

#ifdef PARALLEL
	double start_time = MPI_Wtime();
#else
	time_t start_time = time(NULL);
#endif

	// log likelihood after the branch optimization of the previous round
	double prevLogLi_ = 0;

	for (int round_ = 1; round_ <= MAX_IT_ALL_PAM_OPT; round_ ++) {
		mymodel.setPamStep(round_);
		if (isMasterProc())
			cout << "(" << round_ <<") ";

		// becomes true as soon as one optimizer reports a changed parameter
		bool pamChanged_ = false;

		// remember the rate parameters in order to detect a change
		const double gammaShapeBefore_ = myrate.getGammaShape ();
		const double probInvarBefore_ = myrate.prob_invar_site;

		const bool estimateRate_ = myrate.getType () == GAMMA &&
			(myrate.getGammaShapeType () == ESTIMATE || myrate.getInvarSiteType() == ESTIMATE);
		if (estimateRate_) {
			myrate.optRatePam ();
			const double gammaShapeAfter_ = myrate.getGammaShape ();
			if (fabs (gammaShapeAfter_ - gammaShapeBefore_) > EPS_GAMMA_SHAPE_ERROR ||
				fabs (myrate.prob_invar_site - probInvarBefore_) > EPS_MODEL_PAM_ERROR )
				pamChanged_ = true;
		}

		if (mymodel.optPam () == 1)
			pamChanged_ = true;

		cout.precision(10);
		if (isMasterProc())
			cout << "LogL = " << opt_urtree.getLogLi() << endl << endl;

		// nothing moved in this round: the parameters have converged
		if (!pamChanged_)
			break;

		// adapt the branch lengths to the new parameters
		const double curLogLi_ = opt_urtree.optBranches(MAX_IT_UR_BR_PAM_OPT);
		if (fabs(prevLogLi_ - curLogLi_) < EPS_MODEL_FUNC_ERROR)
			break;
		prevLogLi_ = curLogLi_;
	}

#ifdef PARALLEL
	double end_time = MPI_Wtime();
#else
	time_t end_time = time(NULL);
#endif // PARALLEL
	if (isMasterProc())
		cout << "Time optimizing parameter(s): " << end_time - start_time << endl;

}


//****************************************************************************************************************************************************************
/**
the reestimate of parameters at the final step, based on best tree found so far
@param nIter number of iterations of the search; if it is positive the
	parameters are re-optimized and the tree is optimized with doConOpt (1, ...),
	i.e. topology as well as branch lengths; otherwise the parameters are
	left alone and only doConOpt (0, ...) is used in the site-specific rate loop
@param bestTree current best tree, it may be modified by the optimizations
@return the likelihood of the best tree
*/
double IQP::tuneFineParameter (int nIter, UrTree &bestTree) {

	bestTree.createOptUrTree();

	// re-estimate the model/rate parameters on the best tree, if any of them is to be estimated
	if (nIter > 0 &&
	        (mymodel.isEstedPam () == 1 ||
	         (myrate.getType () == GAMMA && (myrate.getGammaShapeType () == ESTIMATE || myrate.getInvarSiteType() == ESTIMATE)) ||
	         myrate.getType () == SITE_SPECIFIC)) {

		if (isMasterProc())
			std::cout <<"Re-Optimizing parameters..." << endl;
		optPam ();
		alignment.cmpGenDis (alignment.out_prefix);
	}

	double bestLogLi_ = opt_urtree.cmpLogLi ();
	if (myrate.getType () == SITE_SPECIFIC) {
		if (alignment.getNSeq() < 30 && isMasterProc()) 
			cout << endl << separator_str << endl << 
			"WARNING: Number of sequences is too small, site-specific rate estimates" << endl << 
			"         may NOT be stably inferred!" << endl << separator_str << endl;

		// alternate between estimating the site-specific rates and optimizing the
		// tree until the rates do not change, the likelihood does not improve
		// or the maximum number of rounds is reached
		int nIter_ = 0;
		do {
			nIter_ ++;
			ClusterArr bestClusterArr_;
			bestTree.createCluster(bestClusterArr_);

			Vec<SeqPair> indSeqPairArr_;
			if (mymodel.ap_sitespec)
				bestTree.createAllSeqPair (indSeqPairArr_);
			else
				bestTree.createIndSeqPair (indSeqPairArr_);

			// from the second round on the message below reads "Re-Optimizing ..."
			if (nIter_ > 1 && isMasterProc())
				std::cout <<"Re-";

			int isChanged_ = myrate.optSpecificSite (indSeqPairArr_);
			if (isChanged_ == 0)
				break;

			if (isMasterProc())
				std::cout <<"Optimizing the tree using site-specific substitution rates... " << endl;
			bool topo_changed;
			double oldLogLi_ = bestLogLi_;
			if (nIter > 0) 
				bestLogLi_ = bestTree.doConOpt (1, topo_changed);
			else
				bestLogLi_ = bestTree.doConOpt (0, topo_changed);
			// like all other progress messages, only the master reports the likelihood
			if (isMasterProc())
				cout << "LogL = " << bestLogLi_ << endl;
			if (bestLogLi_ <= oldLogLi_ + EPS_LOGLI_SITE_SPECIFIC_RATE_OPT)
				break;
		} while (nIter_ < MAX_IT_TREE_SITE_SPECIFIC_RATE_OPT);


	}

	if (nIter > 0) {
	
		if (isMasterProc())
			std::cout <<"Optimizing the final tree topology as well as branch lengths..." << endl;
		bool topo_changed;
		bestLogLi_ = bestTree.doConOpt(1, topo_changed);
	}
	std::cout.precision (10);
	if (isMasterProc())
		std::cout <<"Final best log likelihood: " << bestLogLi_ << endl;
	return bestLogLi_;
}

//****************************************************************************************************************************************************************
/**
	load all already reconstructed intermediate trees from the file into the memory.
	The tree list file consists of records of three lines each: the program time,
	the log likelihood and the tree in newick form. The prediction file is
	rewritten with the iteration number of every tree that improved the
	likelihood and changed the topology.
@param params cur_nIter is set to the number of trees read, bestLogLi to the
	best log likelihood among them
@param treeLsFileName name of the tree list file to read
@param bestClusterArr cluster array of the best tree, updated together with it
@param allCluArr collects the clusters of all trees if params.build_consensus is set
@param bestTree receives the tree with the best log likelihood
@param nImpTree receives the number of improved trees
@param predictionFileName name of the prediction file to (re)write
@param last_progTime receives the program time stored with the last valid tree
*/
void IQP::loadTreeLs (CommonParameters &params, const char *treeLsFileName, ClusterArr &bestClusterArr, ClusterArr &allCluArr,
                      UrTree &bestTree, int &nImpTree, const char *predictionFileName,
                      double &last_progTime) {

	ifstream treeLsFile_;
	treeLsFile_.open (treeLsFileName);
	ofstream predictionFile_;
	predictionFile_.open (predictionFileName);

	params.bestLogLi = -INFINITIVE;
	params.cur_nIter = 0;
	nImpTree = 0;

	string timeLine_;
	string logLiLine_;
	string treeLine_;

	// The loop is driven by the success of the reads instead of eof(): a file
	// that could not be opened never reaches eof and would loop forever, and an
	// incomplete record at the end of the file is simply ignored.
	while (getline (treeLsFile_, timeLine_) &&
	        getline (treeLsFile_, logLiLine_) &&
	        getline (treeLsFile_, treeLine_)) {

		double tmpProgTime_ = strtod (timeLine_.c_str(), NULL);
		double curLogLi_ = strtod (logLiLine_.c_str(), NULL);

		Vec<char> newickTree_;
		newickTree_.set (treeLine_.c_str());

		if (newickTree_.getSize () > 0) {
			UserTree curTree_;
			curTree_.setNewickTree (newickTree_);
			int status_ = curTree_.createUrTree ();
			if (status_ == NEWICK_TREE_FORM) {
				params.cur_nIter ++;
				last_progTime = tmpProgTime_;

				if (params.build_consensus) {
					ClusterArr newClusterArr_;
					curTree_.createCluster(newClusterArr_);
					for (int count_ = 0; count_ < newClusterArr_.totalNCluster_; count_ ++)
						allCluArr.addConTree(newClusterArr_[count_]);
				}

				if (curLogLi_ > params.bestLogLi + ZERO) {
					params.bestLogLi = curLogLi_;
					bestTree = curTree_;
					// count it as an improvement only if the topology is different
					if (bestTree.cmpDis( bestClusterArr) != 0) {
						nImpTree ++;
						predictionFile_ << params.cur_nIter << endl;
						bestTree.createCluster (bestClusterArr);
					}
				} //end of if curLogLi_
			} //end of if status
		}  //end of if newickTree

	}  // end of while

	treeLsFile_.close ();
	predictionFile_.close ();
}

/**
	load the best tree of a previous run from the tree file (a single tree in
	newick form on the first line) and count the entries of the prediction file
@param params progTime is read as the program time of the loaded tree,
	build_consensus decides whether the clusters of the tree are collected
@param treeFileName name of the tree file to read
@param bestClusterArr receives the cluster array of the loaded tree
@param allCluArr collects the clusters of the tree if params.build_consensus is set
@param bestTree receives the loaded tree
@param nImpTree receives the number of strictly increasing values found in the
	prediction file (0 if the file cannot be opened)
@param predictionFileName name of the prediction file to read
@param last_progTime receives params.progTime if the tree was loaded
*/
void IQP::loadTreeFile (CommonParameters &params, const char *treeFileName, ClusterArr &bestClusterArr, ClusterArr &allCluArr,
	                        UrTree &bestTree, int &nImpTree, const char *predictionFileName, double &last_progTime) {
	ifstream treeFile_;
	treeFile_.open (treeFileName);
	nImpTree = 0;

	string aLine_;

	double tmpProgTime_ = params.progTime;

	Vec<char> newickTree_;
	getline (treeFile_, aLine_);
	newickTree_.set (aLine_.c_str());

	if (newickTree_.getSize () > 0) {
		UserTree curTree_;
		curTree_.setNewickTree (newickTree_);
		int status_ = curTree_.createUrTree ();
		if (status_ == NEWICK_TREE_FORM) {
			last_progTime = tmpProgTime_;

			if (params.build_consensus) {
				ClusterArr newClusterArr_;
				curTree_.createCluster(newClusterArr_);
				for (int count_ = 0; count_ < newClusterArr_.totalNCluster_; count_ ++)
					allCluArr.addConTree(newClusterArr_[count_]);
			}
			bestTree = curTree_;
			bestTree.createCluster (bestClusterArr);
		} else {
			Utl::announceError("Something wrong with .treefile");
		}//end of if status
	}  //end of if newickTree
	treeFile_.close ();

	ifstream predictionFile_;
	predictionFile_.open (predictionFileName);
	if (!predictionFile_.is_open()) return;

	// Count the strictly increasing entries of the prediction file. The loop is
	// driven by the success of the extraction: testing eof() instead never
	// terminates on a non-numeric token, and the value left behind by the last,
	// failed extraction (unchanged or zero, depending on the language standard)
	// must not be counted.
	double old_time = -1.0;
	double tmpTime_ = -1.0;
	while (predictionFile_ >> tmpTime_) {
		if (tmpTime_ > old_time) {
			nImpTree ++;
			old_time = tmpTime_;
		}
	}
	predictionFile_.close ();
}

//****************************************************************************************************************************************************************

/**
	true while the main optimization loop of IQP::escapeLocalOpt() is running;
	the stop signal of the master is only honoured during that time
*/
bool optimization_step = false;

/** mark the beginning of the main optimization loop */
void startOptimizationStep()
{
	optimization_step = true;
}

/** mark the end of the main optimization loop */
void stopOptimizationStep()
{
	optimization_step = false;
}

/** @return true if the main optimization loop is currently running */
inline bool isOptimizationStep()
{
	return optimization_step;
}

#ifdef PARALLEL
bool stopSignalCome() {
	if (! isOptimizationStep())
		return false;
	if (isMasterProc())
		return false;
	if (stop_request == MPI_REQUEST_NULL)
		return stop_signal;
	int flag;
	MPI_Status status;
	MPI_Test(&stop_request, &flag, &status);
	return flag;
}
#endif


//****************************************************************************************************************************************************************
/**
	main optimization step: remove nodes, reinsert by IQP, branch swapping by NNI.
	First an initial tree is built (user tree or BIONJ tree) and optimized, or the
	state of a previous run is reloaded if isContinuous is set. Then the escape
	step is iterated until the maximum number of iterations is reached, which may
	be raised by the stopping rule prediction. Finally the parameters and the
	best tree are tuned by tuneFineParameter().
@param params common parameters of the run; read: nRep, probDel, nIter,
	max_nIter, stoppingRule, build_consensus; updated: cur_nIter, bestLogLi,
	progTime and, at the end, nIter (set to the number of iterations done).
	The stopping rule is one of YES, YES_MIN_ITER, YES_MAX_ITER, NO \
	YES_MIN_ITER: number of iterations is at least the user-predefined number
	YES_MAX_ITER: number of iterations is at most the user-predefined number
@param in_pam input parameters; tree_file optionally names a user tree to start from
@param bestTree current best tree, to return
@param conTree consensus tree, created by the master if params.build_consensus is set
@param beginTime beginning time, used to compute the running time
@return likelihood of the best tree
*/
double IQP::escapeLocalOpt (CommonParameters &params, InputParameters &in_pam, UrTree &bestTree, Vec<char> &conTree, time_t &beginTime) {

	string topoFileName_;
	string predictionFileName_;
	string treeLsFileName_;
	string bionjFileName_;
	string distFileName_;
	string checkPointFileName_;

	getOutFileName(SUF_TREEFILE, topoFileName_);
	getOutFileName(SUF_PREDICTION, predictionFileName_);
	getOutFileName(SUF_TREELS, treeLsFileName_);
	getOutFileName(SUF_BIONJ, bionjFileName_);
	getOutFileName(SUF_DIST, distFileName_);
	getOutFileName(SUF_CHECKPOINT, checkPointFileName_);
	
	nSeq_ = alignment.getNSeq ();
	nRep_ = params.nRep;
	probDel_ = params.probDel;

	int nSite_ = alignment.getNSite ();
	if (isMasterProc())
		std::cout <<"number sequences = " << nSeq_ << "; sequence length = " << nSite_ << endl;

	// clusters of all intermediate trees, used for the consensus tree
	ClusterArr allCluArr_;
	allCluArr_.setupIndex ();

	// number of iterations suggested by the stopping rule, -1 if not predicted yet
	int nPredictedIter_ = -1;
	// number of improved trees found so far (the initial tree counts as one)
	int nImpTree_ = 1;
	

	Prediction prediction_;

	// running time accumulated by a previous, interrupted run
	double stoppedProgTime_ = 0.0;
	int maxNIter_ = params.nIter;

	if (isContinuous == 0) {
		params.cur_nIter = 1;
#ifdef PARALLEL
		double start_time = MPI_Wtime();
#else
		time_t start_time = time(NULL);
#endif // PARALLEL
		
		UserTree initialTree_;
		bool hasInitTree = false;
		
		if (in_pam.tree_file != NULL) {
			std::ifstream userTreeFile_;
			userTreeFile_.open (in_pam.tree_file);
			// use the user tree only if its file could be opened; fail() is
			// what the former comparison of the stream with 0 tested, but it
			// also compiles with C++11 and later
			if (!userTreeFile_.fail()) {
				initialTree_.readFile (in_pam.tree_file);
				initialTree_.createUrTree ();
				hasInitTree = true;
			} 
			userTreeFile_.close();
		}
		
		if (!hasInitTree) {
			if (isMasterProc()) {
				cout << "Creating neighbour joining tree... ";
				cout.flush();
				// only master does the neighbour joining tree
				BioNj bioNj_;
				bioNj_.create (distFileName_.c_str(), bionjFileName_.c_str());
			}
			if (isMasterProc()) {
#ifdef PARALLEL
				cout << MPI_Wtime() - start_time << " s" << endl;
#else
				cout << time(NULL) - start_time << " s" << endl;
#endif // PARALLEL
			}
#ifdef PARALLEL
			// wait until master finishes doing the neighbor joining tree
			MPI_Barrier(MPI_COMM_WORLD);
#endif // PARALLEL
			initialTree_.readFile (bionjFileName_.c_str());
			initialTree_.createUrTree ();
			
		} else {
			if (isMasterProc())
				cout  << "Start the search from the user tree!" << endl;
		}
		//   bionjTree_.draw (BR_LEN, 0);

		// check the tree, positive branch lengths...
		initialTree_.checkUrTree ();


		// optimizing the branch length
		if (isMasterProc()) {
			cout << "Optimizing branch lengths... ";
			cout.flush();
		}
#ifdef PARALLEL
		start_time = MPI_Wtime();
#else
		start_time = time(NULL);
#endif // PARALLEL
		bool topo_changed;
		double bionjLogLi_ =  initialTree_.doConOpt (0, topo_changed, 1);
		if (isMasterProc()) {
#ifdef PARALLEL
			cout << MPI_Wtime() - start_time << " s" << endl;
#else
			cout << time(NULL) - start_time << " s" << endl;
#endif // PARALLEL

		}
		//  std::cout << bionjLogLi_ << endl;

		// estimate the parameters
		if ( mymodel.isEstedPam () == 1 ||
		        (myrate.getType () == GAMMA && (myrate.getGammaShapeType () == ESTIMATE ||
					myrate.getInvarSiteType() == ESTIMATE)) ) {
			//cout << " optPam: " << mpi_myrank << endl;
			optPam ();
			initialTree_.getOptUrTree();
			if (program_mode == PROG_PARM_ESTIMATE)
				Finalize(0);
			alignment.cmpGenDis (alignment.out_prefix);
		}
		if (!hasInitTree) {
			if (isMasterProc()) {
				cout << "Creating neighbour joining tree on corrected distances..." << endl;
				cout.flush();
				// only master does the neighbour joining tree
				BioNj bioNj_;
				bioNj_.create (distFileName_.c_str(), bionjFileName_.c_str());
			}
/*
#ifdef PARALLEL
			// wait until master finishes doing the neighbor joining tree
			MPI_Barrier(MPI_COMM_WORLD);
#endif // PARALLEL

			initialTree_.readFile (bionjFileName_.c_str());
			initialTree_.createUrTree ();
			bionjLogLi_ = initialTree_.doConOpt (0, topo_changed);
			if (isMasterProc())
				cout << "BIONJ tree log-likelihood = " << bionjLogLi_ << endl;*/
		}

		if (program_mode != PROG_ALL)
			Finalize(0);
		// do the NNI
		if (isMasterProc()) {
			if (maxNIter_ > 0)
				cout << "Doing Nearest Neighbour Interchange... ";
			else
				cout << "Reoptimizing branch lengths... ";
			cout.flush();
		}
#ifdef PARALLEL
		start_time = MPI_Wtime();
#else
		start_time = time(NULL);
#endif // PARALLEL
// DEBUG: uncomment the following when finished debugging

		if (maxNIter_ > 0)
			bionjLogLi_ = initialTree_.doConOpt (1, topo_changed);
		else
			bionjLogLi_ = initialTree_.doConOpt (0, topo_changed);

#ifdef PARALLEL
		MPI_Barrier(MPI_COMM_WORLD);
#endif

		if (isMasterProc()) {
#ifdef PARALLEL
			cout << MPI_Wtime() - start_time << " s" << endl;
#else
			cout << time(NULL) - start_time << " s" << endl;
#endif // PARALLEL

		}

		if (nni_test) 
			Finalize(0);
		
		// the optimized initial tree is the first best tree
		bestTree = initialTree_;
		params.bestLogLi = bionjLogLi_;
		bestTree.createCluster (bestClusterArr_);

		
		if (params.build_consensus)
			for (int clusterNo_ = 0; clusterNo_ < bestClusterArr_.totalNCluster_; clusterNo_ ++)
				allCluArr_.addConTree(bestClusterArr_[clusterNo_]);

		time_t currentTime_;
		time(&currentTime_);

		//cout << mpi_myrank << ": we are here!" << endl;


		params.progTime = difftime (currentTime_, beginTime);

		if (isMasterProc()) {
			if (params.build_consensus) bestTree.writeNewickForm (treeLsFileName_.c_str(), 0,  params.progTime);

			std::cout << "We have constructed the initial tree !!!" << endl;
			std::cout.precision (10);
			std::cout << "The currently best log likelihood = " << params.bestLogLi  << endl;

			// the prediction file lists the iterations at which a better tree was found
			std::ofstream predictionFile_;
			predictionFile_.open (predictionFileName_.c_str());
			predictionFile_ << 1 << endl;
			predictionFile_.close ();
			bestTree.writeTop(topoFileName_.c_str(), 0);
		}
		//int isFinished_ = 0;
		if (isMasterProc())
			writePamCheckPoint(params, checkPointFileName_.c_str(), 0);

	} else {//if isContinuous == 0
		if (isMasterProc())
			std::cout <<"Loading the data from the last stopped point ...";
		if (params.build_consensus) 
			loadTreeLs (params, treeLsFileName_.c_str(), bestClusterArr_, allCluArr_, bestTree, nImpTree_, predictionFileName_.c_str(), stoppedProgTime_);
		else {
			loadTreeFile(params, topoFileName_.c_str(), bestClusterArr_, allCluArr_, bestTree, nImpTree_,  predictionFileName_.c_str(), stoppedProgTime_);
		}

		if (nImpTree_ >= START_PREDICTION /*&& params.stoppingRule != NO*/) {
			double tmpNPreIter_;
			prediction_.predict(predictionFileName_.c_str(), 0.95, tmpNPreIter_);
			nPredictedIter_ = static_cast<int> (tmpNPreIter_);
			if (params.stoppingRule != NO) {
				maxNIter_ = Utl::getMax (params.nIter, nPredictedIter_);
				if (params.stoppingRule == YES_MAX_ITER)
					maxNIter_ = Utl::getMin (params.max_nIter, maxNIter_);
			}
		}

		if (isMasterProc())
			std::cout << endl << "Continuing from the last stopped point ..." << endl;
	}

	// program time at which the progress was reported the last time
	double lastProgTime_ = 0.0;

	//***********************************************************************************************
	//  start IQPNNI
	//***********************************************************************************************


#ifdef PARALLEL

	double start_time = MPI_Wtime();

	/* following variables are for workers only, used to communicate with master */
	// tree_from_master is used for receiving data from master, non blocking communication
	UrTreePar tree_from_master(bestTree);
	// tree_to_master is used for sending data to master, non blocking communication
	UrTreePar tree_to_master;


	/* following array variables are for master only, used to communicate with workers */
	// tree array of master, use to receive data from all slaves
	UrTreeParArray tree_from_slaves(bestTree);
	if (isMasterProc())
		tree_from_slaves.receiveFromSlaves();
	UrTreeParArray tree_to_slaves(bestTree);


	// every slave posts a non-blocking receive for the stop signal of the master
	int TAG_STOP_SIGNAL = 100;
	if (isSlaveProc())
		MPI_Irecv(&stop_signal, 1, MPI_INT, mpi_master_rank, TAG_STOP_SIGNAL, MPI_COMM_WORLD, &stop_request);

		
#endif // PARALLEL

	startOptimizationStep();
	while (params.cur_nIter < maxNIter_) {
		params.cur_nIter ++;

#ifdef PARALLEL
		if (isSlaveProc()) {
			if (stopSignalCome())
				break;
		}
#endif // PARALLEL

		UrTree curTree_;
		double curLogLi_;

#ifdef PARALLEL		
		// process only useful for master, it is the process rank which send tree to master
		int process = 0;
#endif

		if (
#ifdef PARALLEL 
			isSlaveProc() || mpi_size == 1
#else
			true
#endif // PARALLEL
		) {
			// if i'm a slave proc, or only 1 process spawn
			Vec<int> orderNdNoLs_;
			RanNum::createOrderNd (nSeq_, orderNdNoLs_);
			
			// perturb a copy of the best tree
			curTree_ = bestTree;
			curLogLi_ = escape (orderNdNoLs_, curTree_, bestTree, bestClusterArr_);
			if (curTree_.isRootedTree())
				cout << "curTree_ is rooted" << endl;
			
#ifdef PARALLEL
			// non blocking send the result tree to master

			if (stopSignalCome())
				break;
			if (mpi_size > 1) {
				tree_to_master.clone(curTree_);
				tree_to_master.sendToMaster();
				tree_from_master.receiveFromMaster();
				tree_from_master.waitReceive();
				if (stopSignalCome())
					break;
				
				// take over the tree of the master if it is better than ours
				tree_from_master.unpackData();
				if (tree_from_master.getLogLi() > bestTree.getLogLi()) {
					//cout << " updated!" << endl;
					bestTree.clone(tree_from_master);
					params.bestLogLi = tree_from_master.getLogLi();
					bestTree.createCluster (bestClusterArr_);

				} 
			}
#endif // PARALLEL

		} else {
			// master work: receive tree from all slaves
#ifdef PARALLEL
			
			process = tree_from_slaves.waitForSlaves();
			tree_from_slaves.getItem(process)->unpackData();
			curTree_.clone((UrTree*)tree_from_slaves.getItem(process));
			curLogLi_ = curTree_.getLogLi();

			/* now send the bestTree to worker */
			tree_to_slaves.getItem(process)->clone(bestTree);
			// non-blocking send the bestTree
			tree_to_slaves.getItem(process)->sendToProcess(process);
			/* non-blocking wait for worker */
			tree_from_slaves.getItem(process)->receiveFromProcess(process);
#endif

		}

		ClusterArr newClusterArr_;
		curTree_.createCluster(newClusterArr_);
		if (params.build_consensus)
			for (int count_ = 0; count_ < newClusterArr_.totalNCluster_; count_ ++)
				allCluArr_.addConTree(newClusterArr_[count_]);


		// split the elapsed time into hours, minutes and seconds
		time_t currentTime_;
		time(&currentTime_);
		params.progTime = difftime (currentTime_, beginTime) + stoppedProgTime_;

		int nHour_ = static_cast<int> (params.progTime / 3600);
		int nMin_ = static_cast<int> ( (params.progTime - nHour_ * 3600) / 60);
		int nSec_ = static_cast<int> (params.progTime - nMin_ * 60 - nHour_ * 3600);

		if (isMasterProc() && params.build_consensus)
			curTree_.writeNewickForm (treeLsFileName_.c_str(), 1, params.progTime);



		// estimate the remaining time from the average time per iteration
		int remNHour_ = 0;
		int remNMin_ = 0;
		int remNSec_ = 1;
		if (maxNIter_ > params.cur_nIter) {
			int remTime_ = static_cast<int> ( (params.progTime / params.cur_nIter) * (maxNIter_ - params.cur_nIter));
/*
#ifdef PARALLEL
			if (mpi_size > 1)
				remTime_ = static_cast<int>(remTime_ / (mpi_size - 1));
#endif
*/

			remNHour_ = static_cast<int> (remTime_ / 3600);
			remNMin_ = static_cast<int> ( (remTime_ - remNHour_ * 3600) / 60);
			remNSec_ = static_cast<int> (remTime_ - remNHour_ * 3600 - remNMin_ * 60);
		}

		bool write_pam_check = false;

		// report the progress every 10 seconds, for every better tree and at the end
		if (isMasterProc())
			if (params.progTime -  lastProgTime_ > 10.0 || curLogLi_ > params.bestLogLi || params.cur_nIter >= maxNIter_) {
				std::cout << params.cur_nIter << " Iterations / time elapsed = " << nHour_ <<"h:" << nMin_<<"m:" << nSec_<<"s";
				if (maxNIter_ > params.cur_nIter)
					std::cout << "  (will finish in " << remNHour_<<"h:" << remNMin_ <<"m:" << remNSec_ <<"s)" << endl;
				else
					cout << endl;
				lastProgTime_ = params.progTime;
				//int isFinished_ = 0;
				write_pam_check = true;
			}

		
		if (curLogLi_ > params.bestLogLi) {
			if (curTree_.cmpDis( bestClusterArr_) != 0) { // only if the topology is different!
				if (isMasterProc()) {
					// start the prediction
					ofstream predictionFile_;
					predictionFile_.open (predictionFileName_.c_str(), ios::app);
					predictionFile_ << params.cur_nIter << endl;
					predictionFile_.close ();

					nImpTree_ ++;
					if (nImpTree_ >= START_PREDICTION /*&& params.stoppingRule != NO*/) {
						double tmpNPreIter_;
						prediction_.predict(predictionFileName_.c_str(), 0.95, tmpNPreIter_);
						if (static_cast<int> (tmpNPreIter_) >
						        params.cur_nIter) {
							nPredictedIter_ = static_cast<int> (tmpNPreIter_);
							// the stopping rule may extend the search
							if (params.stoppingRule != NO) {
								maxNIter_ = Utl::getMax (params.nIter, nPredictedIter_);
								if (params.stoppingRule == YES_MAX_ITER)
									maxNIter_ = Utl::getMin (params.max_nIter, maxNIter_);
							}
						}  // end of if tmpNPreIter_ > iterNo
					}
					
					curTree_.writeTop(topoFileName_.c_str(), 0);

				} // if (isMasterProc())

				params.bestLogLi = curLogLi_;
				bestTree = curTree_;
				bestTree.createCluster (bestClusterArr_);

				if (isMasterProc()) {
					std::cout.precision (10);

					std::cout <<"GOOD NEWS: BETTER TREE FOUND: THE CURRENTLY BEST LOG LIKELIHOOD = " << params.bestLogLi << endl;
					if (nImpTree_ >= START_PREDICTION /*&& nPredictedIter_ < MAX_ITERATION*/) {
						double neededTime_ = ( params.progTime / params.cur_nIter) * (nPredictedIter_ - params.cur_nIter);
						int nNeededHour_ = static_cast<int> (neededTime_ / 3600);
						int nNeededMin_ = static_cast<int> ((neededTime_ - nNeededHour_ * 3600)/ 60);
						int nNeededSec_ = static_cast<int> (neededTime_ - nNeededMin_ * 60 - nNeededHour_ * 3600);
						std::cout <<"PREDICTION: " << (nPredictedIter_ - params.cur_nIter) << 
						" iterations more are needed (about " << nNeededHour_ <<"h:" << nNeededMin_ <<"m:"<<nNeededSec_<<"s)" <<endl;
					}
				}
			}
		}

		if (isMasterProc() && write_pam_check)
			writePamCheckPoint(params, checkPointFileName_.c_str(), 0);


		//    char answer_;
		if (params.cur_nIter >= maxNIter_) {
			if (isMasterProc() && nImpTree_ >= START_PREDICTION) {
				if (params.cur_nIter >= nPredictedIter_) 
					std::cout << endl << "WE HAVE FOUND THE BEST TREE WITH 95% CONFIDENCE" << endl;
				else
					cout << endl << separator_str << endl <<
					"WARNING: According to the stopping rule, at least " << (nPredictedIter_ - params.cur_nIter) << 
					" more iterations are" << endl << 
					"         suggested to ensure that with a 95% confidence the current search" << endl <<
					"         will not detect a better tree. If you want to specify the confidence" << endl << 
					"         level, you should switch on the stopping rule in the option menu." << endl << 
					separator_str << endl;
			}
			break;
		}

		/*
		if (params.cur_nIter >= params.nIter && nImpTree_ < START_PREDICTION) {
			if (isMasterProc())
				std::cout << "WE HAVE CREATED " << params.cur_nIter << " TREES" << endl;
			break;
		}
		*/
	} //end of while


#ifdef PARALLEL
	// now cancel all non blocking communication
	if (isMasterProc()) {
		tree_from_slaves.cancelCommunications();
		//master_bcast.cancelCommunications();
	} else {
		tree_from_master.cancelCommunications();
		//tree_to_master.cancelCommunications();
	}

	MPI_Request *request = new MPI_Request[mpi_size];
	int *stop = new int[mpi_size];

	// tell all slaves to stop
	if (isMasterProc()) {
		for (int cnt = 0; cnt < mpi_size; cnt++)
			if (cnt != mpi_master_rank) {
				stop[cnt] = 1;
				MPI_Isend(&stop[cnt], 1, MPI_INT, cnt, TAG_STOP_SIGNAL, MPI_COMM_WORLD,
				          &request[cnt]);
			}
	} else { // slave
		if (stop_request != MPI_REQUEST_NULL)
			MPI_Cancel(&stop_request);
	}

	if (isMasterProc()) {
		cout << "Time optimization step: " << MPI_Wtime() - start_time << endl;
	}
	
	
	if (isMasterProc()) {
		cout << "Wait for all slaves to stop... ";
		cout.flush();
		start_time = MPI_Wtime();
	}
	MPI_Barrier(MPI_COMM_WORLD);
	// both arrays were allocated with new[], so they have to be released with delete[]
	// NOTE(review): the send requests are never waited for before the buffers
	// are released - confirm that this is safe with the MPI library in use
	delete [] stop;
	delete [] request;
	if (isMasterProc())
		cout << MPI_Wtime() - start_time << " s" << endl;

	// broadcast the best tree to slaves
	UrTreeParArray master_bcast;
	if (isMasterProc()) {
		master_bcast.broadcastToSlaves(bestTree, mpi_master_rank);
	}
	else {
		// blocking-receive
		tree_from_master.receiveFromMaster(false);
		//tree_from_master.waitReceive();
		tree_from_master.unpackData();
		bestTree.clone(tree_from_master);
		bestTree.createCluster (bestClusterArr_);
	}

	MPI_Barrier(MPI_COMM_WORLD);


#endif // PARALLEL


	stopOptimizationStep();

	// final tuning of the parameters and of the best tree
	params.bestLogLi = tuneFineParameter (params.nIter, bestTree);
	params.nIter = params.cur_nIter;

	if (isMasterProc() && params.build_consensus) {
		std::cout <<"Constructing the majority rule consensus tree..." << endl;
		allCluArr_.createConTree(params.nIter, conTree);
	}



	time_t currentTime_;
	time(&currentTime_);
	params.progTime = difftime (currentTime_, beginTime) + stoppedProgTime_;
	return params.bestLogLi;
}
