/* Revision-control identification string for this translation unit. */
static char rcsid[] = "$Id: terminal.c 223349 2020-10-28 02:49:25Z twu $";
/* Pull in the build configuration when one is provided. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* Fallbacks for platforms without memcpy/memmove: map onto bcopy, whose
   first two arguments are in (source, destination) order, hence the swap. */
#ifndef HAVE_MEMCPY
#define memcpy(d,s,n) bcopy((s),(d),(n))
#endif
#ifndef HAVE_MEMMOVE
#define memmove(d,s,n) bcopy((s),(d),(n))
#endif

#include "terminal.h"

#include <stdio.h>
#include <stdlib.h>
#include "mem.h"
#include "assert.h"
#include "types.h"

#include "genomicpos.h"
#include "substring.h"

#include "genome128_hr.h"
#include "extension-search.h"	/* For handling Elt_T objects */


/* Upper bound on the number of terminal hits created per sense list in
   Terminal_solve_plus and Terminal_solve_minus */
#define MAXTERMINALS 1000

/* Module state, assigned by Terminal_setup */

/* Passed to Univ_IIT_get_chrnum to locate the chromosome of a candidate */
static Univ_IIT_T chromosome_iit;
/* Used to clamp the 3' query position of a candidate to the genome end */
static Univcoord_T genomelength;
/* Passed through to Univ_IIT_get_chrnum */
static int circular_typeint;

/* Stored by Terminal_setup; not read by any active code in this file */
static Genome_T genomebits;
static Genome_T genomebits_alt;

/* When true, antisense terminals are generated in addition to sense ones */
static bool splicingp;

/* index1part determines query_lastpos (querylength - index1part) and the
   qstart threshold in find_terminals.  index1interval is not read by any
   active code in this file.  Both defaults are replaced by Terminal_setup. */
static int index1part = 15;
static int index1interval = 3;

/* Stored by Terminal_setup; not read by any active code in this file */
static int subopt_levels;


/* debug(x) expands to x when DEBUG is defined, and to nothing otherwise */
#ifdef DEBUG
#define debug(x) x
#else
#define debug(x)
#endif

/* debug4e(x) expands to x when DEBUG4E is defined, and to nothing otherwise.
   DEBUG4E also changes the signature of find_terminals, which then takes an
   extra queryptr argument. */
#ifdef DEBUG4E
#define debug4e(x) x
#else
#define debug4e(x)
#endif


/* Modified from find_spliceends_rna */
static void
find_terminals (List_T *sense_terminals, List_T *antisense_terminals, List_T elt_set,
#ifdef DEBUG4E
		char *queryptr,
#endif
		int querylength, int query_lastpos, int *mismatch_positions_alloc,
		Compress_T query_compress, Listpool_T listpool,
		bool plusp, int genestrand) {
#ifdef DEBUG4E
  char *gbuffer;
#endif

  Elt_T elt;
  List_T p;
  int k;

  Substring_T substring;
  Univcoord_T segment_univdiagonal, segment_left;
  int first_querypos, last_querypos, querystart, queryend, pos5_trimmed, pos3_trimmed, pos5, pos3;
  int nmismatches_ignore;

  /* int nmismatches_left, nmismatches_right; */
  int *mismatch_positions;
  int *positions_alloc;

  Chrnum_T chrnum;
  Univcoord_T chroffset, chrhigh;
  Chrpos_T chrlength;


#ifdef HAVE_ALLOCA
  if (querylength <= MAX_STACK_READLENGTH) {
    mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
    positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
  } else {
    mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
    positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
  }
#else
  mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
  positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
#endif

  debug4e(printf("Entering find_spliceends_rna (plusp %d) with %d elts\n",plusp,List_length(elt_set)));

  for (p = elt_set; p != NULL; p = List_next(p)) {
    elt = (Elt_T) List_head(p);
#if 0
    /* Not sure if this is still necessary */
    if ((first_querypos = segment->querystart - (index1interval - 1)) < 0) {
      first_querypos = 0;
    }
    if ((last_querypos = segment->queryend + (index1interval - 1)) > querylength) {
      last_querypos = querylength;
    }
#else
    first_querypos = elt->qstart;
    last_querypos = elt->qend;
#endif

    if (last_querypos < query_lastpos /*&& (first_querypos < index1part || segment->spliceable_low_p == true)*/) {
      /* Find splices on genomic right */
      for (k = 0; k < elt->n_all_univdiagonals; k++) {
	segment_univdiagonal = elt->all_univdiagonals[k];
	segment_left = segment_univdiagonal - (Univcoord_T) querylength;
	pos5 = (segment_univdiagonal >= (Univcoord_T) querylength) ? 0 : (int) -segment_left;
	pos3 = (segment_univdiagonal <= genomelength) ? querylength : (int) (genomelength - segment_left);

	/* Could use Univ_IIT_update_chrnum here */
	chrnum = Univ_IIT_get_chrnum(&chroffset,&chrhigh,&chrlength,chromosome_iit,
				     /*low*/segment_left+pos5,/*high*/segment_left+pos3,
				     circular_typeint);

	debug4e(printf("find_terminals: Checking mismatches at univdiagonal %llu (querypos %d..%d), plusp %d\n",
		       (unsigned long long) segment_univdiagonal,first_querypos,last_querypos,plusp));
	debug4e(
		gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
		Genome_fill_buffer_blocks(/*left*/segment_univdiagonal - querylength,querylength,gbuffer);
		printf("genome 0..: %s\n",gbuffer);
		printf("query  0..: %s\n",queryptr);
		);

	/* TODO: Consider trimming separately for SENSE_FORWARD and SENSE_ANTI */
	pos5_trimmed = Substring_trim_qstart_nosplice(&nmismatches_ignore,mismatch_positions_alloc,
						      query_compress,segment_left,chroffset,
						      pos5,pos3,querylength,plusp,genestrand);
	pos3_trimmed = Substring_trim_qend_nosplice(&nmismatches_ignore,mismatch_positions_alloc,
						    query_compress,segment_left,chrhigh,
						    pos5,pos3,querylength,plusp,genestrand);
	if (plusp == true) {
	  querystart = pos5_trimmed;
	  queryend = pos3_trimmed;
	} else {
	  querystart = querylength - pos3_trimmed;
	  queryend = querylength - pos5_trimmed;
	}

	if ((substring = Substring_new(/*nmismatches*/-1,/*ref_nmismatches*/-1,segment_left,
				       querystart,queryend,querylength,
				       plusp,genestrand,query_compress,chrnum,chroffset,chrhigh,chrlength,
				       /*splice5p*/false,/*splicetype5*/NO_SPLICE,/*ambig_prob_5*/0.0,
				       /*splice3p*/false,/*splicetype3*/NO_SPLICE,/*ambig_prob_3*/0.0,
				       /*orig_sensedir*/SENSE_FORWARD)) != NULL) {
	  debug4e(printf("=> %s terminal: (%d mismatches) %d..%d\n",
			 plusp == true ? "plus" : "minus",Substring_nmismatches_bothdiff(substring),
			 Substring_querystart(substring),Substring_queryend(substring)));
	  debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
	  *sense_terminals = Listpool_push(*sense_terminals,listpool,(void *) substring);
	}

	if (splicingp == true) {
	  if ((substring = Substring_new(/*nmismatches*/-1,/*ref_nmismatches*/-1,segment_left,
					 querystart,queryend,querylength,
					 plusp,genestrand,query_compress,chrnum,chroffset,chrhigh,chrlength,
					 /*splice5p*/false,/*splicetype5*/NO_SPLICE,/*ambig_prob_5*/0.0,
					 /*splice3p*/false,/*splicetype3*/NO_SPLICE,/*ambig_prob_3*/0.0,
					 /*orig_sensedir*/SENSE_ANTI)) != NULL) {
	    debug4e(printf("=> %s terminal: (%d mismatches) %d..%d\n",
			   plusp == true ? "plus" : "minus",Substring_nmismatches_bothdiff(substring),
			   Substring_querystart(substring),Substring_queryend(substring)));
	    debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
	    *antisense_terminals = Listpool_push(*antisense_terminals,listpool,(void *) substring);
	  }
	}

	debug4e(FREE(gbuffer));
      }
    }

    if (first_querypos > index1part /*&& (last_querypos > query_lastpos || segment->spliceable_high_p == true)*/) {
      /* Find splices on genomic left */
      for (k = 0; k < elt->n_all_univdiagonals; k++) {
	segment_univdiagonal = elt->all_univdiagonals[k];
	segment_left = segment_univdiagonal - (Univcoord_T) querylength;
	pos5 = (segment_univdiagonal >= (Univcoord_T) querylength) ? 0 : (int) -segment_left;
	pos3 = (segment_univdiagonal <= genomelength) ? querylength : (int) (genomelength - segment_left);

	/* Could use Univ_IIT_update_chrnum here */
	chrnum = Univ_IIT_get_chrnum(&chroffset,&chrhigh,&chrlength,chromosome_iit,
				     /*low*/segment_left+pos5,/*high*/segment_left+pos3,
				     circular_typeint);

	debug4e(printf("find_terminals: Checking mismatches at univdiagonal %llu (querypos %d..%d), plusp %d\n",
		       (unsigned long long) segment_univdiagonal,first_querypos,last_querypos,plusp));
	debug4e(
		gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
		Genome_fill_buffer_blocks(/*left*/segment_univdiagonal - querylength,querylength,gbuffer);
		printf("genome 0..: %s\n",gbuffer);
		printf("query  0..: %s\n",queryptr);
		);

	/* TODO: Consider trimming separately for SENSE_FORWARD and SENSE_ANTI */
	pos5_trimmed = Substring_trim_qstart_nosplice(&nmismatches_ignore,mismatch_positions_alloc,
						      query_compress,segment_left,chroffset,
						      pos5,pos3,querylength,plusp,genestrand);
	pos3_trimmed = Substring_trim_qend_nosplice(&nmismatches_ignore,mismatch_positions_alloc,
						    query_compress,segment_left,chrhigh,
						    pos5,pos3,querylength,plusp,genestrand);
	if (plusp == true) {
	  querystart = pos5_trimmed;
	  queryend = pos3_trimmed;
	} else {
	  querystart = querylength - pos3_trimmed;
	  queryend = querylength - pos5_trimmed;
	}

	if ((substring = Substring_new(/*nmismatches*/-1,/*ref_nmismatches*/-1,segment_left,
				       querystart,queryend,querylength,
				       plusp,genestrand,query_compress,chrnum,chroffset,chrhigh,chrlength,
				       /*splice5p*/false,/*splicetype5*/NO_SPLICE,/*ambig_prob_5*/0.0,
				       /*splice3p*/false,/*splicetype3*/NO_SPLICE,/*ambig_prob_3*/0.0,
				       /*orig_sensedir*/SENSE_FORWARD)) != NULL) {
	  debug4e(printf("=> %s terminal: (%d mismatches) %d..%d\n",
			 plusp == true ? "plus" : "minus",Substring_nmismatches_bothdiff(substring),
			 Substring_querystart(substring),Substring_queryend(substring)));
	  debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
	  *sense_terminals = Listpool_push(*sense_terminals,listpool,(void *) substring);
	}

	if (splicingp == true) {
	  if ((substring = Substring_new(/*nmismatches*/-1,/*ref_nmismatches*/-1,segment_left,
					 querystart,queryend,querylength,
					 plusp,genestrand,query_compress,chrnum,chroffset,chrhigh,chrlength,
					 /*splice5p*/false,/*splicetype5*/NO_SPLICE,/*ambig_prob_5*/0.0,
					 /*splice3p*/false,/*splicetype3*/NO_SPLICE,/*ambig_prob_3*/0.0,
					 /*orig_sensedir*/SENSE_ANTI)) != NULL) {
	    debug4e(printf("=> %s terminal: (%d mismatches) %d..%d\n",
			   plusp == true ? "plus" : "minus",Substring_nmismatches_bothdiff(substring),
			   Substring_querystart(substring),Substring_queryend(substring)));
	    debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
	    *antisense_terminals = Listpool_push(*antisense_terminals,listpool,(void *) substring);
	  }
	}

	debug4e(FREE(gbuffer));
      }
    }
  }


#ifdef HAVE_ALLOCA
  if (querylength <= MAX_STACK_READLENGTH) {
    FREEA(mismatch_positions);
    FREEA(positions_alloc);
  } else {
    FREE(mismatch_positions);
    FREE(positions_alloc);
  }
#else
  FREE(mismatch_positions);
  FREE(positions_alloc);
#endif

  return;
}


/* Stage 9 (Term) for the plus strand.

   Collects terminal substrings from queryfwd_plus_set and then
   queryrev_plus_set via find_terminals, and converts them into hits with
   Stage3end_new_terminal:

   *sense_hits_plus      hits built from the sense terminals
   *antisense_hits_plus  hits built from the antisense terminals; stays
                         NULL unless the file-level splicingp is true

   Both output lists are reset to NULL on entry.  At most MAXTERMINALS hits
   are created per list, after which each list is passed through
   Stage3end_remove_overlaps.  found_score_overall and
   found_score_within_trims are forwarded to Stage3end_new_terminal.
   mismatch_positions_alloc is scratch space forwarded to find_terminals.
   The intermediate substring lists are released with Substring_list_gc
   before returning. */
void
Terminal_solve_plus (int *found_score_overall, int *found_score_within_trims,
		     List_T *sense_hits_plus, List_T *antisense_hits_plus,
		     List_T queryfwd_plus_set, List_T queryrev_plus_set,

		     int *mismatch_positions_alloc,
		     Compress_T query_compress_fwd, int querylength, 
		     int genestrand, Listpool_T listpool,
		     Hitlistpool_T hitlistpool, int level) {
  List_T sense_terminals_plus = NULL, antisense_terminals_plus = NULL, p;
  Stage3end_T hit;
  Substring_T substring;
  int nterminals;
  int query_lastpos = querylength - index1part;


  /* 9 (Term).  Find terminals */
  debug(printf("*** Stage 9 (Term).  Terminal_solve (plus strand) ***\n"));
  *sense_hits_plus = *antisense_hits_plus = (List_T) NULL;

  /* NOTE(review): queryuc_ptr in the DEBUG4E-only arguments below is not a
     parameter of this function and is not declared in this file */
  debug(printf("Starting find_terminals (plus)\n"));
  find_terminals(&sense_terminals_plus,&antisense_terminals_plus,queryfwd_plus_set,
#ifdef DEBUG4E		 
		 /*queryptr*/queryuc_ptr,
#endif
		 querylength,query_lastpos,mismatch_positions_alloc,
		 /*query_compress*/query_compress_fwd,
		 listpool,/*plusp*/true,genestrand);

  find_terminals(&sense_terminals_plus,&antisense_terminals_plus,queryrev_plus_set,
#ifdef DEBUG4E
		 /*queryptr*/queryuc_ptr,
#endif
		 querylength,query_lastpos,mismatch_positions_alloc,
		 /*query_compress*/query_compress_fwd,
		 listpool,/*plusp*/true,genestrand);
  debug(printf("Finished find_terminals (plus)\n"));

  /* Report the terminals just collected; the hit lists are still empty here */
  debug4e(printf("Terminals: sense plus %d, antisense plus %d\n",
		 List_length(sense_terminals_plus),List_length(antisense_terminals_plus)));

  /* TODO: Prioritize inner terminals */

  debug(printf("*** Stage 9 (Term)\n"));

  nterminals = 0;
  for (p = sense_terminals_plus; p != NULL && nterminals < MAXTERMINALS; p = List_next(p)) {
    substring = (Substring_T) p->first;
    if ((hit = Stage3end_new_terminal(found_score_overall,found_score_within_trims,
				      substring,querylength,/*gplusp*/true,genestrand,/*sensedir*/SENSE_FORWARD,
				      listpool,/*method*/TERMINAL,level)) != NULL) {
      *sense_hits_plus = Hitlist_push(*sense_hits_plus,hitlistpool,(void *) hit);
      nterminals++;
    }
  }
  *sense_hits_plus = Stage3end_remove_overlaps(*sense_hits_plus,hitlistpool,querylength,/*finalp*/false);

  if (splicingp == true) {
    nterminals = 0;
    for (p = antisense_terminals_plus; p != NULL && nterminals < MAXTERMINALS; p = List_next(p)) {
      substring = (Substring_T) p->first;
      if ((hit = Stage3end_new_terminal(found_score_overall,found_score_within_trims,
					substring,querylength,/*gplusp*/true,genestrand,/*sensedir*/SENSE_ANTI,
					listpool,/*method*/TERMINAL,level)) != NULL) {
	*antisense_hits_plus = Hitlist_push(*antisense_hits_plus,hitlistpool,(void *) hit);
	nterminals++;
      }
    }
    *antisense_hits_plus = Stage3end_remove_overlaps(*antisense_hits_plus,hitlistpool,querylength,/*finalp*/false);
  }

  /* Excess distant splicing should be freed already in find_splicepairs_rna */

  /* Do not filter terminals by optimal score, since criterion is concordance, not length */

  Substring_list_gc(&sense_terminals_plus);
  Substring_list_gc(&antisense_terminals_plus);

  return;
}


/* Stage 9 (Term) for the minus strand.

   Collects terminal substrings from queryfwd_minus_set and then
   queryrev_minus_set via find_terminals, and converts them into hits with
   Stage3end_new_terminal:

   *sense_hits_minus      hits built from the sense terminals
   *antisense_hits_minus  hits built from the antisense terminals; stays
                          NULL unless the file-level splicingp is true

   Both output lists are reset to NULL on entry.  At most MAXTERMINALS hits
   are created per list, after which each list is passed through
   Stage3end_remove_overlaps.  found_score_overall and
   found_score_within_trims are forwarded to Stage3end_new_terminal.
   mismatch_positions_alloc is scratch space forwarded to find_terminals.
   The intermediate substring lists are released with Substring_list_gc
   before returning. */
void
Terminal_solve_minus (int *found_score_overall, int *found_score_within_trims,
		      List_T *sense_hits_minus, List_T *antisense_hits_minus,
		      List_T queryfwd_minus_set, List_T queryrev_minus_set,

		      int *mismatch_positions_alloc,
		      Compress_T query_compress_rev, int querylength,
		      int genestrand, Listpool_T listpool,
		      Hitlistpool_T hitlistpool, int level) {
  List_T sense_terminals_minus = NULL, antisense_terminals_minus = NULL, p;
  Stage3end_T hit;
  Substring_T substring;
  int nterminals;
  int query_lastpos = querylength - index1part;


  /* 9 (Term).  Find terminals */
  debug(printf("*** Stage 9 (Term).  Terminal_solve (minus strand) ***\n"));
  *sense_hits_minus = *antisense_hits_minus = (List_T) NULL;

  /* NOTE(review): queryrc in the DEBUG4E-only arguments below is not a
     parameter of this function and is not declared in this file */
  debug(printf("Starting find_terminals (minus)\n"));
  find_terminals(&sense_terminals_minus,&antisense_terminals_minus,queryfwd_minus_set,
#ifdef DEBUG4E
		 /*queryptr*/queryrc,
#endif
		 querylength,query_lastpos,mismatch_positions_alloc,
		 /*query_compress*/query_compress_rev,
		 listpool,/*plusp*/false,genestrand);

  find_terminals(&sense_terminals_minus,&antisense_terminals_minus,queryrev_minus_set,
#ifdef DEBUG4E
		 /*queryptr*/queryrc,
#endif
		 querylength,query_lastpos,mismatch_positions_alloc,
		 /*query_compress*/query_compress_rev,
		 listpool,/*plusp*/false,genestrand);
  debug(printf("Finished find_terminals (minus)\n"));

  /* Report the terminals just collected; the hit lists are still empty here */
  debug4e(printf("Terminals: sense minus %d and antisense minus %d\n",
		 List_length(sense_terminals_minus),List_length(antisense_terminals_minus)));

  /* TODO: Prioritize inner terminals */

  debug(printf("*** Stage 9 (Term)\n"));

  nterminals = 0;
  for (p = sense_terminals_minus; p != NULL && nterminals < MAXTERMINALS; p = List_next(p)) {
    substring = (Substring_T) p->first;
    if ((hit = Stage3end_new_terminal(found_score_overall,found_score_within_trims,
				      substring,querylength,/*gplusp*/false,genestrand,/*sensedir*/SENSE_FORWARD,
				      listpool,/*method*/TERMINAL,level)) != NULL) {
      *sense_hits_minus = Hitlist_push(*sense_hits_minus,hitlistpool,(void *) hit);
      nterminals++;
    }
  }
  *sense_hits_minus = Stage3end_remove_overlaps(*sense_hits_minus,hitlistpool,querylength,/*finalp*/false);

  if (splicingp == true) {
    nterminals = 0;
    for (p = antisense_terminals_minus; p != NULL && nterminals < MAXTERMINALS; p = List_next(p)) {
      substring = (Substring_T) p->first;
      if ((hit = Stage3end_new_terminal(found_score_overall,found_score_within_trims,
					substring,querylength,/*gplusp*/false,genestrand,/*sensedir*/SENSE_ANTI,
					listpool,/*method*/TERMINAL,level)) != NULL) {
	*antisense_hits_minus = Hitlist_push(*antisense_hits_minus,hitlistpool,(void *) hit);
	nterminals++;
      }
    }
    *antisense_hits_minus = Stage3end_remove_overlaps(*antisense_hits_minus,hitlistpool,querylength,/*finalp*/false);
  }

  /* Excess distant splicing should be freed already in find_splicepairs_rna */

  /* Do not filter terminals by optimal score, since criterion is concordance, not length */

  Substring_list_gc(&sense_terminals_minus);
  Substring_list_gc(&antisense_terminals_minus);

  return;
}


/* Records the run-wide settings used by this module by copying each
   argument into the file-level static of the same name (minus the _in
   suffix).  Must be called before Terminal_solve_plus or
   Terminal_solve_minus if values other than the built-in defaults for
   index1part and index1interval are wanted. */
void
Terminal_setup (Univ_IIT_T chromosome_iit_in, Univcoord_T genomelength_in, int circular_typeint_in,
		Genome_T genomebits_in, Genome_T genomebits_alt_in,
		bool splicingp_in, int index1part_in, int index1interval_in,
		int subopt_levels_in) {

  /* Indexing and scoring parameters */
  index1part = index1part_in;
  index1interval = index1interval_in;
  subopt_levels = subopt_levels_in;

  /* Whether antisense terminals are generated */
  splicingp = splicingp_in;

  /* Genome sequence handles */
  genomebits = genomebits_in;
  genomebits_alt = genomebits_alt_in;

  /* Chromosome layout */
  chromosome_iit = chromosome_iit_in;
  genomelength = genomelength_in;
  circular_typeint = circular_typeint_in;
}


