/* $Id: stage3hrdef.h 224729 2021-11-30 14:44:45Z twu $ */
#ifndef STAGE3HRDEF_INCLUDED
#define STAGE3HRDEF_INCLUDED

#include "bool.h"
#include "types.h"
#include "genomicpos.h"
#include "method.h"
#include "chrnum.h"
#include "iit-read.h"		/* For Overlap_T */
#include "stage3hr.h"		/* For Hittype_T */
#include "list.h"
#include "substring.h"


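/* Note: Substring_T has genomiclength.  Stage3end_T also declares a
   genomiclength field below, so the earlier statement that it does
   not appears to be out of date. */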

/* TODO: Allow a Stage3end_T object to hold solutions for both
   sensedirs.  Then the pairing operations can select the sensedirs
   that are best */
#define T Stage3end_T
/* Field layout of a Stage3end_T (T is #defined to Stage3end_T): one
   alignment hit, described by its substrings and junctions, its query
   and genomic extents, its chromosome information, and its match
   counts and scores.  Stage3pair_T holds two of these (hit5 and hit3). */
struct T {
  /* Hittype_T is declared in stage3hr.h */
  Hittype_T hittype;
  /* NOTE(review): presumably the method that produced this hit (see method.h) -- confirm */
  Method_T method;
  int level;

  int distant_splice_i;		/* Index (0-based, into junctions_1toN) of the junction having a distant splice */
  Substring_T substring_donor;	/* Donor substring adjacent to the distant splice junction */
  Substring_T substring_acceptor; /* Acceptor substring adjacent to the distant splice junction */

  int querylength;		/* Needed for overlap and pairlength calculations */
  int querylength_adj;		/* Adjusted for insertions */

  /* For transcriptome alignment */
  List_T transcripts_consistent;
  List_T transcripts_inconsistent;

  List_T substrings_1toN;	/* Query position 1 to N */
  List_T substrings_Nto1;	/* Query position N to 1.  Keeps only pointers to the substrings. */

  List_T junctions_1toN;
  List_T junctions_Nto1;

  int querystart_chrbound;	/* Extending to full querylength (limited by chrbound) */
  int queryend_chrbound;

  /* Used by Stage3end_optimal_score for comparing terminals and non-terminals */
  int querystart_trimmed;	/* Extending to the trimmed alignment */
  int queryend_trimmed;

  /* If querystart_trimmed_splicep (or queryend_trimmed_splicep) is true, then the trim is of type "unknown amb" */
  /* If querystart_trimmed_splicep (or queryend_trimmed_splicep) is false, then the trim is of type "unknown" */
  bool querystart_trimmed_splicep;
  bool queryend_trimmed_splicep;

  Univcoord_T genomicstart;	/* Extending to full querylength (not limited by chrbound) */
  Univcoord_T genomicend;

  /* Corresponds to querystart_chrbound and queryend_chrbound */
  Univcoord_T low_chrbound; /* Extending to full querylength (limited by chrbound) */
  Univcoord_T high_chrbound;

  /* Corresponds to querystart_trimmed and queryend_trimmed */
  Univcoord_T low_trimmed;	/* Extending to the trimmed alignment (including limit by chrbound) */
  Univcoord_T high_trimmed;

  Chrpos_T genomiclength;
  Chrpos_T guided_insertlength; /* Used only by Stage3end_eval_and_sort_guided */

  bool shortdistancep;
  Chrnum_T chrnum; /* Needed for printing paired-end results.  A
		      chrnum of 0 indicates a translocation (two
		      different chromosomes), or an alignment that
		      needs to be printed as a translocation
		      (samechr_splice unless --merge_samechr is
		      selected).  Used for printing. */

  Chrnum_T effective_chrnum;	/* For determining concordance */
  Chrnum_T other_chrnum;	/* 0 for non-translocations; for translocations, the chrnum other than effective_chrnum */
  Univcoord_T chroffset;
  Univcoord_T chrhigh;
  Chrpos_T chrlength;

  bool plusp;
  int genestrand;

  /* For spliced alignments */
  int sensedir;			/* A private value */
  int sensedir_for_concordance;
  /* Possibilities:
     not spliced: sensedir_for_concordance is NULL.  sensedir (private) is NULL.
     regular or transloc splice with certain sense: sensedir_for_concordance is {FORWARD,ANTI}.  sensedir (private) is the same {FORWARD,ANTI}.
     distant splice with uncertain sense: sensedir_for_concordance is NULL.  sensedir (private) is {FORWARD,ANTI}.
  */

  int nsplices;
  double splice_score;		/* Used by various SPLICE types */
  int nindels;			/* For indels */

  int nmismatches_bothdiff;
  int nmismatches_refdiff;	/* Set only for display */
  int nsegments;

  int refalt_nmatches_to_trims;
  int ref_nmatches_to_trims;
  int refalt_nmatches_plus_spliced_trims;  /* Includes alts and ambiguous parts after good splice ends */
  int ref_nmatches_plus_spliced_trims;

#if 0
  /* Is there any role for any ref_score_*? */
  int ref_score_overall;  /* Over entire query, so trimming raises the score */
#endif

  /* Use refalt_score_overall for filtering, because a mismatch in the masked region does not change refalt_score */
  /* Also use refalt_score_overall for ranking, so only mismatches in the unmasked regions count */
  int refalt_score_overall;  /* Over entire query, so trimming raises the score */
  int refalt_score_within_trims; /* From one trim to the other.  Independent of trimming, so use for ladder score */

  float mapq_loglik;
  int mapq_score;
  int absmq_score;		/* Absolute MAPQ, for XQ and X2 flags */


  /* score = querylength - nmatches - penalties */
  /* score_posttrim = querylength - nmatches_posttrim - penalties */
  /* score (best case) <= score_posttrim (worst case) */
  /* In computing concordance, can rank by score to get ambiguous
     alignments, but keep going until we reach score_posttrim to give
     complete alignments a chance */

  /* Worked example for the values listed below: */
  /* query: ------ACGTACGaACGa------ (length 24, spliced on left) */
  /*              ||||||| |||:       */
  /* ref:   ----agACGTACGTACgt------ */
  /* mask:  nnnnnnACGTACGTACnnnnnnnn */

  /*     refalt_score_overall: -13 = -6 for left trim, -6 for right trim, -1 for mismatch (remove)
     (*) ref_score_overall: -14 = -6 for left trim, -6 for right trim, -2 for mismatches (keep)


     (*) refalt_score_within_trims: -1 for mismatches (keep)
         ref_score_within_trims: -2 for mismatches (remove)

         refalt_score_allowing_spliced_trims: -7 = -6 for right trim, -1 for mismatches (remove)
         ref_score_allowing_spliced_trims: -8 = -6 for right trim, -2 for mismatches (remove)
     (*) refalt_nmatches_plus_spliced_trims: 17 = 6 for left trim + 12 in alignment - 1 mismatch
     (*) ref_nmatches_plus_spliced_trims: 16 = 6 for left trim + 12 in alignment - 2 mismatches

     (*) refalt_nmatches_to_trims: 11 = 12 in alignment - 1 mismatch (keep)
     (*) ref_nmatches_to_trims: 10 = 12 in alignment - 2 mismatches (keep)

         Use scores to monitor progress.  Use nmatches to compare hits.
	 Order nmatches to favor spliced trims, and then shorter trims.
	 Use nmatches_to_trims for optimal_score_final within loci.
  */

  /* Current usage */
  /* found_score: score_overall */
  /* found_score_within_trims: score_within_trims */
  /* Stage3end_output_cmp: nmatches_plus_spliced_trims */
  /* Stage3pair_output_cmp: nmatches_plus_spliced_trims, score_within_trims */
  /* hit_sort_cmp: score_within_trims, nmatches_plus_spliced_trims */
  /* hit_equiv_cmp: score_within_trims, nmatches_plus_spliced_trims */
  /* Stage3end_hit_goodness_cmp: nmatches_plus_spliced_trims */
  /* hitpair_sort_cmp: score_within_trims, nmatches_plus_spliced_trims */
  /* hitpair_equiv_cmp: nmatches_plus_spliced_trims */
  /* hitpair_goodness_cmp: nmatches_plus_spliced_trims */
  /* Stage3end_optimal_score_final: nmatches_to_trims */

  /* Desired usage */
  /* found_score: ref_score_overall */
  /* found_score_within_trims: refalt_score_within_trims */

  /* NOTE(review): ref_nmismatches_plus_spliced_trims, named in the next
     two lines, is not a field of this struct; possibly
     ref_nmatches_plus_spliced_trims was intended -- confirm */
  /* Stage3end_output_cmp: refalt_nmatches_plus_spliced_trims, ref_nmismatches_plus_spliced_trims, refalt_score_within_trims */
  /* Stage3pair_output_cmp: refalt_nmatches_plus_spliced_trims, ref_nmismatches_plus_spliced_trims, refalt_score_within_trims */
  /* hit_sort_cmp: refalt_score_within_trims, ref_score_within_trims, refalt_nmatches_plus_spliced_trims */
  /* hit_equiv_cmp: refalt_score_within_trims, ref_score_within_trims, refalt_nmatches_plus_spliced_trims */
  /* Stage3end_hit_goodness_cmp: refalt_nmatches_plus_spliced_trims */
  /* hitpair_sort_cmp: refalt_score_within_trims, ref_score_within_trims, refalt_nmatches_plus_spliced_trims */
  /* hitpair_equiv_cmp: nmatches_plus_spliced_trims */
  /* hitpair_goodness_cmp: nmatches_plus_spliced_trims */
  /* Stage3end_optimal_score_final: refalt_nmatches_plus_spliced_trims */
  /* Stage3pair_optimal_score_final: refalt_nmatches_plus_spliced_trims */


  int score_eventrim;		/* Temporary storage used by Stage3end_optimal_score */

  bool paired_usedp;

  int query_splicepos;		/* For splices.  Relative to querystart, so different from circularpos */

  int circularalias;			/* -1 if all below chrlength, 0 if straddles or NA (e.g., transloc), and +1 if above */
                                /* -2 if extends below beginning of circular chromosome, +2 if extends beyond end of second copy */
  int circularpos;		/* If circularalias == 0, then amount of queryseq below chrlength.  Defined relative to low */

  bool altlocp;
};


/* Field layout of a Stage3pair_T: a pair of hits, hit5 and hit3 (each
   a Stage3end_T, through the T macro), together with values that
   describe the pair as a whole, such as its bounds, insert length,
   pair relationship, MAPQ scores, and consistency flags. */
struct Stage3pair_T {
  int genestrand;
  int sensedir;

  T hit5;			/* Always a copy from the original */
  T hit3;			/* Always a copy from the original */

  Univcoord_T low_chrbound;
  Univcoord_T high_chrbound;
  Chrpos_T insertlength;
  int pair_relationship;
  int insertlength_expected_sign;	/* 1 if in (expected_pairlength_low, expected_pairlength_high),
					   0 if in (expected_pairlength_low, expected_pairlength_very_high), and
					   -1 if < expected_pairlength_low or > expected_pairlength_very_high */

  Chrpos_T outerlength;

  float mapq_loglik;
  int mapq_score;
  int absmq_score;

  /* The declarations below are commented out, so these values are
     not fields of the pair.  Add values from hit5 and hit3: */
  /* int refalt_nmatches_to_trims; */
  /* int ref_nmatches_to_trims; */
  /* int ref_score_overall; */
  /* int refalt_score_within_trims; */
  /* int refalt_nmatches_plus_spliced_trims; */
  /* int ref_nmatches_plus_spliced_trims; */

  int nmismatches;		/* querylength - sum of nmatches */
  int score_eventrim;		/* For storage */

  /* Overlap_T gene_overlap; */
  long int tally;

  /* Present only when the corresponding macro is defined at compile time */
#ifdef USE_ABSDIFFLENGTH
  Chrpos_T absdifflength;
#endif
#ifdef USE_BINGO
  bool absdifflength_bingo_p;
#endif
  int dir;			/* -1, 0, or +1 */
  bool sense_consistent_p;
  bool concordant_transcripts_p;

  int nsplices;

  bool circularp;		/* True if either hit5 or hit3 is circular */
  int alts_status_inside;
};

#undef T
#endif

