#!/usr/bin/perl

#use warnings;
use strict;

use Bio::Seq;
use Bio::SeqIO;

### Variables ###
#unless (-e "Results") { system "mkdir Results"; }
my $run = $ARGV[0];  
my $blast_eval1 = $ARGV[1];
my $blast_eval2 = $ARGV[2];
my $neighbour_step = $ARGV[3]; ##Syntenoblast: number of steps out looking at neighbours for synteny.
my $synt_step = $ARGV[4]; ##Syntenoblast: number of rounds of syntenoblast
my $blastall= $ARGV[5];

#print "$run $blast_eval1 $blast_eval2 $neighbour_step $synt_step $blastall\n";

my $store = $run."/bac_stored_".$ARGV[0];

### direct errors in here ### 
open ERROR, '>'.$run."/error_step1.txt" or die $!;
STDERR->fdopen( \*ERROR, 'w' ) or die $!;

my (%spec_name, %spec_loc);
my $SN_file = "<".$store."/spec_name.tab"; my $SL_file = "<".$store."/spec_loc.tab";
open IN, $SN_file; my @sn = <IN>; close IN;
foreach my $line (@sn) {
    my @split = split (/\t/, $line);
    chomp $split[0]; chomp $split[1];
    $spec_name{$split[0]} = $split[1];
}
open IN, $SL_file; my @sl = <IN>; close IN;
foreach my $line (@sl) {
    my @split = split (/\t/, $line);
    chomp $split[0]; chomp $split[1]; chomp $split[2];
    $spec_loc{$split[0]}{$split[1]} = $split[2];
}
    
my $Blast_Out = $run."/Blast_Out";
unless (-e $Blast_Out) {
    system "mkdir $Blast_Out";
}
my $key; my $count_spec=0;
foreach my $specnum (keys %spec_name) {
    $count_spec++;
}

my $blast_on = 1;

## Hash declarations ##
my (%AA_seq_all, %NT_seq_all, %OGS_no_all, %Overlap_all, %Olap_all, %adj_l_all, %adj_r_all, %adj_l_nov_all, %adj_r_nov_all, %gene_num_all, %gene_start_all, %gene_stop_all, %chr_all, %species_all, %OGS_spec_id_all, %IG_all, %OGS_nt_seq_all, %OGS_seqall_all, %descr_all, %gb_acc_all, %chromlist, %gene_tag_all, %ps_all, %ps_start_all, %ps_stop_all, %all_feat_all, %all_feat_start_all, %all_feat_stop_all, %strand_all, %strand_all_af);

my $num=0;

foreach my $specnum (keys %spec_name) { 
  #  print "$specnum $spec_name{$specnum}\n";
    ################## 1. Parse Genbank Files ###########################################################

    my $tag = $spec_name{$specnum};
    my $count=0; my $count_ps=0; my $count_chr=0; my $count_feat=0;
    my (%fasta, %pseudo, %hash, %gene_start, %gene_stop, %gene_num, %gene_locus, %gene_name, %species, %aa, %gene_tag, %chr, %strand, %descr, %gb_acc, %ps, %ps_start, %ps_stop, %all_feat, %all_feat_start, %all_feat_stop, %strand_af);
    foreach my $chrnum (keys %{$spec_loc{$spec_name{$specnum}}}) {
	$count_chr++;	
	my $file = $spec_loc{$spec_name{$specnum}}{$chrnum};
	open IN, $file; my @in = <IN>; close IN;
	my $chromtag; my $descrip; my $locus;
	foreach my $line (@in) { ##getting accession no, description, and chromosome
	    if ($line =~ /^ACCESSION/) {
		my @split = split (/\s/, $line);
		$locus = $split[3];
	    }
	    if ($line =~ /^DEFINITION/) {
		my @split = split (/DEFINITION  /, $line);
		$descrip = $split[1];

	    }
	    if ($line =~ /^VERSION/) {
		my @split = split (/\s/, $line);
		$chromtag = $split[5];
		last;
	    }
	}
#	print "file:$file locus:$locus description:$descrip chromtag: $chromtag!\n";

	my $seqio_object = Bio::SeqIO->new(-file => "<$file", '-format' => "GenBank");

	my $last; my %ps_count; my %all_feat_count;
	while (my $seq_object = $seqio_object->next_seq) { # run over all contigs
	    $fasta{$chromtag} = $seq_object->seq;
	    for my $feat_object ($seq_object->get_SeqFeatures) { # run over all features (id source and 3xfeature(gene,mRNA,CDS) per real 'feature')
		my $prim = $feat_object->primary_tag;
		unless (($prim eq "CDS") || ($prim eq "gene") || ($prim eq "source")) {
		    $all_feat_count{$last}++;
		    $all_feat{$last}{$all_feat_count{$last}} = $prim;
                    for my $tag ($feat_object->get_all_tags) {
			if ($tag eq "pseudo" ) {
			    $all_feat{$last}{$all_feat_count{$last}} = $tag;
			}
			my $start=$feat_object->location->start;
			my $stop =$feat_object->location->end;
			$all_feat_start{$last}{$all_feat_count{$last}} = $start;
			$all_feat_stop{$last}{$all_feat_count{$last}} = $stop;
			if ($feat_object->location->strand == -1) { $strand_af{$last}{$all_feat_count{$last}} = "minus"; } else { $strand_af{$last}{$all_feat_count{$last}} = "plus" }
		    }
		}
		if ($feat_object->primary_tag eq "gene") {
		    for my $tag ($feat_object->get_all_tags) {
                        if ($tag eq "pseudo" ) {
			    my $start=$feat_object->location->start;
			    my $stop =$feat_object->location->end;
			    unless ($start == $all_feat_start{$last}{$all_feat_count{$last}}) {
				$all_feat_count{$last}++;
				$all_feat{$last}{$all_feat_count{$last}} = $tag;
				my $start=$feat_object->location->start;
				my $stop =$feat_object->location->end;
				$all_feat_start{$last}{$all_feat_count{$last}} = $start;
				$all_feat_stop{$last}{$all_feat_count{$last}} = $stop;
				if ($feat_object->location->strand == -1) { $strand_af{$last}{$all_feat_count{$last}} = "minus"; } else { $strand_af{$last}{$all_feat_count{$last}} = "plus" }
			    }
			}
		    }
		}
		if ($feat_object->primary_tag eq "CDS") { # only want to look CDS regions
		    my $note = '';
		    for my $tag ($feat_object->get_all_tags) {
			if ($tag eq "pseudo" ) {  ## make pseudogene database ###
			    $note = 'pseudo';
			    my $start=$feat_object->location->start;
			    my $stop =$feat_object->location->end;
			    unless ($start == $all_feat_start{$last}{$all_feat_count{$last}}) {
				$all_feat_count{$last}++;
				$all_feat{$last}{$all_feat_count{$last}} = $tag;
				$all_feat_start{$last}{$all_feat_count{$last}} = $start;
				$all_feat_stop{$last}{$all_feat_count{$last}} = $stop;
				if ($feat_object->location->strand == -1) { $strand_af{$last}{$all_feat_count{$last}} = "minus"; } else { $strand_af{$last}{$all_feat_count{$last}} = "plus" }
			    }
			}
		    }
		    unless ($note eq 'pseudo') {
			$count=$count+5; ##gene no going up in counts of 5 in order to allow addition of new genes plus gene numbers
			my $start=$feat_object->location->start; $hash{$count}{'start'} = $start;
			my $stop =$feat_object->location->end; $hash{$count}{'stop'} = $stop;
			for my $tag ($feat_object->get_all_tags) {
			    for my $value ($feat_object->get_tag_values($tag)) {
				if ($tag eq "locus_tag" ) { my $loc=$value; $hash{$count}{'locus'} = $loc; }
				if ($tag eq "protein_id" ) { my $pid=$value; $hash{$count}{'protein_id'} = $pid; }
				if ($tag eq "translation" ) { my $aa=$value; $hash{$count}{'aa'} = $aa; }
				if ($tag eq "gene" ) { my $genetag=$value; $hash{$count}{'genetag'} = $genetag; }
			    }
			}
			my $id = $tag."_".$hash{$count}{'protein_id'};
			$gene_num{$id} = $count;
			$gene_name{$id} = $id;
			if ($feat_object->location->strand == -1) { $strand{$id} = "minus"; } else { $strand{$id} = "plus" }
			$gene_start{$id} = $hash{$count}{'start'};
			$gene_stop{$id} = $hash{$count}{'stop'};
			$gene_locus{$id} = $hash{$count}{'locus'};
			$species{$id} = $tag;
			$aa{$id} = $hash{$count}{'aa'};
			$gene_tag{$id} = $hash{$count}{'genetag'};
			$chr{$id} = $chromtag;
			$descr{$chromtag} = $descrip;
			$gb_acc{$chromtag} = $locus;
			$last=$id;
		    }

		} 
	    }
	}
    }
  #  print "parse_gbk $tag done\n";

    ############################################### 2. Make OGS segments ############################################################

    my (%OGS_nt_seq, %IG, %Over, %OGS_no, %adj_r_nov, %adj_l_nov, %geneseq, %Olap, %geneseq_nt, %OGS_seqall, %adj_l, %adj_r, %name_plus_over, %ps_OGS, %ps_OGS_start, %ps_OGS_stop, %all_feat_OGS, %all_feat_OGS_start, %all_feat_OGS_stop, %strand_af_OGS);
    my $count_OGS=0;
    foreach my $chrom (keys %fasta) {
#	print "chr:$chrom\n";
	my $seq = $fasta{$chrom}; ##fasta nt seq of chromosome

	my $lastgene; my $count=0; #make adjacency files

	foreach my $gene (sort {$gene_num{$a} <=> $gene_num{$b}} keys %gene_start) {
	    if ($chr{$gene} eq $chrom) {
		if (($chr{$gene} eq $chr{$lastgene}) || ($lastgene eq undef)) {
		    $count++;
		    $adj_l{$gene} = $lastgene;
		    $adj_r{$lastgene} = $gene;
		    $lastgene = $gene;
		}
	    }
	}
	
#	print "$tag:$count\n";
	
	foreach my $gene (sort {$gene_num{$a} <=> $gene_num{$b}} keys %gene_start) {
	    if ($chr{$gene} eq $chrom) {
		if (defined $gene_start{$adj_r{$gene}}) {
		    if ($gene_stop{$gene} < $gene_start{$adj_r{$gene}}) {
			$adj_r_nov{$gene} = $adj_r{$gene}; ## making adjacency files for only genes with NO OVERLAPS
			$adj_l_nov{$adj_r{$gene}} = $gene;
			my $seq_intergenic = substr($seq, $gene_stop{$gene}, ($gene_start{$adj_r{$gene}}-$gene_stop{$gene}-1));    #sequence of the intergenic sequence, from first base after end of genel to last base before\start of genea.
			my $seq_l = substr($seq, $gene_start{$gene}-1, ($gene_stop{$gene}-$gene_start{$gene}+1)); ##left gene sequence
			my $seq_a = substr($seq, $gene_start{$adj_r{$gene}}-1, ($gene_stop{$adj_r{$gene}}-$gene_start{$adj_r{$gene}}+1)); ###right gene sequence

			my $frag = $seq_l.$seq_intergenic.$seq_a;
			$OGS_nt_seq{$gene."-".$adj_r{$gene}} = $frag;
			
			if ($strand{$gene} eq "plus") {
                            $geneseq_nt{$gene} = $seq_l;
                        } else {
                            my $revstring = reverse($seq_l);
                            $revstring =~ s/A/t/g;
                            $revstring =~ s/T/a/g;
                            $revstring =~ s/C/g/g;
                            $revstring =~ s/G/c/g;
                            my $revcomp = uc($revstring);
                            $geneseq_nt{$gene} = $revcomp;
                        }
                        $all_feat_OGS{$gene."-".$adj_r{$gene}} = $all_feat{$gene};
                        $all_feat_OGS_start{$gene."-".$adj_r{$gene}} = $all_feat_start{$gene};
                        $all_feat_OGS_stop{$gene."-".$adj_r{$gene}} = $all_feat_stop{$gene};
			foreach my $num (keys %{$strand_af{$gene}}) {
			    $strand_af_OGS{$gene."-".$adj_r{$gene}}{$num} = $strand_af{$gene}{$num};
			}
			$IG{$gene."-".$adj_r{$gene}}= $seq_intergenic;
			$chr{$gene."-".$adj_r{$gene}} = $chrom;
		    }
		    if ($gene_stop{$gene} >= $gene_start{$adj_r{$gene}}) { ## for OVERLAP OGSs
			my $seq_l = substr($seq, $gene_start{$gene}-1, ($gene_stop{$gene}-$gene_start{$gene}+1));
                        if ($strand{$gene} eq "plus") {
			    $geneseq_nt{$gene} = $seq_l;
			} else {
			    my $revstring = reverse($seq_l);			   
			    $revstring =~ s/A/t/g;
			    $revstring =~ s/T/a/g;
			    $revstring =~ s/C/g/g;
			    $revstring =~ s/G/c/g;
			    my $revcomp = uc($revstring);
			    $geneseq_nt{$gene} = $revcomp;
			}
                        $chr{$gene."-".$adj_r{$gene}} = $chrom;
		    }
		}
	    }
	    
	}
    }	
    open FASTA, ">".$store."/fasta_".$spec_name{$specnum}.".tab";

    foreach my $chrom (keys %fasta) {
	print FASTA "$chrom\t$fasta{$chrom}\n";
    }
    close FASTA;
    
    ############################################### 3. Make Overall hashes ############################################################    
    open OUT, ">".$store."/aa_".$spec_name{$specnum}.".fa"; ## ??
    foreach my $gene (keys %aa) {
        print OUT ">".$gene."\n";
        chomp $aa{$gene};
        print OUT "$aa{$gene}\n";

        $AA_seq_all{$gene} = $aa{$gene}; 
	}
    close OUT;
    foreach my $OGS (keys %IG) {
        $IG_all{$OGS} = $IG{$OGS};
	$OGS_nt_seq_all{$OGS} = $OGS_nt_seq{$OGS};
	$chr_all{$OGS} = $chr{$OGS};
        $all_feat_all{$OGS} = $all_feat_OGS{$OGS};
        $all_feat_start_all{$OGS} = $all_feat_OGS_start{$OGS};
        $all_feat_stop_all{$OGS} = $all_feat_OGS_stop{$OGS}; 
	foreach my $num (keys %{$strand_af_OGS{$OGS}}) {
	    $strand_all_af{$OGS} = $strand_af_OGS{$OGS};
	}
   }
    foreach my $chrom (keys %descr) {
	$chromlist{$spec_name{$specnum}}{$chrom} = $chrom;
        $descr_all{$chrom} = $descr{$chrom};
        $gb_acc_all{$chrom} = $gb_acc{$chrom};
    }
    foreach my $gene (keys %species) {
        $NT_seq_all{$gene} = $geneseq_nt{$gene};
        $species_all{$gene} = $species{$gene};
        $chr_all{$gene} = $chr{$gene};
        $gene_num_all{$gene} = $gene_num{$gene};
	$gene_start_all{$gene} = $gene_start{$gene};
	$strand_all{$gene} = $strand{$gene};
	$gene_stop_all{$gene} = $gene_stop{$gene};
        $gene_tag_all{$gene} = $gene_tag{$gene};
    }
    foreach my $gene (keys %adj_r) {
        $adj_r_all{$gene} = $adj_r{$gene};
    }
    foreach my $gene (keys %adj_l) {
        $adj_l_all{$gene} = $adj_l{$gene};
    }
    foreach my $gene (keys %adj_r_nov) {
        $adj_r_nov_all{$gene} = $adj_r_nov{$gene};
    }
    foreach my $gene (keys %adj_l_nov) {
        $adj_l_nov_all{$gene} = $adj_l_nov{$gene};
    }

}    

## Stage 4 - Do pairwise BLAST searches between species, and make reciprocal best BLAST hit hashes between species ####


# format amino acid database file for BLAST.
for (my $i=1; $i<=$count_spec; $i++) {
    system "formatdb -i ".$store."/aa_".$spec_name{$i}.".fa -p T -o F";
    system "chmod 755 ".$store."/aa_".$spec_name{$i}.".fa";
}

##run blast, using an e-5 cutoff and an e=10 cutoff.
if ($blast_on == 1) {
    for (my $i=1; $i<=$count_spec; $i++) {
	for (my $j=1; $j<=$count_spec; $j++) {
	    if ($j != $i) {
	#	print "$i $j $spec_name{$i} $spec_name{$j}\n";
		unless (-e $Blast_Out."/Strict_".$spec_name{$i}."_vs_".$spec_name{$j}."_1.fa") {
		    system "$blastall -p blastp -d ".$store."/aa_".$spec_name{$j}.".fa -i ".$store."/aa_".$spec_name{$i}.".fa -o ".$Blast_Out."/Strict_".$spec_name{$i}."_vs_".$spec_name{$j}."_1.fa -e $blast_eval1 -v 1 -b 1 -m 8";
		}
		unless (-e $Blast_Out."/Loose_".$spec_name{$i}."_vs_".$spec_name{$j}."_1.fa") {
		    system "$blastall -p blastp -d ".$store."/aa_".$spec_name{$j}.".fa -i ".$store."/aa_".$spec_name{$i}.".fa -o ".$Blast_Out."/Loose_".$spec_name{$i}."_vs_".$spec_name{$j}."_1.fa -e $blast_eval2 -v 1 -b 1 -m 8";
		}
	    }
	}
    }
}

### make reciprocal blast files

my %Recip_hash;
for (my $i=1; $i<=$count_spec; $i++) {
    for (my $j=1; $j<=$count_spec; $j++) {
        if ($j != $i) {
	    my $Blastout1 = $Blast_Out."/Strict_".$spec_name{$i}."_vs_".$spec_name{$j}."_1.fa"; my $Blastout2 = $Blast_Out."/Strict_".$spec_name{$j}."_vs_".$spec_name{$i}."_1.fa"; 
	    my $B = $spec_name{$i}."_vs_".$spec_name{$j}; my $EV = "Strict"; 
	    my $blast_recip_ref = &Recip_auto($Blastout1, $Blastout2, $B, $EV, $store); my %blast_recip = %{$blast_recip_ref};
	    foreach $key (keys %blast_recip) {
		$Recip_hash{$i."_v_".$j}{$key} = $blast_recip{$key};
	    }
        }
    }
}

### Stage 5 - Make initial "Everything reciprocal best hit to everything else pillar ######

my %rev_spec_name = reverse(%spec_name);

my %pillar_A; my $count=0; my %map; my %p_orig; ### setting up initial pillars and mapping.

open OUT, ">".$store."/AA_seq_all.fa";
foreach my $gene (keys %AA_seq_all) {
    $count++;
    $pillar_A{$count}{$species_all{$gene}} = $gene;
    $p_orig{$count}{$species_all{$gene}} = $gene;
    $map{$gene} = $count;

    print OUT ">".$gene."\n$AA_seq_all{$gene}\n";
}
close OUT;

my %mark; my %pillar_good; my %gene_counter_g;
### foreach gene, if it has a reciprocal best hit with another species gene, PROVISIONALLY place in pillar;
foreach my $pill (sort {$a<=>$b} keys %pillar_A) {
    foreach my $spec (keys %{$pillar_A{$pill}}) {
        my $gene = $pillar_A{$pill}{$spec};
        for (my $j=1; $j<=$count_spec; $j++) { #cycling over all species
            unless ($j == $rev_spec_name{$spec}) {
                if (defined $Recip_hash{$rev_spec_name{$spec}."_v_".$j}{$gene}) {
                    my $reciprocal_hit = $Recip_hash{$rev_spec_name{$spec}."_v_".$j}{$gene};
                    unless (defined $mark{$reciprocal_hit}) { # mark indicates gene is already placed in Pillar that passes "total reciprocality test";
                        $pillar_A{$pill}{$spec_name{$j}} = $reciprocal_hit;
#                        print "gene $gene mapkey $map{$gene} ($pill)  adding $pillar_A{$pill}{$spec_name{$j}} from $map{$reciprocal_hit}\n";
                    }
                }
            }
        }
    }
    ### total reciprocality test: foreach gene in PROVISIONAL pillar, see if it hits EACH OTHER GENE in reciprocal manner
    my $orth_count=0; my $good_hom=0;
    foreach my $spec (keys %{$pillar_A{$pill}}) {
        my $gene = $pillar_A{$pill}{$spec};
        $orth_count++;
        for (my $j=1; $j<=$count_spec; $j++) {
            if ((defined $pillar_A{$pill}{$spec_name{$j}}) && (defined $Recip_hash{$rev_spec_name{$spec}."_v_".$j}{$gene})) {
		my $pillar_orth = $pillar_A{$pill}{$spec_name{$j}}; my $recip_hit = $Recip_hash{$rev_spec_name{$spec}."_v_".$j}{$gene};
#		print "PO: gene: $gene po: $pillar_orth RH:$recip_hit\n";
		if ($pillar_orth eq $recip_hit) {
		    $good_hom++;
		}
	    }
        }
    }
    if (($good_hom == (($orth_count)*($orth_count-1))) && ($good_hom != 0)) { ##expect no of reciprocal matches within pillar to = num of genes in pillar mult by (num of genes-1), because each should hit the others but not itself
	foreach my $spec (keys %{$pillar_A{$pill}}) {
	    $pillar_good{$pill}{$spec} = $pillar_A{$pill}{$spec};
	    $gene_counter_g{$pillar_good{$pill}{$spec}}++;
	    my $gene = $pillar_A{$pill}{$spec};
	    $mark{$gene}++; # mark pillar if passes "total reciprocality test"
	    unless ($map{$gene} == $pill) {
		    delete($pillar_A{$map{$gene}}{$spec}); #delete gene that has already been placed
	    }
	}
    }
}

### putting any genes that haven't been placed in reciprocal pillars in single gene pillars.
my $count_new; my %pillar_B;
foreach my $pill (sort {$a<=>$b} keys %pillar_good) {
    $count_new++;
    foreach my $spec (keys %{$pillar_good{$pill}}) {
        $pillar_B{$count_new}{$spec} = $pillar_good{$pill}{$spec};
    }
}
foreach my $pill (sort {$a<=>$b} keys %p_orig) {
    foreach my $spec (keys %{$p_orig{$pill}}) {
        unless (defined $gene_counter_g{$p_orig{$pill}{$spec}}) {
            $count_new++;
            $pillar_B{$count_new}{$spec} = $p_orig{$pill}{$spec};
        }
    }
}

### get a count of number of members in pillars;
my $count1a=0; my $count1b=0; my %counter;
foreach my $pill (sort {$a<=>$b} keys %pillar_B) {
    $count1a++;
    my $count2;
    foreach my $spec (keys %{$pillar_B{$pill}}) {
        $count1b++;
        $count2++;
    }
    for (my $i=1; $i<=$count_spec; $i++) {
        if ($count2 == $i) {
            $counter{$i}++;
        }
    }
}
#print "all pillars: $count1a all genes: $count1b\n";
for (my $i=1; $i<=$count_spec; $i++) {
  #  print "$i: $counter{$i}\n";
}

open PILLB, ">".$store."/pillar_B.tab";
foreach my $pill (keys %pillar_B) {
    foreach my $spec (keys %{$pillar_B{$pill}}) {
	print PILLB "$pill\t$spec\t$pillar_B{$pill}{$spec}\n";
    }
}
close PILLB;


################# Stage 6 - Run SyntenoBlast to place genes that aren't reciprocal best hits in pillar #######################################

### Read in blast files ###

my $element; my %blast_hash;
for (my $i=1; $i<=$count_spec; $i++) {
    for (my $j=1; $j<=$count_spec; $j++) {
        unless ($i == $j) {
            open IN, "<".$Blast_Out."/Loose_".$spec_name{$i}."_vs_".$spec_name{$j}."_1.fa"; my @blast = <IN>; close IN;
            foreach $element (@blast) {
                push @{$blast_hash{$spec_name{$i}."_vs_".$spec_name{$j}}}, $element;
            }
        }
    }
}

#### Hash of Blast results ######
#### Key1 = e.g. 1v2, key2 = query gene, value = gene hit ###

my %blast_q; my %blast_all; my %blast_E;
foreach $key (keys %blast_hash) {
    my $count_q=0;
    my $last = '';
    foreach $element (@{$blast_hash{$key}}) {
        my @array = split(/\t/, $element);
        unless ($array[0] eq $last) {
            $blast_q{$key}{$array[0]} = $array[1];
            $blast_E{$key}{$array[0]} = $array[10];
            $blast_all{$array[0]} = $array[1];
            $last = $array[0];
        }
    }
}

my %count_PR; my $count_key=0; my %map_B;

#### Putting 5member pillars into pillarR_big2, 1-4member pillars into newpill ########

my %new_pil; my %species; my $key3; my $count_Rf=0; my $count_Rnf=0; my $count_R_p_tot=0; my %pillarR;
my %stat; my %map_stat; my %counter_NP;
foreach my $pill (sort {$a<=>$b} keys %pillar_B) {
    my $count_Rbig=0;
    foreach my $spec (keys %{$pillar_B{$pill}}) {
        $count_R_p_tot++;
        $count_Rbig++;
    }
    if ($count_Rbig == $count_spec) {
        $count_key++;
        foreach my $spec (keys %{$pillar_B{$pill}}) {
            $count_Rf++;
            $pillarR{$pill}{$spec} = $pillar_B{$pill}{$spec};
            my $gene = $pillar_B{$pill}{$spec};
            $map_B{$gene} = $count_key;
        }
    }
    if ($count_Rbig < $count_spec) {
        $count_key++;
        foreach my $spec (keys %{$pillar_B{$pill}}) {
            $count_Rnf++;
            $new_pil{$count_key}{$spec} = $pillar_B{$pill}{$spec};
            my $gene = $pillar_B{$pill}{$spec};
            $counter_NP{$gene}++;
            $map_B{$gene} = $count_key;
            $stat{$count_key}{$spec} = $pillar_B{$pill}{$spec};
            $map_stat{$gene} = $count_key;
        }
    }
}

# print "Not in strict pillars $count_Rnf genes, in strict pillars $count_Rf genes All genes: $count_R_p_tot\n";

########## Round 1 of synteno #######

my %np; my %rem_all; my %count_all; my %pillarR_toadd_all; my %pillarcount; my %count2hit_all; my %genecount;
for (my $x=1; $x<=$synt_step; $x++) {
    my @out_array =  @{&synt(\%new_pil, \%map_B, \%map_stat)};

    my %pillarR_toadd = %{$out_array[0]};
    %new_pil = %{$out_array[1]};

    my %map_out = %{$out_array[2]}; ###Update map between syntenoruns
    foreach my $gene (keys %map_out) {
        $map_B{$gene} = $map_out{$gene};
    }

    my %rem = %{$out_array[3]};
    my %countA = %{$out_array[4]};
    my %count2hit = %{$out_array[5]};

    foreach my $pill (keys %pillarR_toadd) {
        foreach my $spec (keys %{$pillarR_toadd{$pill}}) {
            $pillarR_toadd_all{$pill}{$spec} = $pillarR_toadd{$pill}{$spec};
        }
    }
    foreach my $pill (keys %new_pil) {
        $pillarcount{$x}++;
        foreach my $spec (keys %{$new_pil{$pill}}) {
            $genecount{$x}++;
            $np{$x}{$pill}{$spec} = $new_pil{$pill}{$spec};
        }
    }
    foreach $key (keys %countA) {
	$count_all{$x}{$key} = $countA{$key};
    }
    foreach $key (keys %count2hit) {
        $count2hit_all{$x}{$key} = $count2hit{$key};
    }
}

my %pillar_C; my $pfull=0; my $count_fin=0; my %map_C;  ### make full hash ###
foreach my $pill (sort {$a<=>$b} keys %pillarR) {
    $count_fin++;
    $pfull++;
    foreach my $spec (keys %{$pillarR{$pill}}) {
        $pillar_C{$count_fin}{$spec} = $pillarR{$pill}{$spec};
        $map_C{$pillarR{$pill}{$spec}} = $count_fin;
    }
}
foreach my $pill (sort {$a<=>$b} keys %pillarR_toadd_all) {
    $count_fin++;
    $pfull++;
    foreach my $spec (keys %{$pillarR_toadd_all{$pill}}) {
        $pillar_C{$count_fin}{$spec} = $pillarR_toadd_all{$pill}{$spec};
        $map_C{$pillarR_toadd_all{$pill}{$spec}} = $count_fin;
    }
}
foreach my $pill (sort {$a<=>$b} keys %new_pil) {
    $count_fin++;
    foreach my $spec (keys %{$new_pil{$pill}}) {
        $pillar_C{$count_fin}{$spec} = $new_pil{$pill}{$spec};
        $map_C{$new_pil{$pill}{$spec}} = $count_fin;
    }
}

open PILLC, ">".$store."/pillar_C.tab";
foreach my $pill (keys %pillar_C) {
    foreach my $spec (keys %{$pillar_C{$pill}}) {
        print PILLC "$pill\t$spec\t$pillar_C{$pill}{$spec}\n";
    }
}
close PILLC;
open MAPC, ">".$store."/map_C.tab";
open NTSEQ, ">".$store."/NT_seq_all.tab";
open SPEC, ">".$store."/species_all.tab";
open GENENUM, ">".$store."/gene_num_all.tab";
open GENESTART, ">".$store."/gene_start_all.tab";
open GENESTOP, ">".$store."/gene_stop_all.tab";
open GENETAG, ">".$store."/gene_tag_all.tab";
open STR, ">".$store."/strand_all.tab";
open STR_AF, ">".$store."/strand_all_af.tab";
open ADJR, ">".$store."/adj_r_all.tab";
open ADJL, ">".$store."/adj_l_all.tab";

foreach my $gene (keys %map_C) {
    print MAPC "$gene\t$map_C{$gene}\n";
    print NTSEQ "$gene\t$NT_seq_all{$gene}\n";
    print SPEC "$gene\t$species_all{$gene}\n";
    print GENENUM "$gene\t$gene_num_all{$gene}\n";
    print GENESTART "$gene\t$gene_start_all{$gene}\n";
    print GENESTOP "$gene\t$gene_stop_all{$gene}\n";
    print STR "$gene\t$strand_all{$gene}\n";
    print GENETAG "$gene\t$gene_tag_all{$gene}\n";
}
open DESCR, ">".$store."/descr_all.tab";
open GBACC, ">".$store."/gb_acc_all.tab";
open IG, ">".$store."/IG_all.tab";
open OGSNT, ">".$store."/OGS_nt_seq_all.tab";
open CHR, ">".$store."/chr_all.tab";
open CHROMLIST, ">".$store."/chromlist_all.tab";
open AF, ">".$store."/allfeat_all.tab";
open AF_START, ">".$store."/allfeat_start_all.tab";
open AF_STOP, ">".$store."/allfeat_stop_all.tab";

foreach my $chrom (keys %descr_all) {
    print DESCR "$chrom\t$descr_all{$chrom}\n";    
    print GBACC "$chrom\t$gb_acc_all{$chrom}\n";
}
foreach my $OGS (keys %IG_all) {
    print IG "$OGS\t$IG_all{$OGS}\n";
    print OGSNT "$OGS\t$OGS_nt_seq_all{$OGS}\n";
}

foreach my $OGS (keys %all_feat_all) {
    foreach my $num (keys %{$all_feat_start_all{$OGS}}) {
	print AF "$OGS\t$num\t$all_feat_all{$OGS}{$num}\n";
        print AF_START "$OGS\t$num\t$all_feat_start_all{$OGS}{$num}\n";
        print AF_STOP "$OGS\t$num\t$all_feat_stop_all{$OGS}{$num}\n";
	print STR_AF "$OGS\t$num\t$strand_all_af{$OGS}{$num}\n";
    }
}

foreach my $gene (keys %adj_r_all) {
    print ADJR "$gene\t$adj_r_all{$gene}\n";
}
foreach my $gene (keys %adj_l_all) {
    print ADJL "$gene\t$adj_l_all{$gene}\n";
}
foreach my $feat (keys %chr_all) {
    print CHR "$feat\t$chr_all{$feat}\n";
}
foreach my $spec (keys %chromlist) {
    foreach my $chrom (keys %{$chromlist{$spec}}) {
        print CHROMLIST "$spec\t$chrom\t$chromlist{$spec}{chrom}\n";
    }
}

close AF; close AF_START; close AF_STOP; close CHROMLIST; close CHR; close ADJR; close ADJL; close IG;
close OGSNT; close GBACC; close MAPC; close NTSEQ; close SPEC; close GENENUM; close GENESTART; close GENESTOP;
close GENETAG; close DESCR; close STR; close STR_AF;

#print "Step 1 (create structures) completed\n\n";

sub Recip_auto {
    my ($blast_file, $blast_file2, $tag, $tag2, $store) = @_;

    open IN, $blast_file; my @blast_f = <IN>; close IN;

    my $element; my %blast_h1; my $last = '';
    foreach $element (@blast_f) {
        my @array = split(/\t/, $element);
        unless ($array[0] eq $last) {
            $blast_h1{$array[0]} = $array[1];
            $last = $array[0];
        }
    }
    open IN, $blast_file2; my @blast_f2 = <IN>; close IN;

    my %blast_h2;
    $last = '';
    foreach $element (@blast_f2) {
        my @array = split(/\t/, $element);
        unless ($array[0] eq $last) {
            $blast_h2{$array[0]} = $array[1];
            $last = $array[0];
        }
    }

    my $blast_query; my %blast_recip;
    foreach $blast_query (keys %blast_h1) {
        if (defined $blast_h1{$blast_query}) {
            my $hit = $blast_h1{$blast_query};
            if ((defined $blast_h2{$hit}) && ($blast_h2{$hit} eq $blast_query)) {
                $blast_recip{$blast_query} = $blast_h1{$blast_query};
            }
        }
    }

    return \%blast_recip;
}



### For rounds of syntenoblast ###
sub synt {
    my ($new_pil_sub_ref, $map_sub_ref, $map_stat_sub_ref) =@_;
    my %new_pil_sub = %{$new_pil_sub_ref};
    my %map_sub = %{$map_sub_ref};
    my %map_stat_sub = %{$map_stat_sub_ref};

    my %count_2hit;
    my %rem1; my %rem2; my $key4; my %new_pil_sub2;
    foreach $key (sort {$a<=>$b} keys %new_pil_sub) {  ###cycle by pillar
        foreach my $key2 (keys %{$new_pil_sub{$key}}) { ###cycle by species within pillar
            my $query = $new_pil_sub{$key}{$key2};
            my %N_l; my %N_r;
            foreach $key3 (keys %blast_q) { ###cycle over blast result file e.g. 1v2
                if (defined $blast_q{$key3}{$query}) {
                    my $hit = $blast_q{$key3}{$query};
                    my $val = $blast_E{$key3}{$query};
                    my $tmp_l_gene = $query; my $tmp_r_gene = $query;
                    my $tmp_l_hit = $hit; my $tmp_r_hit = $hit;
                    my %count_N; my %count_NP;
                    for (my $i=1; $i<=$neighbour_step; $i++) {
                        $N_r{$query}{$i} = $adj_r_all{$tmp_r_gene}; ##Make list of left+right neighbours for query gene
                        my $pil_Rgene = $map_sub{$tmp_r_gene};
                        $count_N{$pil_Rgene}++;
                        $tmp_r_gene = $adj_r_all{$tmp_r_gene};
                        $N_l{$query}{$i} = $adj_l_all{$tmp_l_gene};
                        my $pil_Lgene = $map_sub{$tmp_l_gene};
                        $count_N{$pil_Lgene}++;
                        $tmp_l_gene = $adj_l_all{$tmp_l_gene};
                        $N_r{$hit}{$i} = $adj_r_all{$tmp_r_hit}; #List of l+r neighbours for gene hit
                        $tmp_r_hit = $adj_r_all{$tmp_r_hit};
                        $N_l{$hit}{$i} = $adj_l_all{$tmp_l_hit};
                        $tmp_l_hit = $adj_l_all{$tmp_l_hit};
                    }
                    my $synt=0;
                    foreach $key (keys %{$N_l{$hit}}) { ### for left and right, check if query and hit have neighbouring pillars in common - if yes synt+1
                        my $left = $N_l{$hit}{$key};
                        if (defined $count_N{$map_sub{$left}}) {
                            $synt++;
                        }
                    }
                    foreach $key (keys %{$N_r{$hit}}) {
                        my $right = $N_r{$hit}{$key};
                        if (defined $count_N{$map_sub{$right}}) {
                            $synt++;
                        }
                    }
                    my $syntP=0;
                    if (defined $new_pil_sub{$map_sub{$hit}}{$species_all{$query}}) { ### if exists gene from query species in pillar hit, figure out if it's more syntenic with hit than query.
			my $Pilhit = $map_sub{$hit};
			my $Pgene = $new_pil_sub{$Pilhit}{$species_all{$query}};
			my $tmp_l_Pgene = $new_pil_sub{$Pilhit}{$species_all{$query}};  my $tmp_r_Pgene = $new_pil_sub{$Pilhit}{$species_all{$query}};
			for (my $i=1; $i<=$neighbour_step; $i++) {
			    $N_r{$Pgene}{$i} = $adj_r_all{$tmp_r_Pgene};  #List of l+r neighbours for gene from query species in pillar hit
			    my $pil_RPgene = $map_sub{$tmp_r_Pgene};
			    $count_NP{$pil_RPgene}++;
			    $tmp_r_Pgene = $adj_r_all{$tmp_r_Pgene};
			    $N_l{$Pgene}{$i} = $adj_l_all{$tmp_l_Pgene};
			    my $pil_LPgene = $map_sub{$tmp_l_Pgene};
			    $count_NP{$pil_LPgene}++;
			    $tmp_l_Pgene = $adj_l_all{$tmp_l_Pgene};
			}
			foreach $key (keys %{$N_l{$hit}}) {
			    my $left = $N_l{$hit}{$key};
			    if (defined $count_NP{$map_sub{$left}}) {
				$syntP++;
			    }
			}
			foreach $key (keys %{$N_r{$hit}}) {
			    my $right = $N_r{$hit}{$key};
			    if (defined $count_NP{$map_sub{$right}}) {
				$syntP++;
			    }
			}
		    }
		    if ($synt >0) { ### if query and hit have neighbouring pillars in common.
			my $pil = $map_sub{$hit};
			if (defined $new_pil_sub{$pil}) {
			    unless ($pil == $key) { ###make sure query is not hitting genes already in a pillar with query.
				my $count_cont=0;
				foreach $key4 (keys %{$new_pil_sub{$key}}) {
				    if ((defined $new_pil_sub{$pil}{$key4}) && (defined $new_pil_sub{$key}{$key4})) {
					$count_cont++;
				    }
				}
				if ($count_cont == 0) { ## if there are no cases where query pillar and hit pillar both contain genes from the same species, we can collapse hit pillar into query pillar
				    foreach $key4 (keys %{$new_pil_sub{$pil}}) {
					if (defined $new_pil_sub{$pil}{$key4}) {
					    $new_pil_sub{$key}{$key4} = $new_pil_sub{$pil}{$key4};
#					    print "Collapse1 $pil $key4 $new_pil_sub{$pil}{$key4} into $key\n";
					    my $del_gene = $new_pil_sub{$pil}{$key4};
					    $rem1{$pil}{$key4} = $del_gene; #hash of removed pillars
					    $map_sub{$del_gene} = $key; #updating mapping
					}
				    }
				    delete $new_pil_sub{$pil};
				} else { ### query pillar and hit pillar contain genes from same species - cannot collapse
				    if (defined $new_pil_sub{$pil}{$species_all{$query}}) {
#					print "$query $map_sub{$query} hits $hit from pillar $pil in blast search however $hit already in pillar with $new_pil_sub{$pil}{$species_all{$query}}\n";
				    }
				    $count_2hit{'all'}++;
				    
				    if (defined $new_pil_sub{$pil}{$species_all{$query}}) { ## if gene from query species exists in hit pillar
					my $file = $species_all{$query}."_vs_".$species_all{$hit}; ## open query v hit species blast file
					unless ($map_stat{$new_pil_sub{$pil}{$species_all{$hit}}} == $map_stat{$new_pil_sub{$pil}{$species_all{$query}}}) { ### don't touch existing pillars if they are included in original 'strict reciprocal-synt' set
					    if ($synt > $syntP) { ### if hit has more shared synteny with query than existing (query genome) gene in hit pillar, move it to query pillar and delete from hit pillar
					    $count_2hit{'synt'}++;
#					    print "$synt > $syntP $val $blast_E{$key3}{$new_pil_sub{$pil}{$species_all{$query}}} $query $hit $new_pil_sub{$pil}{$species_all{$query}} Should replace!\n";
					    if (defined $new_pil_sub{$key}{$species_all{$hit}}) { ## if query pillar already contains gene from hit genome, delete and place in singles pillar
						$count_key++;
						$new_pil_sub{$count_key}{$species_all{$hit}} = $new_pil_sub{$key}{$species_all{$hit}};
						$map_sub{$new_pil_sub{$key}{$species_all{$hit}}} = $count_key; #updating mapping
#						print "Making new pillar $count_key $new_pil_sub{$count_key}{$species_all{$hit}}\n";
					    }
					    $new_pil_sub{$key}{$species_all{$hit}} = $hit;
					    my $del_gene = $hit;
					    $rem1{$pil}{$species_all{$hit}} = $del_gene;
					    $map_sub{$del_gene} = $key; #updating mapping
					    delete $new_pil_sub{$pil}{$species_all{$hit}}; #delete hit from hit pillar
					}
					    
					    if ($synt == $syntP) { ### if equal synteny, only move+delete if stronger e value for query to hit than for hitpillargene to hit.
						if ($val < $blast_E{$key3}{$new_pil_sub{$pil}{$species_all{$query}}}) {
						    $count_2hit{'eval'}++;
#						    print "$synt = $syntP $val $blast_E{$key3}{$new_pil_sub{$pil}{$species_all{$query}}} $query $hit $new_pil_sub{$pil}{$species_all{$query}} Should replace!\n";
#						    print "1EMove $pil $species_all{$hit} $hit into $key\n";
						    if (defined $new_pil_sub{$key}{$species_all{$hit}}) {
							$count_key++;
							$new_pil_sub{$count_key}{$species_all{$hit}} = $new_pil_sub{$key}{$species_all{$hit}};
							$map_sub{$new_pil_sub{$key}{$species_all{$hit}}} = $count_key; #updating mapping
#							print "Making new pillar $count_key $new_pil_sub{$count_key}{$species_all{$hit}}\n";
						    }
						    $new_pil_sub{$key}{$species_all{$hit}} = $hit;
						    my $del_gene = $hit;
						    $rem1{$pil}{$species_all{$hit}} = $del_gene;
						    $map_sub{$del_gene} = $key; #updating mapping
						    delete $new_pil_sub{$pil}{$species_all{$hit}}; #delete hit from hit pillar                              \
						    
						}
					    }
					}
				    }
				}
				
			    }
			}
			
		    }
		}
	    }
	}
	
    }
    my %count_sub; my %pillarR_toadd_sub;
    foreach $key (sort {$a<=>$b} keys %new_pil_sub) {
        my $count_np2=0;
        foreach my $key2 (keys %{$new_pil_sub{$key}}) {
            $count_np2++;
        }
        unless (($count_np2 == 0) || ($count_np2 == $count_spec)) { ###put pillars containing 1-4 genes into new_pil2
            foreach my $key2 (keys %{$new_pil_sub{$key}}) {
                $new_pil_sub2{$key}{$key2} = $new_pil_sub{$key}{$key2};
            }
        }
        if ($count_np2 == $count_spec) { ### put pillars containing a gene from each species genes into 'pillarR_to add'
            $count_sub{$count_spec}++;
            foreach my $key2 (keys %{$new_pil_sub{$key}}) {
                my $gene = $new_pil_sub{$key}{$key2};
                $pillarR_toadd_sub{$key}{$key2} = $gene;
                $count_PR{$gene}++;
            }
        }
    }

    foreach $key (sort {$a<=>$b} keys %new_pil_sub2) {
        my $count_ind=0;
        foreach my $key2 (keys %{$new_pil_sub2{$key}}) {
            $count_ind++;
        }
        for (my $i=1; $i<$count_spec; $i++) { #from 1 to $count_spec-1;
            if ($count_ind == $i) {
                $count_sub{$i}++;
            }
        }
    }
    my @out = (\%pillarR_toadd_sub, \%new_pil_sub2, \%map_sub, \%rem1, \%count_sub, \%count_2hit);
    return (\@out);
}

close ERROR;    

exit;
