#!/usr/bin/perl

# Name: contig_target.pl
# Author: Pi-hsia Su
# Date: Oct. 23, 2002

use strict;
use Getopt::Long;

my $seq_file="";
my $assembly_all="";
my $trimmed_all="";

if ($#ARGV < 0) {
	&usage();
}

GetOptions ("x=s" => \$seq_file,
	"a=s" => \$assembly_all);

if (!$seq_file || !$assembly_all) {
	&usage();
}

my $homedir = `pwd`;
chomp $homedir;
my $dirname = "$seq_file.load";
my $assem_dir = "$seq_file";
my @ID=();
my @target=();
my @exclude=();
my @contig_len = ();

&Check_dir();
`mkdir "$seq_file.load"`;
&Copy_all_files();
&ID_info();
&Transfer_seq();
chdir "$dirname";
opendir DIR, ".";
my @assembly = readdir(DIR);
my $temp1 = shift(@assembly);
$temp1 = shift(@assembly);

my $assem="";
my $seq_id="";
my $len=0;
my $seq="";

&Process();
print "\n";
print "Finished primer design.\n";
&Clean_all();
&Clean_others();
closedir DIR;
chdir "$homedir";
opendir HOME, ".";
# `rmdir "$seq_file.load"`;
closedir HOME;

sub usage {
	print <<END;

	Invalid input!  Please refer to the following:

	contig_target.pl -x <spreadsheet> -a <assembly>
	
	<spreadsheet> contains 8 columns(tab delimited): ID,L,M,R,range(=R-L),Lx,Rx,length,
	<assembly> assembly tab-delimited file (from phpMyAdmin),
END
exit;
}

sub Copy_all_files {
	`cp $homedir/$assembly_all $homedir/$dirname/$assembly_all`;
}

sub Check_dir {
	if ((-e "$seq_file.out") == 1) {
		print "\nPlease rename the existing $seq_file.out using mv.\n\n";
		exit;
	}
	if ((-e "$seq_file.load") == 1) {
		print "\nPlease rename the existing $seq_file.load using mv.\n\n";
		exit;
	}
}

sub ID_info {
    my $x = 0;
    my @input_id = ();
    my @target_L = ();
    my @target_M = ();
    my @target_R = ();
    my @target_len = ();
    my @target_range = ();
    my @target_set = ();
    my @target_Excl_L = ();
    my @target_Excl_R = ();
    my @Excl_L = ();
    my @Excl_R = ();
    my @Excl_R_range = ();
    my @exclude_set = ();
    open (IN, $seq_file) || die ("Can't open file: $seq_file $! \n");
    while (<IN>) {
	if (($_ =~ /\w/) && ($_ =~ /C/)) {
		chomp;
		$x++;
		($input_id[$x], $target_L[$x], $target_M[$x], $target_R[$x], $target_len[$x], $target_Excl_L[$x], $target_Excl_R[$x], $contig_len[$x]) = split (/\t/, $_);
		s/ //g for $input_id[$x];
		s/ //g for $target_L[$x];
		s/ //g for $target_M[$x];
		s/ //g for $target_R[$x];
		s/ //g for $target_len[$x];
		s/ //g for $target_Excl_L[$x];
		s/ //g for $target_Excl_R[$x];
		s/ //g for $contig_len[$x];
		
		print "\nInput record $x: \t$input_id[$x]\n";
		push @ID, $input_id[$x];
		$target_range[$x]=$target_R[$x]-$target_L[$x];
		$target_set[$x] = "$target_L[$x]".","."$target_range[$x]"." ";
		push @target, $target_set[$x];
#		print "Target $x: $target_set[$x]\n";
		$Excl_L[$x] = "1,"."$target_Excl_L[$x]"." ";
		$Excl_R_range[$x] = $contig_len[$x]-$target_Excl_R[$x];
		$Excl_R[$x] = "$target_Excl_R[$x]".","."$Excl_R_range[$x]"." ";
		$exclude_set[$x] = "$Excl_L[$x]"." "."$Excl_R[$x]";
		push @exclude, $exclude_set[$x];
#		print "Exclude $x: $exclude_set[$x]\n";
	}
    }
    close IN;
}

sub Transfer_seq {
	my $name="";
	my $count=0;
	print "\n";
	chdir "$dirname";
	opendir DIR, ".";
	foreach $name (@ID) {
		$count++;
		open (OUT, ">$name.assem");
		open (DBASSEM, $assembly_all);  
		while (<DBASSEM>) {
			chomp;
			my ($id, $len, $seq) = split (/\t/, $_);
			print OUT $_ if ($name eq $id);	
		}
		print ".","$count";
		close DBASSEM;
		close DBTRIM;
		close OUT;
	}
	print "\nFinished transfering sequence infomation.\n";
	closedir DIR;
}

sub Process {
my $y=0;
my $z=0;
   foreach $assem (@ID) {
	chomp $assem;
	open (ASSEM, "$assem.assem");
	while (<ASSEM>) {
		chomp;
		($seq_id, $len, $seq) = split (/\t/, $_);
	}
	$y++;
      	&Primer_target(1, $seq, $len, $target[$z], $exclude[$z]);  
      	`primer3_core < $seq_id.primer.1 > $seq_id.plist.1`;
	&Table_heading() if ((-e "$assem_dir.out") == 0);
	&Primer_table($y, "$seq_id.plist.1", $len, "", $target[$z], $exclude[$z]);
	$z++;
	print "=","$y";
   }
}

sub Clean_all {
	unlink $assembly_all;
}

sub Clean_others {
	`cp "$assem_dir.out" ".."`;
	# ` rm *.*`; 
	}

sub Primer_target {
    my ($n, $sequence, $seq_len, $p_target, $p_exclude) = @_;
    open (LIST, ">$seq_id.primer.$n");
    print LIST "PRIMER_SEQUENCE_ID=$seq_id\n";
    print LIST "SEQUENCE=$sequence\n";
    print LIST "PRIMER_COMMENT=$seq_len\n";
    print LIST "TARGET=", $p_target, "\n";
    print LIST "EXCLUDED_REGION=", $p_exclude, "\n";
    print LIST "PRIMER_PRODUCT_SIZE_RANGE=100-400\n";
    print LIST "PRIMER_MAX_POLY_X=3\n";
    print LIST "=\n";
    close LIST;
}

sub Table_heading {
    my $heading = "Input_Record\tID\tLength\tTarget\tExclude\tPair_No\tPrimer_L_Seq\tPrimer_R_Seq\tPrimer_L_Pos_Len\tPrimer_R_Pos_Len\tPrimer_L_Tm\tPrimer_R_Tm\tPrimer_L_GC\tPrimer_R_GC\tProduct_Size\n";
    open (TH, ">$assem_dir.out");
    print TH $heading;
    close TH;
}

sub Primer_table {
    my ($record, $input, $t_len, $t_ssr, $t_target, $t_exclude) = @_;
    my $t_id="";

    open (PT, ">>$assem_dir.out");
    open (IN, "$input") if (-e "$input");
    while (<IN>) {
	if (/^PRIMER_SEQUENCE_ID=(\S+)/) {
		$t_id = $1;
		print PT "$record\t";
		print PT "$t_id\t";
		print PT "$t_len\t";
		print PT "$t_target\t";
		print PT "$t_exclude\t";
	}  elsif (/^PRIMER_LEFT_SEQUENCE=(\S+)/) {
		print PT "1\t";
		print PT "$1\t";
	}  elsif (/^PRIMER_RIGHT_SEQUENCE=(\S+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_LEFT=(\d+\,\d+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_RIGHT=(\d+\,\d+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_LEFT_TM=(\d+\.\d+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_RIGHT_TM=(\d+\.\d+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_LEFT_GC_PERCENT=(\d+\.\d+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_RIGHT_GC_PERCENT=(\d+\.\d+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_PRODUCT_SIZE=(\d+)/) {
		print PT "$1\n";
	}  elsif (/^PRIMER_LEFT_(\d)_SEQUENCE=(\S+)/) {
		print PT "$record\t";
		print PT "$t_id\t";
		print PT "$t_len\t";
		print PT "$t_target\t";
		print PT "$t_exclude\t";
		print PT ($1+1), "\t$2\t";
	}  elsif (/^PRIMER_RIGHT_\d_SEQUENCE=(\S+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_LEFT_\d=(\d+\,\d+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_RIGHT_\d=(\d+\,\d+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_LEFT_\d_TM=(\d+\.\d+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_RIGHT_\d_TM=(\d+\.\d+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_LEFT_\d_GC_PERCENT=(\d+\.\d+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_RIGHT_\d_GC_PERCENT=(\d+\.\d+)/) {
		print PT "$1\t";
	}  elsif (/^PRIMER_PRODUCT_SIZE_\d=(\d+)/) {
		print PT "$1\n";
	} 	
    }
    print PT "\n";
    close IN;
    close PT;	
}
1;
