#!/usr/bin/python

####################################################
#                                                  #
#   SUBSEQUENCE EXTRACTOR FROM TCLBLAST REPORT     #
#                                                  #
#       COPYRIGHT, ALEXANDER KOZIK, 2005           #
#                                                  #
####################################################

def _read_sequence_table(path):
	"""Load a tab-delimited sequence file into an ``{id: sequence}`` dict.

	Column 1 of every row is used as the sequence id and column 3 as the
	sequence itself.  Each id is echoed to stdout as it is loaded.  Blank
	lines are skipped; a later row with the same id overwrites an earlier one.

	Raises:
		IOError/OSError: if ``path`` cannot be opened.
		IndexError: if a non-blank row has fewer than three columns.
	"""
	table = {}
	with open(path, "r") as handle:
		for line in handle:
			# Strip only the line terminator, never data characters.
			line = line.rstrip('\r\n')
			if not line.strip():
				continue
			fields = line.split('\t')
			seq_id = fields[0]
			table[seq_id] = fields[2]
			print(seq_id)
	return table


def _extract_hit_region(hit, sst, sen, frw_array, rev_array):
	"""Return ``(direction, sub_sequence)`` for 1-based subject coordinates.

	When ``sen >= sst`` the region is cut from the forward table; otherwise
	it is cut from the reverse table, with both coordinates mirrored against
	the sequence length.  A single-position hit (``sst == sen``) is treated
	as forward so that a result is always defined.

	Raises:
		KeyError: if ``hit`` is missing from the table that is needed.
	"""
	if sen >= sst:
		sequence = frw_array[hit]
		return 'forward', sequence[sst - 1:sen]
	# NOTE(review): presumably the REV file holds reverse-complemented
	# sequences, which is why coordinates are mirrored here -- confirm.
	sequence = rev_array[hit]
	seq_len = len(sequence)
	return 'reverse', sequence[seq_len - sst:seq_len - sen + 1]


def Seqs_Extractor(in_name1, in_name2, in_name3, out_name, pause_seconds=2):
	"""Extract the subject sub-sequence of every self-hit in a BLAST table.

	Args:
		in_name1: tab-delimited BLAST report.  Columns used (1-based):
			1 = query id, 2 = hit id, 3 = description (rows whose
			description is ``no_hits_found`` are ignored), 12 = subject
			start, 13 = subject end.
		in_name2: tab-delimited forward sequences (id in column 1,
			sequence in column 3).
		in_name3: tab-delimited reverse sequences, same layout.
		out_name: output file; one FASTA-like record is written per self-hit
			(rows where the hit id equals the query id).
		pause_seconds: how long to pause after the start-up banner and after
			each warning about a sub-sequence whose length is not a multiple
			of three.  Defaults to 2; pass 0 to run without pauses.

	Raises:
		IOError/OSError: if a file cannot be opened.
		KeyError: if a self-hit id is absent from the sequence table needed.
		IndexError/ValueError: if a self-hit row lacks the coordinate
			columns or they are not integers.
	"""
	print("=====================================")
	print("INPUT FILE 1 (INFO2) :   " + in_name1)
	print("INPUT FILE 2 (FRW)   :   " + in_name2)
	print("INPUT FILE 3 (REV)   :   " + in_name3)
	print("OUTPUT FILE          :   " + out_name)
	print("=====================================")

	if pause_seconds:
		time.sleep(pause_seconds)

	frw_array = _read_sequence_table(in_name2)
	rev_array = _read_sequence_table(in_name3)

	# ``with`` guarantees both handles are closed even if a row is malformed.
	# The report is opened before the output so that a missing input does
	# not truncate an existing output file.
	with open(in_name1, "r") as blast_file:
		with open(out_name, "w") as out_file:
			for line in blast_file:
				line = line.rstrip('\r\n')
				if not line.strip():
					continue
				fields = line.split('\t')
				query_id = fields[0]
				hit = fields[1]

				if fields[2] == 'no_hits_found':
					continue
				# Only hits of a sequence against itself are extracted.
				if hit != query_id:
					continue

				sst = int(fields[11])
				sen = int(fields[12])

				direction, sub_seq = _extract_hit_region(hit, sst, sen, frw_array, rev_array)
				sub_len = len(sub_seq)

				# Warn when the region is not a whole number of triplets.
				if sub_len % 3 != 0:
					print("")
					print("MOD IS NOT 3 !!!")
					print(direction)
					print("")
					if pause_seconds:
						time.sleep(pause_seconds)

				out_file.write(">" + hit + " " + direction + " [" + str(sst) + "-" + str(sen) + "] LENGTH: " + str(sub_len) + '\n' + sub_seq + '\n')
				print(hit)

##################
#                #
#   MAIN BODY    #
#                #
##################

import math
import re
import sys
import string
import time
import os

if __name__ == "__main__":
	# Command-line entry point: exactly four positional arguments are
	# required (sys.argv[0] is the script name, hence the length of 5).
	if len(sys.argv) != 5:
		print("Program usage: ")
		print("input_file1(INFO2) input_file2(FRW) input_file3(REV) output_file")
		# Non-zero status lets calling shells and pipelines detect misuse.
		sys.exit(1)
	in_name1, in_name2, in_name3, out_name = sys.argv[1:5]
	Seqs_Extractor(in_name1, in_name2, in_name3, out_name)
### THE END ###
