#!/usr/bin/python

#################################################################
#                                                               #
#                          MAD MAPPER                           #
#                                                               #
#                      MAD MAPPING PROGRAM                      #
#                                                               #
#                      DIGITAL  CLUSTERING                      #
#                                                               #
#                  COPYRIGHT  2004 2005 2006                    #
#                        Alexander Kozik                        #
#                                                               #
#                      http://www.atgc.org/                     #
#                                                               #
#             UCD Genome Center. R.Michelmore group             #
#                                                               #
#################################################################


##################################################################################
#1   5   10   15   20   25   30   35   40   45   50   55   60   65   70   75   80#
#-=3+5-=3+5-=3+5-=3+5-=3+5-=3+5-=3+5-=3+5-=3+5-=3+5-=3+5-=3+5-=3+5-=3+5-=3+5-=3+5#
##################################################################################


##################################################################################
#                           --+============+--                                   #
#                              README_FIRST:                                     #
#                            PYTHON MAD MAPPER                                   #
#                        HOW IT WORKS AND WHAT IT DOES:                          #
#                  RULES AND ALGORITHMS OF MADMAPPER APPROACH                    #
#             ---++==========================================++---               #
#                                                                                #
#   --------------------------------------------------------------------------   #
#  NOTE:                                                                         #
#         LABELS LIKE ### README_00X ### ARE INCORPORATED INTO THE SOURCE CODE   #
#         TO HELP TO FIND FUNCTIONS AND VARIABLES DESCRIBED IN THIS DOCUMENT     #
#   --------------------------------------------------------------------------   #
#                                                                                #
#     ### README_015 ###                                                         #
#     DFS (Depth First Search) procedure is repeated 24 times with different     #
#     normalized distance (scaled 0.0 - 1.0) cutoff values starting with the     #
#     top value of the active cutoff set and ending with 0.0. Markers are        #
#     grouped together based on transitive linkage: if marker 'M1' is linked     #
#     to marker 'M2' and 'M2' is linked to 'M3', then all three markers 'M1',    #
#     'M2' and 'M3' belong to the same linkage group even if 'M1' is not         #
#     linked directly to marker 'M3'.                                            #
#                                                                                #
#  = CLUSTERING/GROUPING OUTPUT FILES =                                          #
#     Information about grouping is stored in three files per iteration:         #
#     '*.matrix'     - pairwise distances for a given group                      #
#     '*.adj_list'   - adjacency list                                            #
#     '*.group_info' - group info                                                #
#                                                                                #
#     STRUCTURE OF '*.matrix' FILE:                                              #
#    * first column:   marker ID "A"                                             #
#    * second column:  marker ID "B"                                             #
#    * third column:   distance per datapoint between markers "A" and "B"        #
#    * fourth column:  number of shared datapoints for markers "A" and "B"       #
#    * fifth column:   normalized distance value                                 #
#                     (distance scaled by the maximum observed distance)         #
#                                                                                #
#     STRUCTURE OF '*.group_info' FILE:                                          #
#    * first column:   marker ID                                                 #
#    * second column:  length of the adjacency list for a given marker, i.e.     #
#                      how many other markers are linked to it directly          #
#    * third column:   size of the given group (how many markers in this         #
#                      particular linked group)                                  #
#    * fourth column:  arbitrary group number                                    #
#    * fifth column:   visual mark ("*****" separates different groups)          #
#    * sixth column:   information about framework markers                       #
#    * seventh column: type of graph [SINGLETON/LINKED/COMPLETE].                #
#      If a node is a singleton (not connected to any other node) then it is     #
#      labeled as 'SINGLE____NODE'.                                              #
#      If the nodes form a complete graph (all nodes linked to each other        #
#      directly) then such a group is labeled as 'COMPLETE_GRAPH'.               #
#      If a group is not a complete graph (some nodes do not have direct links   #
#      or connections to other nodes) then such a group is labeled as            #
#      'LINKED___GROUP'.                                                         #
#    * eighth column:  type of node (SATURATED or DILUTED). If a node has all    #
#      possible connections to all other nodes in a group [in other words: the   #
#      node is connected directly to all other nodes in the group] then such a   #
#      node is labeled as 'SATURATED_NODE'. 'DILUTED___NODE' indicates that a    #
#      group does not form a complete graph. A group can be considered a bin     #
#      if all nodes in the given group are SATURATED and the graph is COMPLETE.  #
#  ____________________________________________________________________________  #
#                                                                                #
#  = CLUSTERING/GROUPING SUMMARY FOR ALL 24 ITERATIONS =                         #
#     [ DENDRO-CLUSTERING ]                                                      #
#     STRUCTURE OF *.x_tree_clust FILE:                                          #
#    * columns 1 - 24:  group ID for each of the 24 clustering iterations        #
#                       (iteration 01 uses the largest cutoff of the active      #
#                       cutoff set; iteration 24 uses cutoff 0.00)               #
#    * 25-th column: type of graph (COMPLETE or LINKED) for the last iteration   #
#    * 26-th column: type of node (SATURATED or DILUTED) for the last iteration  #
#    * 27-th column: '***' - visual mark                                         #
#    * 28-th column: linkage group from framework marker list                    #
#    * 29-th column: position on the map of framework marker (if available)      #
#    * 30-th column: '***' - visual mark                                         #
#    * 31-st column: "ABC" (alphabetical) order of markers prior to clustering   #
#    * 32-nd column: '***' - visual mark                                         #
#    * 33-rd column: "LG" - reserved field for manipulation in excel-like editor #
#    * 34-th column: marker ID                                                   #
#    * 35-th column: '*' - visual mark                                           #
#    * columns 36 - 38: "=1=" "=2=" "=3=" - reserved placeholder fields          #
#    * 39-th column: '*' - visual mark                                           #
#    * 40-th column: order of markers according to dendro-clustering             #
#                                                                                #
#  ____________________________________________________________________________  #
#                                                                                #
#                                                                                #
#                                                                                #
#   *** GROUP INFO FILE (iteration #24 madmapper_scores.loc.out.group_info_24):  #
#                                                                                #
# M1   1    7     1     *****   _NONE_  LINKED___GROUP_00001    DILUTED___NODE   #
# M2   3    7     1     -----   _NONE_  LINKED___GROUP_00001    DILUTED___NODE   #
# M3   4    7     1     -----   _NONE_  LINKED___GROUP_00001    DILUTED___NODE   #
# M4   4    7     1     -----   _NONE_  LINKED___GROUP_00001    DILUTED___NODE   #
# M5   1    7     1     -----   _NONE_  LINKED___GROUP_00001    DILUTED___NODE   #
# M6   2    7     1     -----   _NONE_  LINKED___GROUP_00001    DILUTED___NODE   #
# M7   3    7     1     -----   _NONE_  LINKED___GROUP_00001    DILUTED___NODE   #
# M8   0    1     2     *****   _NONE_  SINGLE____NODE_00002    SATURATED_NODE   #
# M9   0    1     3     *****   _NONE_  SINGLE____NODE_00003    SATURATED_NODE   #
# MF   0    1     4     *****   _NONE_  SINGLE____NODE_00004    SATURATED_NODE   #
# MG   0    1     5     *****   _NONE_  SINGLE____NODE_00005    SATURATED_NODE   #
# MH   4    5     6     *****   _NONE_  COMPLETE_GRAPH_00006    SATURATED_NODE   #
# MI   4    5     6     -----   _NONE_  COMPLETE_GRAPH_00006    SATURATED_NODE   #
# MJ   4    5     6     -----   _NONE_  COMPLETE_GRAPH_00006    SATURATED_NODE   #
# MK   4    5     6     -----   _NONE_  COMPLETE_GRAPH_00006    SATURATED_NODE   #
# ML   4    5     6     -----   _NONE_  COMPLETE_GRAPH_00006    SATURATED_NODE   #
# MN   0    1     7     *****   _NONE_  SINGLE____NODE_00007    SATURATED_NODE   #
# MP   0    1     8     *****   _NONE_  SINGLE____NODE_00008    SATURATED_NODE   #
# MR   0    1     9     *****   _NONE_  SINGLE____NODE_00009    SATURATED_NODE   #
# MS   0    1    10     *****   _NONE_  SINGLE____NODE_00010    SATURATED_NODE   #
# MT   0    1    11     *****   _NONE_  SINGLE____NODE_00011    SATURATED_NODE   #
# MV   0    1    12     *****   _NONE_  SINGLE____NODE_00012    SATURATED_NODE   #
# MW   0    1    13     *****   _NONE_  SINGLE____NODE_00013    SATURATED_NODE   #
# MX   0    1    14     *****   _NONE_  SINGLE____NODE_00014    SATURATED_NODE   #
# MY   0    1    15     *****   _NONE_  SINGLE____NODE_00015    SATURATED_NODE   #
# MZ   0    1    16     *****   _NONE_  SINGLE____NODE_00016    SATURATED_NODE   #
#                                                                                #
#   NOTE THE MAJOR DIFFERENCE BETWEEN GROUP 1 AND GROUP 6:                       #
#       GROUP 1 is a 'LINKED GROUP'                                              #
#       GROUP 6 is a 'COMPLETE GRAPH'                                            #
#                                                                                #
#                                                                                #
##################################################################################
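
### ------------------------------------------------------------------------
### ILLUSTRATIVE SKETCH (NOT CALLED BY THE PROGRAM): the transitive-linkage
### rule from README_015, shown on the M1/M2/M3 example above. A minimal
### stand-alone demo, assuming a plain list of linked pairs as input; the
### real clustering below works on the matrix_array_* dictionaries instead.

def Demo_Transitive_Grouping(linked_pairs):
	### BUILD AN ADJACENCY DICTIONARY FROM THE LINKED PAIRS
	adj = {}
	for pair_a, pair_b in linked_pairs:
		adj.setdefault(pair_a, []).append(pair_b)
		adj.setdefault(pair_b, []).append(pair_a)
	### DEPTH FIRST SEARCH FROM EVERY UNVISITED NODE; EVERY NODE REACHED
	### DIRECTLY OR THROUGH INTERMEDIATE NODES JOINS THE SAME GROUP
	seen = {}
	groups = []
	id_keys = adj.keys()
	id_keys.sort()
	for start in id_keys:
		if start in seen:
			continue
		stack = [start]
		group = []
		while stack:
			node = stack.pop()
			if node in seen:
				continue
			seen[node] = 1
			group.append(node)
			stack.extend(adj[node])
		group.sort()
		groups.append(group)
	return groups

# Demo_Transitive_Grouping([("M1", "M2"), ("M2", "M3")]) returns
# [['M1', 'M2', 'M3']]: M1 and M3 share a group without a direct link.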

def Set_CutOff_Values_Type_1():

	### GLOBAL ###
	global cutoff_value_1
	global cutoff_value_2
	global cutoff_value_3
	global cutoff_value_4
	global cutoff_value_5
	global cutoff_value_6

	global cutoff_value_7
	global cutoff_value_8
	global cutoff_value_9
	global cutoff_value_A
	global cutoff_value_B
	global cutoff_value_C

	global cutoff_value_D
	global cutoff_value_E
	global cutoff_value_F
	global cutoff_value_G
	global cutoff_value_H
	global cutoff_value_I

	global cutoff_value_J
	global cutoff_value_K
	global cutoff_value_L
	global cutoff_value_M
	global cutoff_value_N
	global cutoff_value_O

	### VALUES ###
	cutoff_value_1 = 0.800
	cutoff_value_2 = 0.600
	cutoff_value_3 = 0.500
	cutoff_value_4 = 0.400
	cutoff_value_5 = 0.300
	cutoff_value_6 = 0.250

	cutoff_value_7 = 0.200
	cutoff_value_8 = 0.180
	cutoff_value_9 = 0.160
	cutoff_value_A = 0.140
	cutoff_value_B = 0.120
	cutoff_value_C = 0.100

	cutoff_value_D = 0.090
	cutoff_value_E = 0.080
	cutoff_value_F = 0.070
	cutoff_value_G = 0.060
	cutoff_value_H = 0.050
	cutoff_value_I = 0.040

	cutoff_value_J = 0.030
	cutoff_value_K = 0.020
	cutoff_value_L = 0.015
	cutoff_value_M = 0.010
	cutoff_value_N = 0.005
	cutoff_value_O = 0.000

def Set_CutOff_Values_Type_2():

	### GLOBAL ###
	global cutoff_value_1
	global cutoff_value_2
	global cutoff_value_3
	global cutoff_value_4
	global cutoff_value_5
	global cutoff_value_6

	global cutoff_value_7
	global cutoff_value_8
	global cutoff_value_9
	global cutoff_value_A
	global cutoff_value_B
	global cutoff_value_C

	global cutoff_value_D
	global cutoff_value_E
	global cutoff_value_F
	global cutoff_value_G
	global cutoff_value_H
	global cutoff_value_I

	global cutoff_value_J
	global cutoff_value_K
	global cutoff_value_L
	global cutoff_value_M
	global cutoff_value_N
	global cutoff_value_O

	### VALUES ###
	cutoff_value_1 = 0.600
	cutoff_value_2 = 0.500
	cutoff_value_3 = 0.400
	cutoff_value_4 = 0.350
	cutoff_value_5 = 0.300
	cutoff_value_6 = 0.250

	cutoff_value_7 = 0.225
	cutoff_value_8 = 0.200
	cutoff_value_9 = 0.175
	cutoff_value_A = 0.150
	cutoff_value_B = 0.125
	cutoff_value_C = 0.100

	cutoff_value_D = 0.090
	cutoff_value_E = 0.080
	cutoff_value_F = 0.070
	cutoff_value_G = 0.060
	cutoff_value_H = 0.050
	cutoff_value_I = 0.040

	cutoff_value_J = 0.030
	cutoff_value_K = 0.020
	cutoff_value_L = 0.015
	cutoff_value_M = 0.010
	cutoff_value_N = 0.005
	cutoff_value_O = 0.000

def Set_CutOff_Values_Type_3():

	### GLOBAL ###
	global cutoff_value_1
	global cutoff_value_2
	global cutoff_value_3
	global cutoff_value_4
	global cutoff_value_5
	global cutoff_value_6

	global cutoff_value_7
	global cutoff_value_8
	global cutoff_value_9
	global cutoff_value_A
	global cutoff_value_B
	global cutoff_value_C

	global cutoff_value_D
	global cutoff_value_E
	global cutoff_value_F
	global cutoff_value_G
	global cutoff_value_H
	global cutoff_value_I

	global cutoff_value_J
	global cutoff_value_K
	global cutoff_value_L
	global cutoff_value_M
	global cutoff_value_N
	global cutoff_value_O

	### VALUES ###
	cutoff_value_1 = 0.600
	cutoff_value_2 = 0.550
	cutoff_value_3 = 0.500
	cutoff_value_4 = 0.450
	cutoff_value_5 = 0.400
	cutoff_value_6 = 0.350

	cutoff_value_7 = 0.300
	cutoff_value_8 = 0.275
	cutoff_value_9 = 0.250
	cutoff_value_A = 0.225
	cutoff_value_B = 0.200
	cutoff_value_C = 0.175

	cutoff_value_D = 0.150
	cutoff_value_E = 0.140
	cutoff_value_F = 0.130
	cutoff_value_G = 0.120
	cutoff_value_H = 0.110
	cutoff_value_I = 0.100

	cutoff_value_J = 0.075
	cutoff_value_K = 0.050
	cutoff_value_L = 0.025
	cutoff_value_M = 0.020
	cutoff_value_N = 0.010
	cutoff_value_O = 0.000
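
### HYPOTHETICAL REFACTOR SKETCH (NOT USED BY THE PROGRAM): the three
### Set_CutOff_Values_Type_* functions above differ only in the 24 numbers
### they assign, so a single generic setter could cover all of them. Shown
### for illustration only; the example call reproduces the Type 2 ladder.

def Set_CutOff_Values_From_List(values):
	### values MUST HOLD EXACTLY 24 CUTOFFS, LARGEST FIRST, ENDING WITH 0.0
	global cutoff_value_1, cutoff_value_2, cutoff_value_3, cutoff_value_4
	global cutoff_value_5, cutoff_value_6, cutoff_value_7, cutoff_value_8
	global cutoff_value_9, cutoff_value_A, cutoff_value_B, cutoff_value_C
	global cutoff_value_D, cutoff_value_E, cutoff_value_F, cutoff_value_G
	global cutoff_value_H, cutoff_value_I, cutoff_value_J, cutoff_value_K
	global cutoff_value_L, cutoff_value_M, cutoff_value_N, cutoff_value_O
	(cutoff_value_1, cutoff_value_2, cutoff_value_3, cutoff_value_4,
	 cutoff_value_5, cutoff_value_6, cutoff_value_7, cutoff_value_8,
	 cutoff_value_9, cutoff_value_A, cutoff_value_B, cutoff_value_C,
	 cutoff_value_D, cutoff_value_E, cutoff_value_F, cutoff_value_G,
	 cutoff_value_H, cutoff_value_I, cutoff_value_J, cutoff_value_K,
	 cutoff_value_L, cutoff_value_M, cutoff_value_N, cutoff_value_O) = values

# EQUIVALENT TO Set_CutOff_Values_Type_2():
# Set_CutOff_Values_From_List([0.600, 0.500, 0.400, 0.350, 0.300, 0.250,
#	0.225, 0.200, 0.175, 0.150, 0.125, 0.100, 0.090, 0.080, 0.070, 0.060,
#	0.050, 0.040, 0.030, 0.020, 0.015, 0.010, 0.005, 0.000])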

def Read_Data_File(in_name, out_name, rec_cut, bit_cut, dat_cut, frame_file_name):

	### GLOBAL ###

	### CUTOFF VALUES ###
	global cutoff_value_1
	global cutoff_value_2
	global cutoff_value_3
	global cutoff_value_4
	global cutoff_value_5
	global cutoff_value_6

	global cutoff_value_7
	global cutoff_value_8
	global cutoff_value_9
	global cutoff_value_A
	global cutoff_value_B
	global cutoff_value_C

	global cutoff_value_D
	global cutoff_value_E
	global cutoff_value_F
	global cutoff_value_G
	global cutoff_value_H
	global cutoff_value_I

	global cutoff_value_J
	global cutoff_value_K
	global cutoff_value_L
	global cutoff_value_M
	global cutoff_value_N
	global cutoff_value_O
	#####################

	global max_diff_per_point

	#####################

	global abs_loss
	global round_scale
	global map_construction

	###################

	print "============================================="
	print "    RUN PARAMETERS:  "
	print " 1. INPUT  FILE:  " + in_name
	print " 2. OUTPUT FILE:  " + out_name
	print " 3. RECM CUTOFF:  " + str(rec_cut)
	print " 4. BITS CUTOFF:  " + str(bit_cut)
	print " 5. DATA CUTOFF:  " + str(dat_cut)
	print " 7. MISSING DAT:  " + str(abs_loss)
	print " 8. FRAME  LIST:  " + frame_file_name

	in_file  = open(in_name,  "rb")
	if map_construction != "NR_SET":
		out_file1 = open(out_name + '.pairs_all', "wb")
	############# GOOD AND BAD SET ###############
	out_file4 = open(out_name + '.set_uniq', "wb")
	out_file5 = open(out_name + '.set_dupl', "wb")
	##############################################
	out_file6 = open(out_name + '.x_log_file', "wb")
	##############################################
	global out_file8
	if map_construction != "NR_SET":
		out_file8 = open(out_name + '.x_tree_clust', "wb")
	##############################################
	# if map_construction == "TRUE":
	out_file3a = open(out_name + '.z_nr_scores.loc', "wb")
	out_file3b = open(out_name + '.z_scores_dupl', "wb")
	##############################################
	###      TWO DIMENSIONAL MATRIX            ###
	##############################################
	out_file1_2d = open(out_name + '.pairs_all.2D_Matrix', "wb")

	out_file6.write("=============================================" + '\n')
	out_file6.write("    RUN PARAMETERS: " + '\n')
	out_file6.write(" 1. INPUT  FILE:  " + in_name + '\n')
	out_file6.write(" 2. OUTPUT FILE:  " + out_name + '\n')
	out_file6.write(" 3. RECM CUTOFF:  " + str(rec_cut) + '\n')
	out_file6.write(" 4. BITS CUTOFF:  " + str(bit_cut) + '\n')
	out_file6.write(" 5. DATA CUTOFF:  " + str(dat_cut) + '\n')
	out_file6.write(" 7. MISSING DAT:  " + str(abs_loss) + '\n')
	out_file6.write(" 8. FRAME  LIST:  " + frame_file_name + '\n')

	time.sleep(2)

	global id_list
	global id_array
	global pairs_array
	global pairs_array_N
	global frame_array
	global tree_clust_array
	global graph_depth
	global marker_depth
	global sequence_array_bin
	global init_len
	global nr_good_list

	id_list = []		; # LIST OF ALL NON-REDUNDANT IDs
	id_array = {}		; # TO CHECK AND CREATE NON-REDUNDANT LIST
	matrix_array = {}	; # PAIRWISE DATA FOR GOOD PAIRS ( <= 0.8 )

	########################### EXAMPLE DEFAULT VALUES 
	matrix_array_1 = {}	; # 0.800  01
	matrix_array_2 = {}	; # 0.600  02
	matrix_array_3 = {}	; # 0.500  03
	matrix_array_4 = {}	; # 0.400  04
	matrix_array_5 = {}	; # 0.300  05
	matrix_array_6 = {}	; # 0.250  06
	matrix_array_7 = {}	; # 0.200  07
	matrix_array_8 = {}	; # 0.180  08
	matrix_array_9 = {}	; # 0.160  09
	matrix_array_A = {}	; # 0.140  10
	matrix_array_B = {}	; # 0.120  11
	matrix_array_C = {}	; # 0.100  12
	matrix_array_D = {}	; # 0.090  13
	matrix_array_E = {}	; # 0.080  14
	matrix_array_F = {}	; # 0.070  15
	matrix_array_G = {}	; # 0.060  16
	matrix_array_H = {}	; # 0.050  17
	matrix_array_I = {}	; # 0.040  18
	matrix_array_J = {}	; # 0.030  19
	matrix_array_K = {}	; # 0.020  20
	matrix_array_L = {}	; # 0.015  21
	matrix_array_M = {}	; # 0.010  22
	matrix_array_N = {}	; # 0.005  23
	matrix_array_O = {}	; # 0.000  24
	##################################################

	pairs_array  = {}	; # ARRAY OF ALL POSITIVE PAIRS IN THE MATRIX
	pair_counter = 0	; # COUNTER OF POSITIVE PAIRS IN THE MATRIX
	sequence_array = {}	; # ARRAY OF ALL SEQUENCES
	sequence_array_bin = {}	; # ARRAY OF ALL SEQUENCES IN BINARY MODE: A or B and -
	frame_array = {}	; # ARRAY OF FRAME MARKERS
	frame_position = {}	; # ARRAY WITH FRAME MARKERS COORDINATES
	bit_sum_array = {}	; # SUMMARY OF BIT SCORES
	rec_sum_array = {}	; # SUMMARY OF REC SCORES
	bit_abs_array = {}	; # SUMMARY OF BIT SCORES
	rec_abs_array = {}	; # SUMMARY OF REC SCORES
	tree_clust_array = {}	; # SUMMARY OF CLUSTERING ARRAY

	nr_scores_array = {}	; # ARRAY NON-REDUNDANT MARKER SCORES I
	nr_markers_array = {}	; # ARRAY NON-REDUNDANT MARKER SCORES II
	all_scores_array = {}	; # ARRAY WITH ALL MARKER SCORES

	graph_depth  = {}	; # TYPE OF GRAPH: COMPLETE, LINKED OR SINGLE
	marker_depth = {}	; # MARKER DEPTH: IS IT CONNECTED DIRECTLY TO ALL OTHER MARKERS IN A GROUP?

	global print_frame
	print_frame = "FALSE"
	### TRY TO READ FRAME MARKERS LIST ###
	### README_001 ###
	try:
		frame_file = open(frame_file_name, "rb")
		print "USING FRAME MARKERS LIST"
		while 1:
			u = frame_file.readline()
			if u == '':
				break
			if '\n' in u:
				u = u[:-1]
			if '\r' in u:
				u = u[:-1]
			u = u.split('\t')
			fm = u[1]
			fl = u[0]
			try:
				fp = u[2]
			except:
				fp = "-"
			frame_array[fm] = fl
			frame_position[fm] = fp
			print_frame = "TRUE"
	except:
		print "DID NOT FIND FRAME MARKERS FILE:  " + frame_file_name
		print "WORKING WITHOUT FRAME MARKERS LIST"
		# continue

	### READ DATA FILE ###
	print "============================================="
	time.sleep(2)
	out_file6.write("=============================================" + '\n')
	n = 0
	d = 0
	l = 1
	init_len = 10000
	duplic_id = []
	while 1:
		### README_001 ###
		t = in_file.readline()
		if t == '':
			break
		if '\n' in t:
			t = t[:-1]
		if '\r' in t:
			t = t[:-1]
		tl = t.split('\t')
		curr_len = len(tl)
		if tl[0][0] == ";":
			print "============================================="
			print tl
			print "JUNK LINE"
			print "============================================="
			out_file4.write(t + '\n')
			out_file3a.write(t + '\n')
			time.sleep(2)
		if l == 1 and curr_len >= 12 and tl[0][0] != ";":
			init_len = curr_len
		if curr_len == init_len and tl[0][0] != ";":
			####
			id = tl[0]
			# if id not in id_list:
			try:
				id_test = id_array[id]
				duplic_id.append(id)
				out_file5.write(t + '\n')
				print '\n'
				print id + "    IS DUPLICATED -=- CHECK DATA FOR DUPLICATION"
				out_file6.write( id + "    IS DUPLICATED -=- CHECK DATA FOR DUPLICATION" + '\n')
				d = d + 1
				time.sleep(2)
			except:
				id_array[id] = 1
				id_list.append(id)
				out_file4.write(t + '\n')
				q = 1

				## COLLECTING DATAPOINTS
				while q < init_len:
					data_point = tl[q]
					try:
						point_value = float(data_point)
						test_float = "TRUE"
					except:
						test_float = "FALSE"

					if test_float == "TRUE":
						### DIGITAL DATA ###
						sequence_array[id,q] = point_value

					if test_float == "FALSE":
						### MISSING DATA ###
						sequence_array[id,q] = "-"

					q = q + 1
				sys.stdout.write(".")
				n = n + 1
			l = l + 1
		if curr_len != init_len and l > 1:
			print '\n'
			print "WRONG NUMBER OF DATA POINTS"
			print "CHECK LINE:  " + `l` + '\n'
			print t
			print "============================================="
			print "TERMINATED.........."
			print "============================================="
			out_file6.write("TERMINATED")
			out_file6.write(".........." + '\n')
			sys.exit()

	print '\n'
	print "============================================="
	print `n` + " UNIQ IDs IN THE SET FOUND"
	print `d` + " IDs ARE DUPLICATED"
	out_file6.write("=============================================" + '\n')
	out_file6.write(`n` + " UNIQ IDs IN THE SET FOUND" + '\n')
	out_file6.write(`d` + " IDs ARE DUPLICATED" + '\n')

	duplic_id.sort()
	print duplic_id
	out_file6.write("=============================================" + '\n')

	print "CONTINUE ANALYSIS WITH " + `n` + " SEQUENCES OUT OF " + `n+d`
	out_file6.write("CONTINUE ANALYSIS WITH " + `n` + " SEQUENCES OUT OF " + `n+d` + '\n')
	print "============================================="
	print "SUCCESS!!!"
	print "============================================="
	out_file6.write("=============================================" + '\n')

	time.sleep(2)

	######### PAIRWISE COMPARISON #########

	## SORTED ID LIST ##
	id_list.sort()
	## IS IT BETTER NOT TO SORT ? ##

	### SET SUMMARY VALUES FOR MARKERS ####

	nr_scores_list = []
	nr_marker_list = []
	nr_good_list   = []
	really_bad     = []	; # ALL BAD MARKERS
	really_bad_data   = []	; # BAD WITH LOSS OF DATA
	r_count = 0
	dupl_markers_list = {}
	good_string = {}

	### SET ZERO FOR MAX DIFF VALUE ###
	max_diff_per_point = 0

	for item in id_list:
		tree_clust_array[item] = []
		### REMOVE ### rec_abs_array[item] = 0
		### REMOVE ### bit_abs_array[item] = 0

	### START ###

	dummy_cat = 1
	dummy_len = len(id_list)
	dummy_value = dummy_len*dummy_len
	print `dummy_value` + "  PAIRWISE COMPARISONS HAVE TO BE DONE"

	### README_008 ###
	count_item_a = 0
	for item_a in id_list:
		count_item_a = count_item_a + 1
		print item_a + "   -=-   " + `count_item_a` + "  MAX DIFF IS: " + str(round(max_diff_per_point,6))
		for item_b in id_list:
			p = 1
			diff_score = 0
			data_loss = 0
			data_points = 0

			line_tick_update = math.fmod(dummy_cat, 1000)
			if line_tick_update == 0:
				print `dummy_cat` + "  COMPARISONS DONE OUT OF  " + `dummy_value`

			while p < init_len:

				score_a = sequence_array[item_a,p]
				score_b = sequence_array[item_b,p]

				try:
					test_float_a = float(score_a)
				except:
					test_float_a = "-"
				try:
					test_float_b = float(score_b)
				except:
					test_float_b = "-"

				if test_float_a != "-" and test_float_b != "-":
					### PRINT DATA ###
					### print `score_a` + " :SCORE A  -=-  SCORE B: " + `score_b`
					diff_score_ab = score_a - score_b
					diff_score = diff_score + abs(diff_score_ab)
					### print "DIFF VALUE A-B :  " + `diff_score_ab` + "  |||  TOTAL DIFF: " + `diff_score`
					### print ""
					data_points = data_points + 1
				if test_float_a == "-" or test_float_b == "-":
					### print ".....MISSING DATA....."
					### print ""
					data_loss = data_loss + 1
				#####################################

				p = p + 1

			dummy_cat = dummy_cat + 1
			### DATA POINT TEST ###
			if data_points + data_loss != init_len - 1:
				print "...SOMETHING IS WRONG..."
				sys.exit()

			### README_009 ###
			### ANALYZE ONLY VALUABLE CRAP ###
			data_fr = data_points*1.00/(data_points + data_loss)
			# data_fr = round(data_fr,round_scale)
			data_fr = round(data_fr,4)
			data_fr_str = str(data_fr)
			if data_points >= dat_cut:

				### MATRIX DATA  ###

				diff_per_point = diff_score/data_points
				diff_per_point = round(diff_per_point,round_scale)
				diff_per_point_str = str(diff_per_point)

				if diff_per_point > max_diff_per_point:
					max_diff_per_point = diff_per_point
					print "FOUND NEW MAX DIFF VALUE:  " + str(round(max_diff_per_point,6)) + "  FOR PAIR: " + item_a + " -=- " + item_b

				diff_score = round(diff_score,round_scale)
				diff_score_str = str(diff_score)

				pair_counter = pair_counter + 1
				current_pair = [item_a, item_b]
				pairs_array[pair_counter] = current_pair
				matrix_values = [diff_score, data_points, diff_per_point]
				matrix_array[item_a,item_b] = matrix_values
				#### EACH CLUSTERING ROUND CONSUMES (DELETES FROM) ITS ####
				#### OWN COPY OF THE MATRIX, HENCE THE 24 DUPLICATES   ####
				for matrix_copy in (matrix_array_1, matrix_array_2, matrix_array_3,
					matrix_array_4, matrix_array_5, matrix_array_6, matrix_array_7,
					matrix_array_8, matrix_array_9, matrix_array_A, matrix_array_B,
					matrix_array_C, matrix_array_D, matrix_array_E, matrix_array_F,
					matrix_array_G, matrix_array_H, matrix_array_I, matrix_array_J,
					matrix_array_K, matrix_array_L, matrix_array_M, matrix_array_N,
					matrix_array_O):
					matrix_copy[item_a,item_b] = matrix_values
				### README_010 ###
				if map_construction != "NR_SET":
					### THE '.pairs_all' FILE IS NOT OPENED IN NR_SET MODE ###
					out_file1.write(item_a + '\t' + item_b + '\t' + diff_score_str + '\t' + \
						"*[DFR>*" + '\t' + data_fr_str + '\t' + "*[DPP>*" + '\t' + diff_per_point_str + '\t' + "*[DSM>*" + \
						'\t' + `data_points` + '\t' + `data_loss` + '\t' + \
						`data_points + data_loss` + '\n')
				########################################

			# if data_fr < dat_cut:
			if data_points < dat_cut:
				print "DATA POINTS ARE LESS THAN CUTOFF FOR PAIR: " + item_a + " " + item_b + " " + \
										`data_points` + "   " + data_fr_str
			########################################

		#####################
		sys.stdout.write(".")
		#####################
	print ""
	print "============================================="
	print "GLOBAL MATRIX CREATED"
	print "============================================="

	print ""
	print "============================================="
	print "MAX DIFF VALUE:  " + str(round(max_diff_per_point,6))
	print "============================================="

	#### POSITIVE CLUSTERING ####

	### README_014 ###

	print ""
	print "STARTING POSITIVE CLUSTERING"
	print "============================================="

	global out_file14

	out_file14 = open(out_name + '.group_info' + "_Summary", "wb")

	time.sleep(2)

	### 24 CLUSTERING ROUNDS, FROM THE LARGEST CUTOFF DOWN TO 0.0 ###
	clustering_rounds = [
		(cutoff_value_1, "01", matrix_array_1), (cutoff_value_2, "02", matrix_array_2),
		(cutoff_value_3, "03", matrix_array_3), (cutoff_value_4, "04", matrix_array_4),
		(cutoff_value_5, "05", matrix_array_5), (cutoff_value_6, "06", matrix_array_6),
		(cutoff_value_7, "07", matrix_array_7), (cutoff_value_8, "08", matrix_array_8),
		(cutoff_value_9, "09", matrix_array_9), (cutoff_value_A, "10", matrix_array_A),
		(cutoff_value_B, "11", matrix_array_B), (cutoff_value_C, "12", matrix_array_C),
		(cutoff_value_D, "13", matrix_array_D), (cutoff_value_E, "14", matrix_array_E),
		(cutoff_value_F, "15", matrix_array_F), (cutoff_value_G, "16", matrix_array_G),
		(cutoff_value_H, "17", matrix_array_H), (cutoff_value_I, "18", matrix_array_I),
		(cutoff_value_J, "19", matrix_array_J), (cutoff_value_K, "20", matrix_array_K),
		(cutoff_value_L, "21", matrix_array_L), (cutoff_value_M, "22", matrix_array_M),
		(cutoff_value_N, "23", matrix_array_N), (cutoff_value_O, "24", matrix_array_O)]

	for round_cutoff, round_step, round_matrix in clustering_rounds:
		Seqs_Clustering(round_cutoff, round_step, round_matrix)
		print "ROUND " + `int(round_step)` + " DONE"
		print `group_count` + "  GROUPS WERE FOUND (STEP " + round_step + ")"
		out_file6.write(`group_count` + "  GROUPS WERE FOUND (STEP " + round_step + ")" + '\n')
		time.sleep(2)

	### NEW ROUNDS ###

	##################

	time.sleep(2)

	out_file6.write("=============================================" + '\n')

	### GROUP LIST ###
	f = 0
	for item in id_list:
		### TRY FRAME MARKER ID ###
		if print_frame == "TRUE":
			try:
				frame_group = frame_array[item]
			except:
				frame_group = "-"
			try:
				frame_coords = frame_position[item]
			except:
				frame_coords = "-"
		if print_frame == "FALSE":
			frame_group  = "-"
			frame_coords = "-"
		tree_clust_array[item].append("***")
		tree_clust_array[item].append(frame_group)
		tree_clust_array[item].append(frame_coords)
		tree_clust_array[item].append("***")
		tree_clust_array[item].append(str(f))
		tree_clust_array[item].append("***")
		tree_clust_array[item].append("LG")
		tree_clust_array[item].append(item)
		tree_clust_array[item].append("*")
		tree_clust_array[item].append("=1=")
		tree_clust_array[item].append("=2=")
		tree_clust_array[item].append("=3=")
		tree_clust_array[item].append("*")
		f = f + 1
		print tree_clust_array[item]

	print ""
	print "============================================="	
	print "            FINAL STEP                       "
	print "       PROCESSING MEGALIST                   "
	print "         DENDRO SORTING                      "
	print "============================================="
	print ""

	time.sleep(2)

	### MEGA LIST ###
	mega_list = []
	for item in id_list:
		mega_list.append(tree_clust_array[item])
	# print mega_list
	mega_list.sort()
	f = 0
	for item in mega_list:
		current_list = []
		item.append(str(f))
		for subitem in item:
			subitem = str(subitem)
			current_list.append(subitem)
		current_list = "\t".join(current_list)
		print current_list
		if map_construction != "NR_SET":
			out_file8.write(current_list + '\n')	; # FILE IS NOT OPENED IN NR_SET MODE
		f = f + 1

	### TWO DIMENSIONAL MATRIX ###

	out_file1_2d.write(";" + '\t')

	mega_length = len(mega_list)

	### FIRST ROW ###
	counter = 0
	for item in mega_list:
		current_id = item[33]	; # INDEX 33 = 34-th COLUMN OF x_tree_clust: MARKER ID
		out_file1_2d.write(current_id)
		counter = counter + 1
		if counter < mega_length:
			out_file1_2d.write('\t')
		if counter == mega_length:
			out_file1_2d.write('\n')

	### PAIRWISE DATA ROWS ###
	for item in mega_list:
		current_id = item[33]
		out_file1_2d.write(current_id + '\t')
		counter = 0
		for other_item in mega_list:
			counter = counter + 1
			other_id = other_item[33]
			try:
				cur_diff = float(matrix_array[current_id,other_id][2])
				cur_diff_fr = cur_diff/max_diff_per_point
				cur_value = str(round(cur_diff_fr,round_scale))
			except:
				cur_value = "-"
			out_file1_2d.write(cur_value)
			if counter < mega_length:
				out_file1_2d.write('\t')
			if counter == mega_length:
				out_file1_2d.write('\n')


	##########################################

	print ""
	print "============================================="	
	print "            ANALYSIS DONE                    "
	print "            ENJOY MAPPING                    "
	print "============================================="
	print ""

	in_file.close()
	if map_construction != "NR_SET":
		out_file1.close()
		out_file8.close()
	out_file4.close()
	out_file5.close()
	out_file6.close()
	out_file14.close()
	out_file3a.close()
	out_file3b.close()
	out_file1_2d.close()
	# NOTE: out_file3c THROUGH out_file3h ARE NEVER OPENED IN THIS PROGRAM,
	# SO NO EXTRA FILES NEED TO BE CLOSED WHEN map_construction == "TRUE"
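
### ILLUSTRATIVE SKETCH (NOT CALLED BY THE PROGRAM): the pairwise distance
### computed inside Read_Data_File above, restated as a stand-alone function.
### "-" marks missing data; the distance is the mean absolute difference
### over the datapoints both markers share (later normalized by the maximum
### observed value before the cutoffs are applied).

def Demo_Pairwise_Distance(scores_a, scores_b):
	diff_score = 0.0
	data_points = 0
	for score_a, score_b in zip(scores_a, scores_b):
		if score_a == "-" or score_b == "-":
			continue	; # MISSING DATAPOINTS ARE SKIPPED
		diff_score = diff_score + abs(score_a - score_b)
		data_points = data_points + 1
	if data_points == 0:
		return None	; # NO SHARED DATAPOINTS TO COMPARE
	return diff_score/data_points

# Demo_Pairwise_Distance([1.0, 2.0, "-"], [1.5, 2.0, 3.0]) returns 0.25:
# two shared datapoints with total difference 0.5, so 0.5/2 per point.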

#### POSITIVE CLUSTERING ####

def Seqs_Clustering(cutoff_value, x_counter, matrix_array_current):

	global max_diff_per_point

	global id_list
	global id_array
	global adj_array
	global already_done
	global group_count
	global dfs_counter
	global working_group
	global group_depth
	global node_count
	global node_array
	global sequence_array
	global sequence_array_bin
	global tree_clust_array
	global nr_good_list

	global pairs_array
	global frame_array
	global print_frame
	global round_scale

	global graph_depth
	global marker_depth

	global out_file14
	global out_file8

	global init_len

	node_array = {}
	adj_array    = {}
	already_done = []
	group_count = 0
	pair_matrix_count = 0
	pair_matrix_array = {}

	print ""
	print "ALREADY DONE"
	print already_done
	print ""
	print ""
	print "MATRIX ARRAY"
	print len(matrix_array_current)
	print ""
	time.sleep(2)

	out_file10 = open(out_name + '.matrix' + "_" + x_counter, "wb")
	out_file12 = open(out_name + '.adj_list' + "_" + x_counter, "wb")
	out_file13 = open(out_name + '.group_info' + "_" + x_counter, "wb")

	out_file14.write("### CLUSTERING RESULTS ITERATION " + x_counter + " CUTOFF: " + str(cutoff_value) + " ###" + '\n')

	########################################
	###          CLUSTERING              ###
	########################################

	# print id_list
	print "------------------------------"
	print "FOUND " + `len(id_list)` + " UNIQ IDs"
	print "------------------------------"

	r = 0

	### CREATE NON-REDUNDANT MATRIX ###

	for key in pairs_array:
		id_a = pairs_array[key][0]
		id_b = pairs_array[key][1]
		try:
			cur_data1 = int(matrix_array_current[id_a,id_b][1])
			cur_diff1 = float(matrix_array_current[id_a,id_b][2])
			cur_diff1_fr = cur_diff1/max_diff_per_point
			query1 = 1
			print key
		except:
			# print "ALREADY PROCESSED"
			sys.stdout.write(".")
			query1 = 0

		try:
			cur_data2 = int(matrix_array_current[id_b,id_a][1])
			cur_diff2 = float(matrix_array_current[id_b,id_a][2])
			cur_diff2_fr = cur_diff2/max_diff_per_point
			r = r + 1
			# print `r` + " REVERSE PAIR FOUND"
			query2 = 1
		except:
			# print "NO REVERSE PAIR FOUND"
			query2 = 0

		if query1 == 1 and query2 == 0 and cur_diff1_fr <= cutoff_value and id_a != id_b:
			### STRING CONVERSION ###
			cur_diff1_str = str(round(cur_diff1,round_scale))
			cur_diff1_fr_str = str(round(cur_diff1_fr,round_scale))
			#########################
			out_file10.write(id_a + '\t' + id_b + '\t' + cur_diff1_str + '\t' + \
				`cur_data1` + '\t' + cur_diff1_fr_str + '\n')
			pair_matrix_count = pair_matrix_count + 1
			current_matrix_pair = [id_a, id_b]
			pair_matrix_array[id_a,id_b] = current_matrix_pair
			### NEED TO UNSET ARRAY
			try:
				del matrix_array_current[id_a,id_b]
			except:
				# print "ALREADY REMOVED"
				sys.stdout.write(".")

		if query1 == 1 and query2 == 1 and id_a != id_b:
			if cur_diff1_fr <= cur_diff2_fr:
				# print "CASE 1"
				if cur_diff1_fr <= cutoff_value:
					### STRING CONVERSION ###
					cur_diff1_str = str(round(cur_diff1,round_scale))
					cur_diff1_fr_str = str(round(cur_diff1_fr,round_scale))
					#########################
					out_file10.write(id_a + '\t' + id_b + '\t' + cur_diff1_str + '\t' + \
						`cur_data1` + '\t' + cur_diff1_fr_str + '\n')
					pair_matrix_count = pair_matrix_count + 1
					current_matrix_pair = [id_a, id_b]
					pair_matrix_array[id_a,id_b] = current_matrix_pair
					### NEED TO UNSET ARRAY
					try:
						del matrix_array_current[id_a,id_b]
						del matrix_array_current[id_b,id_a]
					except:
						# print "ALREADY REMOVED"
						sys.stdout.write(".")
			if cur_diff1_fr > cur_diff2_fr:
				# print "CASE 2"
				if cur_diff2_fr <= cutoff_value:
					### STRING CONVERSION ###
					cur_diff2_str = str(round(cur_diff2,round_scale))
					cur_diff2_fr_str = str(round(cur_diff2_fr,round_scale))
					#########################
					out_file10.write(id_a + '\t' + id_b + '\t' + cur_diff2_str + '\t' + \
						`cur_data2` + '\t' + cur_diff2_fr_str + '\n')
					pair_matrix_count = pair_matrix_count + 1
					current_matrix_pair = [id_b, id_a]
					pair_matrix_array[id_b,id_a] = current_matrix_pair
					### NEED TO UNSET ARRAY
					try:
						del matrix_array_current[id_a,id_b]
						del matrix_array_current[id_b,id_a]
					except:
						# print "ALREADY REMOVED"
						sys.stdout.write(".")

	print "-------------------------------------"
	print `pair_matrix_count` + " PAIRS IN REDUNDANT MATRIX"
	print "-------------------------------------"
	print "BEGIN CLUSTERING"

	time.sleep(2)

	item_count = 0
	id_list.sort()
	### CREATE ADJACENCY LIST ###
	for item in id_list:
		item_count = item_count + 1
		print `item_count` + '\t' + item

		item_list = [item]
		for key in pair_matrix_array:
			id_a = pair_matrix_array[key][0]
			id_b = pair_matrix_array[key][1]
			if id_a == item:
				item_list.append(id_b)
			if id_b == item:
				item_list.append(id_a)
		adj_array[item] = item_list
		item_string = " ".join(item_list)
		out_file12.write(item_string + '\n')

	print "GROUP ANALYSIS"
	time.sleep(2)

	### GROUP ANALYSIS ###
	node_count = 0
	for item in id_list:
		# if item not in already_done:
		try:
			node_test = node_array[item]
		except:
			node_array[item] = 1
			group_count = group_count + 1
			already_done.append(item)
			node_count = node_count + 1
			working_group = [item]
			current_adj_list = adj_array[item]
			current_adj_len = len(current_adj_list)

			q = 0
			while q <= (current_adj_len - 1):
				current_adj_item = current_adj_list[q]
				# print `q` + '\t' + current_adj_item
				if current_adj_item in already_done:
					go_to_dfs = 0
				# if current_adj_item not in already_done:
				try:
					node_test = node_array[current_adj_item]
				except:
					node_array[current_adj_item] = 1
					already_done.append(current_adj_item)
					node_count = node_count + 1
					if current_adj_item not in working_group:
						working_group.append(current_adj_item)
					go_to_dfs = 1
					dfs_counter = 0
					# print 'Processing Group:  ' + `group_count`
					### README_015 ###
					DFS_procedure(current_adj_item)
				q = q + 1
			# if item not in already_done:
			#	already_done.append(item)
			working_group.sort()
			# print working_group
			print 'Processing Group:  ' + `group_count`
			print 'Number of processed nodes:  ' + `node_count`
			i = 0
			group_suffix = str(group_count).zfill(5)	; # PAD GROUP NUMBER TO 5 DIGITS
			## GRAPH TYPE: COMPLETE, LINKED OR SINGLE
			## NODE  TYPE: SATURATED OR DILUTED
			graph_type = "COMPLETE_GRAPH_" + group_suffix
			graph_depth[group_count] = graph_type
			for egg in working_group:
				current_adj_list7 = adj_array[egg]
				current_adj_len7 = len(current_adj_list7)
				current_group_len7 = len(working_group)
				# marker_depth[egg] = "SATURATED_NODE"
				marker_depth[egg] = "UNDEFINED_NODE"
				if current_adj_len7 == current_group_len7:
					marker_depth[egg] = "SATURATED_NODE"
				if current_adj_len7 != current_group_len7:
					graph_type = "LINKED___GROUP_" + group_suffix
					graph_depth[group_count] = graph_type
					marker_depth[egg] = "DILUTED___NODE"
				if current_group_len7 == 1:
					graph_type = "SINGLE____NODE_" + group_suffix
					graph_depth[group_count] = graph_type
					# marker_depth[egg] = "SINGLE____NODE"
			for element in working_group:
				current_adj_list1 = adj_array[element]
				current_adj_len1 = len(current_adj_list1)
				current_group_len1 = len(working_group)
				### TRY FRAME MARKER ID ###
				if print_frame == "TRUE":
					try:
						frame_marker = frame_array[element]
						frame_marker = ' ' + frame_marker + '_LinkageGroup '
					except:
						frame_marker = " __LinkageGroup "
				if print_frame == "FALSE":
					frame_marker = "_NONE_"
				###########################
				# tree_clust_array[element].append(str(group_count))
				tree_clust_array[element].append(group_count)
				if x_counter == "24":
					tree_clust_array[element].append(graph_depth[group_count])
					tree_clust_array[element].append(marker_depth[element])
				print tree_clust_array[element]
				###########################
				### WRITE DATA TO OUTPUT GROUP-INFO FILES ###
				if i == 0:
					out_file13.write(element + '\t' + `(current_adj_len1 - 1)` + '\t' \
						+ `current_group_len1` + '\t' + `group_count` + '\t' + '*****' \
						+ '\t' + frame_marker + '\t' + graph_depth[group_count] + '\t' \
						+ marker_depth[element] + '\n')
					out_file14.write(element + '\t' + `(current_adj_len1 - 1)` + '\t' \
						+ `current_group_len1` + '\t' + `group_count` + '\t' + '*****' \
						+ '\t' + frame_marker + '\t' + "*" + x_counter + "*" + '\t' \
						+ graph_depth[group_count] + '\t' + marker_depth[element] + '\n')
				if i != 0:
					out_file13.write(element + '\t' + `(current_adj_len1 - 1)` + '\t' \
						+ `current_group_len1` + '\t' + `group_count` + '\t' + '-----' \
						+ '\t' + frame_marker + '\t' + graph_depth[group_count] + '\t' \
						+ marker_depth[element] + '\n')
					out_file14.write(element + '\t' + `(current_adj_len1 - 1)` + '\t' \
						+ `current_group_len1` + '\t' + `group_count` + '\t' + '-----' \
						+ '\t' + frame_marker + '\t' + "*" + x_counter + "*" + '\t' \
						+ graph_depth[group_count] + '\t' + marker_depth[element] + '\n')
				i = i + 1
				###########################
	print '======================'
	# print already_done
	print len(already_done)
	print `group_count` + '\t' + 'GROUPS FOUND'
	print '======================'

	#######################################

	out_file10.close()
	out_file12.close()
	out_file13.close()
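
### ILLUSTRATIVE SKETCH (NOT CALLED BY THE PROGRAM): the COMPLETE vs LINKED
### test applied above, restated stand-alone. Here adj maps every marker to
### its adjacency list, which includes the marker itself (as in adj_array).
### A group whose members are all connected directly to each other is a
### COMPLETE_GRAPH and can be considered a bin (see the header notes).

def Demo_Graph_Type(adj, group):
	group_len = len(group)
	for member in group:
		# A SATURATED NODE LISTS THE WHOLE GROUP: ITSELF PLUS ALL OTHERS
		if len(adj[member]) != group_len:
			return "LINKED___GROUP"	; # AT LEAST ONE DILUTED NODE
	if group_len == 1:
		return "SINGLE____NODE"
	return "COMPLETE_GRAPH"

# Demo_Graph_Type({"MH": ["MH", "MI"], "MI": ["MI", "MH"]}, ["MH", "MI"])
# returns "COMPLETE_GRAPH" (compare groups 1 and 6 in the header example).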

#############################
### README_015 ###
def DFS_procedure(current_adj_item):

	global adj_array
	global already_done
	global group_count
	global dfs_counter
	global working_group
	global group_depth
	global node_count
	global node_array

	print "DFS" + '\t' + `dfs_counter`

	for key in adj_array:
		current_adj_list = adj_array[key]
		current_adj_len = len(current_adj_list)
		if current_adj_item in current_adj_list:
			q = 0
			while q <= (current_adj_len - 1):
				current_adj_item1 = current_adj_list[q]
				# print `q` + '\t' + current_adj_item1
				if current_adj_item1 in already_done:
					go_to_dfs = 0
				# if current_adj_item1 not in already_done:
				try:
					node_test = node_array[current_adj_item1]
				except:
					node_array[current_adj_item1] = 1
					already_done.append(current_adj_item1)
					node_count = node_count + 1
					if current_adj_item1 not in working_group:
						working_group.append(current_adj_item1)
					go_to_dfs = 1
					DFS_procedure(current_adj_item1)
					### HOW MANY TIMES LOOP INSIDE ITSELF ###
					dfs_counter = dfs_counter + 1
					print "DFS" + '\t' + `dfs_counter`
					group_depth = dfs_counter
				q = q + 1

	######################################################################
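
### ILLUSTRATIVE SKETCH (NOT USED BY THE PROGRAM): an iterative variant of
### DFS_procedure above. The recursive version relies on
### sys.setrecursionlimit(10000) for large groups; an explicit stack avoids
### that limit altogether. Stand-alone, assuming a plain adjacency dictionary.

def Demo_Iterative_DFS(adj, start):
	seen = {start: 1}
	stack = [start]
	order = []
	while stack:
		node = stack.pop()
		order.append(node)
		for neighbor in adj.get(node, []):
			if neighbor not in seen:
				seen[neighbor] = 1
				stack.append(neighbor)
	return order

# Demo_Iterative_DFS({"M1": ["M2"], "M2": ["M1", "M3"], "M3": ["M2"]}, "M1")
# visits all three markers without any recursion.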

import math
import re
import sys
import string
import time

global map_construction
global double_limit
global allele_dist
global abs_loss
global round_scale

map_construction = "TRUE"
# map_construction = "FALSE"
##################

# round_scale = 2
# round_scale = 4
round_scale = 6
# round_scale = 8

if __name__ == "__main__":
	### README_006 ###
	if len(sys.argv) != 11:
		print "                                                                     "
		print "   PROGRAM  USAGE:                                                   "
		print "   MAD MAPPER TAKES 10 ARGUMENTS/OPTIONS IN THE FOLLOWING ORDER:     "
		print "  (1)input_file[LOC_DATA/MARKER SCORES]          (2)output_file[NAME]"
		print "  (3)rec_cut[0.2]          (4)bit_cut[100]       (5)data_cut[25]     "
		print "  (6)group_file[OPTIONAL]  (7)allele_dist[0.33]  (8)missing_data[50] "
		print "  (9)trio_analysis[TRIO/NOTRIO/NR_SET]           (10)double_cross[3] "
		print "                                                                     "
		print "   if group_file does not exist just enter [X]                       "
		print "                                                                     "
		print "   DEFAULT VALUES:    IN  OUT  0.2  100  25  X  0.33  50  TRIO  3    "
		print "                                                                     "
		input = raw_input("   TYPE \"HELP\" FOR HELP [ \"EXIT\" TO EXIT ] : ")
		if input == "HELP" or input == "help":
			print "                                                                             "
			print "              MAD MAPPER ARGUMENTS/OPTIONS - BRIEF EXPLANATION:              "
			print "                                                                             "
			print "       INPUT/OUTPUT FILES:                                                   "
			print "   [1] - Input File Name  (locus file with raw marker scores)                "
			print "   [2] - Output File Name (master name [prefix] for 80 or so output files)   "
			print "                                                                             "
			print "       CLUSTERING PARAMETERS (WILL AFFECT CLUSTERING/GROUPING ONLY):         "
			print "   [3] - Recombination Value (Haplotype Distance) cutoff:  0.20 - 0.25       "
			print "         (NOTE: TRIO analysis (see below) works with 0.2 rec_cut value only) "
			print "   [4] - BIT Score cutoff: 60-1000 [100 is default and highly recommended]   "
			print "         (Check README_MADMAPPER for BIT Scoring Matrix system and values)   "
			print "   [5] - Overlap Data cutoff (data_cut): minimum number of scores between    "
			print "         two markers to be compared to assign pairwise distance              "
			print "                                                                             "
			print "   [6] - Optional Frame Work Marker Map (very useful for clustering analysis "
			print "                              to assign new markers to known linkage groups) "
			print "                                                                             "
			print "       FILTERING PARAMETERS (WILL AFFECT MARKER FILTERING, CREATION OF GOOD  "
			print "                             NON-REDUNDANT SET OF MARKERS AND TRIO ANALYSIS):"
			print "   [7] - Allele Distortion: to filter markers with high allele distortion    "
			print "   [8] - Missing Data: how many missing scores are allowed per marker        "
			print "                                                                             "
			print "       TRIO (TRIPLET) ANALYSIS -                                             "
			print "       FINDING OF TIGHTLY LINKED MARKERS AND THEIR RELATIVE ORDER:           "
			print "   [9] - TRIO/NOTRIO (If TRIO option is chosen then TRIPLET analysis will    "
			print "                      take place. Is not recommended to use for large set    "
			print "                      markers, 1000 or greater)                              "
			print "         if NR_SET chosen there is no CLUSTERING at all,                     "
			print "            Non-Redundant locus file will be created only                    "
			print "   [10] - Number of Double Crossovers cutoff value for TRIPLET analysis:     "
			print "          3 - default for noisy data; 0 is recommended for perfect scores    "
			print "                                                                             "
			print "   CHECK README_MADMAPPER FOR DETAILED DESCRIPTION OF OPTIONS                "
			print "                           AND OUTPUT FILES FORMATS/STRUCTURE                "
			print "                                                                             "
			sys.exit()
		if input != "HELP" and input != "help":
			sys.exit()
		sys.exit()
	if len(sys.argv) == 11:
		in_name  = sys.argv[1]
		out_name = sys.argv[2]
		rec_cut  = sys.argv[3]
		bit_cut  = sys.argv[4]
		dat_cut  = sys.argv[5]
		frame_file_name = sys.argv[6]
		allele_d = sys.argv[7]
		missingd = sys.argv[8]
		trio_analysis = sys.argv[9]
		double_x = sys.argv[10]
		try:
			rec_cut = float(rec_cut)
		except:
			print ""
			print "3-d argument rec_cut must be a number between 0.20 - 0.25"
			print "    default:  0.2 is recommended                         "
			print ""
			sys.exit()
		try:
			bit_cut = int(bit_cut)
		except:
			print ""
			print "4-th argument bit_cut must be a number between 60 - 1000"
			print "    default:   100 is recommended                       "
			print ""
			sys.exit()
		# dat_cut = float(dat_cut)
		try:
			dat_cut = int(dat_cut)
		except:
			print ""
			print "5-th argument data_cut must be a number between 10 - 1000"
			print "    default:    25 is recommended                        "
			print "    default:    50 for large sets (50 is better than 25) "
			print ""
			sys.exit()
		try:
			allele_d = float(allele_d)
		except:
			print ""
			print "7-th argument allele_dist must be a number between 0.1 - 0.9"
			print "    default:  0.33 is recommended                           "
			print "              0.25 can be used with caution                 "
			print ""
			sys.exit()
		try:
			missingd = int(missingd)
		except:
			print ""
			print "8-th argument missing_data must be a number between 0 - 100"
			print "    default:  50 is recommended                            "
			print "    default:  10 for perfect scores                        "
			print ""
			sys.exit()
		try:
			double_x = int(double_x)
		except:
			print ""
			print "10-th argument double_cross must be a number between 0 - 25"
			print "    default:    3 for noisy data (mis-scored markers)      "
			print "    default:    0 for perfect scores                       "
			print ""
			sys.exit()

		if rec_cut > 0.25 or rec_cut < 0.2:
			print "======================================================"
			print "  Recombination cutoff must be between 0.25 and 0.20  "
			print "          default:  0.20 is recommended               "
			print "======================================================"
			sys.exit()

		# if dat_cut > 1.00 or dat_cut < 0.1:
		if dat_cut > 1000 or dat_cut < 10:
			print "======================================================"
			print "   DataPoints cutoff must be between 10 and 1000      "
			print "          default:   25 is recommended                "
			print "          default:   50 for perfect scores            "
			print "======================================================"
			sys.exit()

		if bit_cut > 1000 or bit_cut < 60:
			print "======================================================"
			print "     BIT Score cutoff must be between 1000 and 60     "
			print "          default:  100 is recommended                "
			print "======================================================"
			sys.exit()

		if allele_d < 0.1 or allele_d > 0.9:
			print "======================================================"
			print "     allele distortion must be between 0.1 and 0.9    "
			print "          default:  0.33 is recommended               "
			print "                    0.25 can be used with caution     "
			print "======================================================"
			sys.exit()

		if missingd < 0 or missingd > 100:
			print "======================================================"
			print "     missing data must be between 0 and 100           "
			print "          default:   50 is recommended                "
			print "          default:   10 for perfect scores            "
			print "======================================================"
			sys.exit()

		if double_x < 0 or double_x > 25:
			print "======================================================"
			print "  double rec trio cutoff must be between 0 and 25     "
			print "          default:   3 for noisy data                 "
			print "          default:   0 for perfect scores             "
			print "======================================================"
			sys.exit()

		if trio_analysis != "TRIO" and trio_analysis != "NOTRIO" and trio_analysis != "NR_SET":
			print "======================================================"
			print "      trio_analysis must be TRIO or NOTRIO            "
			print "     for large set of markers (greater than 1000)     "
			print "  TRIO analysis may work really long (several hours)  "
			print "======================================================"
			sys.exit()

		if trio_analysis == "TRIO":
			map_construction = "TRUE"
		if trio_analysis == "NOTRIO":
			map_construction = "FALSE"
		if trio_analysis == "NR_SET":
			map_construction = "NR_SET"

		allele_dist = allele_d
		abs_loss = missingd
		double_limit = double_x

		sys.setrecursionlimit(10000)

		# Set_CutOff_Values_Type_1()
		Set_CutOff_Values_Type_2()
		# Set_CutOff_Values_Type_3()

		Read_Data_File(in_name, out_name, rec_cut, bit_cut, dat_cut, frame_file_name)

#### THE END ####
