/usr/local/CPAN/Bioinf/Bioinf.pm


package Bioinf;
require 5.000;
require Exporter;
use Carp;

@ISA = qw(Exporter);
@EXPORT= qw( ISS_server ONE_TO_THREE_LETTER One_To_Three_Letter Richardson_alpha_matrix
        Roman Sheraga_alpha_matrix abs_numerically add_columns
        add_ranges_in_msp_line amino_acid_compos_id_percent amino_acid_compos_id_percent_trend amino_acid_homology_matrix
        arabic array_average array_chk array_least_occur
        array_median array_most_occur array_sum ask_for_ENV_vars
        assign_options_to_variables attach_classification_to_pdb_seq average_array average_of_array
        beep bla_to_msf break_down_clu_file by_values
        calc_compos_id_hash calc_factorial calculate_protein_volume capitalize_sentence
        capitalize_word cc check_common_elements_in_array check_file_exists_in_path
        check_homology_of_seq_pair check_if_defined check_if_files_exist check_if_sec_str_form_hash
        check_input_file_extension check_linkage_of_2_similar_seqlet_sets check_parf_files chop_word
        cls clu_to_sso_to_msp cluster_merged_seqlet_sets com_gap_pos_hash
        common_compos_2_hash common_compos_id_hash compare_sec_template_with_db compos_id_percent_array
        compos_id_percent_hash composition_table compress_files_by_gzip condense_number_string
        condense_script convert_1_to_3_letter convert_3_to_1_letter convert_arr_and_str_2_hash
        convert_array_to_hash convert_bla_multaln_to_msf convert_bla_to_msf convert_bla_to_msp
        convert_char_to_0_or_1_hash convert_clu_to_msp convert_clu_to_sso_to_msp convert_dna_to_protein
        convert_hmmls_to_msp_files convert_mmp_to_mrg convert_msp_line_to_mmp_line convert_num_0_or_1_hash_opposite
        convert_num_to_0_or_1_hash convert_rna_to_protein convert_sso_to_msp convert_string_to_hash
        convert_to_anti_sense corelation_coefficient correct_head_box count_num_of_char
        cp create_sorted_cluster ctime default_help
        define_secondary_structure_segments delbut detect_file_format_type die_if_file_not_present
        diff_dates digitize_char dir_name dir_path
        dir_search dir_search_single divide_array divide_clusters
        divide_string do_hmm_sequence_search do_intermediate_sequence_search do_psi_blast_search
        do_self_blastp_search do_sequence_search do_window_scan encrypt_passwd
        exchange_query_with_match_in_msp exchange_query_with_match_in_msp extract_num_to_array extract_ori_seq
        extract_words fasta_append fasta_kt1_search fasta_out_seq_no
        fasta_output fasta_permute_array_write fasta_permute_hash_write fetch_seq
        fetch_sequence_from_db fetch_subroutines fetch_swiss_seq file_size
        fill_ending_space filter_by_string_length filter_hash_by_num_value filter_intermediates_by_E_value
        filter_seq_DB_by_seq_length find_central_seq_msp_chunk find_central_sequence find_low_complexity_region
        find_program_in_path find_seq_file_old find_seq_files find_source_perl_library
        follow_seqlet_link fromJulian full_pwd_path geanfammer
        geanfammer_main get_added_matched_regions_in_msp get_all_dirs_from_ENV get_all_msp_files
        get_av_and_sd_seq_length get_av_seq_length get_average_sequence_size get_averaged_prediction
        get_base_names get_column get_common_array_entry get_common_column
        get_common_hash_keys get_correct_percent_alignment_rate get_date get_dir_names_only
        get_domain_inside_domain get_each_posi_diff_hash get_extension_names get_false_positive_seq_matches
        get_file_dir_names get_file_extensions get_first_seq_in_alignment get_full_dir_names
        get_full_file_name get_full_path_dir_names get_full_pwd_path get_gap_positions
        get_hash_value_average get_high_score_blocks get_homology_info_of_seq_pairs get_host_by_addr
        get_host_by_name get_id_among_2 get_id_among_2_1 get_id_among_2_2
        get_internal_dup_in_a_cluster get_isearch_result_stat get_largest_element get_largest_file
        get_linked_sequence get_linux_kernel_version get_longest_str_size get_max_hash_by_value
        get_median get_median get_msp_enquiry_sequence get_msp_matched_sequence
        get_msp_range get_multiple_array_entry get_occurances_of_char get_occurances_of_shift_type_hash
        get_occurances_of_shift_type_hash_all get_overlapping_range get_overlapping_seq_match_size get_pair_homol_array
        get_pair_homol_hash get_path_dirs_from_ENV get_pdb_file_start_number get_peptide_occurance
        get_percent_homo_hash get_percent_homol_arr get_percentage get_perl_keywords
        get_posi_diff get_posi_diff_abs get_posi_diff_and_rms_hash get_posi_diff_hash
        get_posi_rates_hash_out get_posi_rates_hash_out get_posi_rates_hash_out_compact get_posi_rates_hash_out_jp
        get_posi_rates_hash_out_msf get_posi_sans_gaps get_posi_shift_hash get_posi_shift_hash_rms
        get_posi_shift_rate get_posi_shift_rms_hash get_posi_shift_rms_whole get_position_shift_rate
        get_probable_half get_pwd_dir get_pwd_dir_name get_residue_error_rate
        get_scop_correcting_pairs get_sd_of_length_diff get_segment_shift_rate get_seq_fragments
        get_seq_hash_sans_gaps get_seq_identity get_seqblock get_sequence_complexity
        get_sequence_number get_shortest_str_size get_smallest_file get_stat_FASTA_search_result_in_msp_0_files
        get_sub_hash get_subdir_names get_subroutine_calls get_time
        get_total_memory_size_in_linux get_unix_shell_name get_whole_pwd_path get_windows_cs_rate_array
        get_windows_sc_rate_array get_wrong_segment_rate handle_arguments handle_arguments_old
        hash_average hash_catenate hash_chk hash_common
        hash_common2 hash_common_by_keys hash_no_common hash_output_chk
        hash_stat_for_all hash_substract_by_keys hostname if_file_older_than_x_days
        import_ENV_vars initialize_code insert_gaps_in_seq_hash insert_lines_anywhere
        interm_lib_search is_html isroman key_ready
        link_ranges load_mount_info mail_it make_2D_aa_residue_matrix_array
        make_2D_identity_matrix make_2D_identity_matrix_array make_6_frame_dna_sequences make_cdf_file
        make_clustering_summary make_composition_ratio_table make_composition_ratio_table_simple make_composition_table
        make_fasta_files_from_msp_1_files make_filtered_list make_hmm_from_alignment make_intermediate_sequence_library
        make_one_array make_pairs_from_hash make_random_sequence make_reverse_seq_database
        make_scrambled_seq_database make_seq_alignment_length_even make_seq_index_file make_sequence_match_table
        make_singlet_list_from_pdb_entries make_standalone_subroutines make_swiss_index make_template_from_sec_str
        max max_elem_array max_elem_string_array max_elem_string_array
        max_str_key_hash max_str_value_hash maximum merge_array
        merge_arrays_by_common_elements merge_hash merge_many_arrays merge_sequence_alignments
        merge_sequence_in_msp_chunk merge_sequence_in_msp_file merge_similar_ranges merge_similar_seqlets
        merge_superfam_fasta_files_for_ISL min min_elem_array min_str_key_hash
        min_str_value_hash minimum msf_permute_array_write msf_permute_hash_write
        msp_single_link_hash mv n normalize_numbers
        numerically occurances one_to_three_letter open_ali_files
        open_aln_files open_brk_files open_cel_files open_clu_files
        open_dna_files open_dssp_files open_embl_files open_fasta_files
        open_fil_file open_hlx_files open_hmmfs_files open_hmmls_files
        open_jp_files open_lottery_file open_msf_files open_msf_jp_files
        open_msp_files open_out_files open_pdb_files open_pdbg_files
        open_phd_files open_pir_files open_predator_files open_rms_files
        open_rms_files2 open_sdb_files open_self open_seq_alignment_files
        open_seq_files open_sequence_index_files open_slx_files open_sso_files
        open_sst_files open_sst_files_with_gap open_stride_dat_files open_stride_dat_files
        open_subdir_and_go_in_and_do open_swissprot_seq_files open_tem_files opendir_and_go
        opendir_and_go_in_and_do_something opendir_and_go_rand_fasta opendir_and_go_rand_fasta_and_clustal overlay_seq_by_certain_chars
        overlay_seq_for_identical_chars overlay_seq_hash pair_percent_id_trend pairwise_iden_pos
        pairwise_percent_id parse_arguments permute permute_binary
        pick_random_files pick_random_hash_pairs pir_permute_array_write pir_permute_hash_write
        pir_write pir_write plot_histogram_horizontally plot_vertically
        print_clusfile_from_hash print_in_block print_seq_in_block print_seq_in_block_old
        print_seq_in_block_with_print print_seq_in_columns produce_random_numbers push_if_not_already
        push_if_not_already put_gaps_every_x_position_in_string put_gaps_every_x_position_in_string_special put_gaps_in_hash
        put_msp_lines_to_hash_from_bla put_position_back_to_str_seq put_slash_before_special_chars pwd_dir_name
        pwd_path rand_DNA_seq_generate rand_RNA_seq_generate rand_sequence_mul_array
        rand_sequence_one_array rand_sequence_one_string rand_word randomise_file_contents
        randomise_lines read_all_head_boxes read_any_dir read_any_dir2
        read_any_dir_for_dir read_any_dir_simple read_any_seq_files read_blast_hits
        read_correct_head_box read_dir_names_only read_file_extension_names_only read_file_names_only
        read_first_head_box read_fssp_files read_full_dir_names read_head_box
        read_head_box2 read_head_boxes read_hssp_no_inserts read_machine_readable_sso_lines
        read_machine_unreadable_sso_lines read_option_table read_seq_matrix_files read_sso_lines
        read_sst_files read_subroutines remov_com_column remov_com_column2
        remov_common_gap remove_dup_in_array remove_dup_in_hash remove_dup_match_in_msp_files
        remove_dup_seq_entry remove_elements_by_name remove_elements_by_pattern remove_elements_by_position
        remove_file_extension remove_mail_header_in_files remove_non_char remove_repetitives_in_array
        remove_similar_seqlets remove_small_files remove_text replace_lines
        replace_subroutines replace_text reset_all_the_vars reset_shell_environment
        rev_abs_numerically rev_lines_pdb rev_sequence_mul_array reverse_hash
        reverse_sequences roman rotate_seq round
        round_number round_numbers run_fasta_sequence_search scale_for_horizontal_histogram
        scan_win_and_get_cs_rate_pairs scan_win_and_get_sc_rate_pairs scan_win_get_average scan_window_and_calc_average
        scan_window_and_calc_something scan_windows_and_get_compos_seqid_rate scoring scramble_array
        scramble_sequences sd se search_files_in_subdir
        search_palindromes search_self self_self_search send_mail
        sep seq_comp_percent1 seq_comp_percent2 seq_id_percent_array
        seq_to_regexp set_debug set_debug_option set_special_options
        shift_word shift_word_recursively show_array show_default_help
        show_hash show_in_fasta show_options show_subclusterings
        smaller_one sort_by_E_values sort_by_cluster_size sort_by_column
        sort_by_column_bigger_first sort_by_digits_in_string sort_by_hash_values sort_by_keys
        sort_files_by_size sort_files_by_time sort_hash_by_keys sort_hash_by_value
        sort_hash_by_value_and_make_array sort_hash_value_by_column sort_string_by_length sort_words_in_string
        split_fasta_files split_file_by_string split_files split_sequence
        sqrt_array square_array sso_to_msp ssp_permute_array_write
        ssp_permute_hash_write ssp_write steve_permute_array strip_rotated_seq
        strip_sequence_ranges substract_array substract_hash_by_keys substract_hash_by_values
        sum_array sum_digits_in_string sum_hash_values sum_hash_values_of_string
        sum_of_array sum_of_squared_array sum_x_mul_y_arrays superpose_hash
        superpose_seq_hash take_file_name takeout_subroutines tally_2_hashes
        tell_seq_length tempname three_to_one_letter tidy_secondary_structure_segments
        time_date toJulian transform_values trim_numbers
        update_subroutines weighted_av weighted_average word_wrap
        write_aln_files write_ardf_files write_c3al_files write_c3ss_files
        write_dof_files write_evss_files write_fasta write_fasta_array
        write_fasta_seq_by_seq write_gcg_file write_gcg_genbank_file write_genbank_file
        write_good_bad_list_in_divide_clusters write_head_box write_html_headbox write_iss_file
        write_jp write_modeller_ali_file write_modeller_top_file write_mprf_files
        write_msf write_msp3_files write_msp_files write_nhco_files
        write_parf_files write_pdbg_files write_pir_file write_prdl_files
        write_pred_files write_primer_file write_rdif2_files write_rdif_files
        write_reverse_seq_files write_sdb_file write_seq_files write_staden_file
        write_subroutines x_mul_y_arrays );

#!/usr/bin/perl
#______________________________________________________________________
# Title     : Bio::Bioinf (for Bio Perlogical) or bio_lib.pl
# Usage     : require "Bioinf.pl"; ##<-- This is very slow, so you'd better
#             use Bio::Bioinf;   <-- When you have Bioinf.pm module installed under Bio
#              or
#             use Bioinf;
#
#             or, copy the subroutines in your prog. or make a smaller lib files
#               which are classified according to functions(like, Bio_Seq.pl
#               for sequence handling, Bio_Array.pl for various array
#               subroutines..), or make your own module out of this, do whatever
#               you want....
#
# Function  : 1) This is a comprehensive perl subroutine library developed
#                 under Bioperl project and others.
#                 URL: http://cyrah.med.harvard.edu/Bioperlsub/
#
#             2) The design of this module is for simple layer biological
#                 module than multilevel object oriented module.
#
#             3) This also serves as the depository database for various
#                 perl subroutines or algorithms developed in
#                 Bioinformatics and Genome projects.
#
#             4) You can copy any of the sub routines in this file, modify, use
#                 in yours...
#                 PLEASE MODIFY AS FREELY AS YOU WANT !! All has the
#                 same PERL copyright
#
#             5) All the subroutines are tested in small files
#                 If you want to have such single example program
#                 to see how they really work, pls contact me( A Biomatic )
#                 For example, a file called  'handle_arguments.pl' exists to
#                 test the subroutine 'handle_arguments'. Usually you can find them
#                 in  http://cyrah.med.harvard.edu/Bioinf.pl.html
#
# Example   : require "Bioinf.pl"; BUT, I recommand you take subroutines out and
#             use it directly or modify in your programs.
#             use Bioinf;
#
# Warning   : For the enhancement of Biology, Biomatics, and Science.
#             This is a development companion.
#             Class is for classification of my subroutines. If it is Bio, it can
#             be useful for biological sequence data handling. If it's Utility,
#             it can also be used for general purpose file handling stuff.
#             File, Array, Hash,... are my classification items.
# Keywords  : Biology, perl library, sequence handling lib
# Options   : nothing (used as subroutine library or as Bioinf.pm module)
#
# Author    : J. Park, Jason Johnson, Sarah Teichmann, Alex Bateman,
#               Astrid Reinhardt, and anybody contributed.
#               jong@salt2.med.harvard.edu
# Category  : Bioinf
# Version   : 2.0    (Aug/1/1998)
#------------------------------------------------------------------


print "\n ################################################################\n";
print   " #                                                              #\n";
print   " # Using Bioperl subroutine Module for Bioinformatics & Biology #\n";
print   " #                                                              #\n";
print   " ################################################################\n";

#&parse_arguments(1);


## The following box is used as the header for any subroutines developed to
##  give information on the subroutines. It is used by Jong as a template.


#______________________________________________________________________________
# Title     :
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------


## Following variables in 'my' are very commonly used ones. I have
## put those to be copied into any new subroutines to be developed
## This is because, in Perl, every variable is global unless you mark them
## to be inside the subroutines. Many BUGs are coming from not localizing vars.
## This array variables are used  as a defalt insertion for the subroutine
## 'handle_arguments'. If you add this box in any sub, 'handle_arguments'
## subroutine will be called and any arguments passed to the subroutine will
## be classified to file, dir, string, hash(as reference), array(as reference),
## pure number, or option(with -) prefix. etc. For more detail look at
## handle_argument's header.


sub IGNORE_THIS{
     #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
     my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
     my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
     my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
     my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
     my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
     if($debug==1){print "\n\t\@hash=\"@hash\"
          \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
          \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
     #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
}

#________________________________________________________________________
# Title     : handle_arguments
# Usage     : Just put the whole box delimited by the two '###..' lines below
#             to inside of your subroutines. It will call 'handle_arguments'
#             subroutine and parse all the given input arguments.
#             To use, claim the arguments, just use the variable in the box.
#             For example, if you had passed 2 file names for files existing
#             in your PWD(or if the string looks like this: xxxx.ext),
#             you can claim them by $file[0], $file[1] in
#             your subroutine.
# Function  : Sorts input arguments going into subroutines and returns default
#             arrays of references for various types (file, dir, hash, array,,,,)
#             If you give (\@out, @file), it will put @out into @array as a ref
#             and also the contents of @out will be dereferenced and put to
#             raw_string regardless what is in it).
#
# Example   : 'handle_arguments(\@array, $string, \%hash, 8, 'any_string')
# Warning   :
# Keywords  : handling arguments, parsing arguments,
# Options   :
# Returns   : Following GLOBAL variables
#
#             $num_opt,    @num_opt     @file          @dir
#             $char_opt,   @char_opt    %vars          @array,
#             @hash        @string,     @raw_string    @range,
#
#             $num_opt has 10,20
#             @num_opt has (10, 20)
#             @file has  xxxx.ext
#             @dir has  dir  or /my/dir
#             $char_opt has 'A,B'
#             @char_opt has (A, B)
#             @array has  (\@ar1, \@ar2)
#             @hash has (\%hash1, \%hash2)
#             @string  ('sdfasf', 'dfsf')
#             @raw_string (file.ext, dir_name, 'strings',,)
#             @range has values like  10-20
#             %vars deals with x=2, y=3 stuff.
#
# Argument  : any type, any amount
# Category  : general programming
# Version   : 4.8
#--------------------------------------------------------------------
sub handle_arguments{
     my($c, $d, $e, $f, $i, $j, $k, $l, $s, $t, $x, $y, $z, $char_opt, $dir, @hash,
             $file, $in_dir, $num_opt, @char_opt, @dir, @file, @string, @file_dir, @k,
             @num_opt, @raw_string,@string, @array, %vars, @range, @temp, $temp,
             @char_options);

	&set_debug_option;
	if(@_<1){ print chr(7),"\n This is handle_arguments. No args Passed, Error?\n"}
	elsif( (@_ ==1)&& (ref($_[0]) eq 'ARRAY') ){ # when there is only 1 argument
	  push(@array, $_[0]);
	  push(@k, $_[0]);
	}elsif( (@_==1)&&( !ref($_[0]) ) ){
	  if(-f $_[0]){ push(@file, $_[0]);   push(@string, $_[0]) }
	  elsif(-d $_[0]){ push(@dir, $_[0]); push(@string, $_[0]) }
	  elsif($_[0]=~/^\d+$/){ push(@num_opt, $_[0]); $num_opt.=$_[0] }
	  elsif($_[0]=~/^\w+$/){ push(@string, $_[0]); }
	}elsif(@_ >=1){ @k = @_ }

	#####______Start of  general argument handling______######
	for($k=0; $k < @k ;$k++){
	  if( !ref($k[$k]) ){
 		  if($k[$k]=~ /^[\-]?([a-zA-Z]\d*) {0,5}$/){  push(@char_opt, $1); $char_opt .= "$1\,";
		  }elsif($k[$k]=~ /^\-([a-zA-Z]+)$/){          ## When multiple option is given,
                      @char_options = split(/\,|/, $1);  push(@char_opt, @char_options);
                      $char_opt .= join("\,", @char_options); ## '-' should be used. eg. '-HEGI'
		  }elsif($k[$k]=~ /^(\w+)\=(\S* *)$/){  $vars{$1}=$2;  $vars .= "$1\,";
		  }elsif($k[$k]=~ /^(\-?\d+)$/){ push(@num_opt, $1);  $num_opt .= "$1\,";
		  }elsif($k[$k]=~ /^\d+\.?\d*\-\d+\.?\d*$/){  push(@range,  $k[$k] );
		  }elsif(-f $k[$k]){                          push(@file,   $k[$k] );
		  }elsif(-d $k[$k]){                          push(@dir,    $k[$k] );
		  }elsif($k[$k]=~ /\/[\w\d\.\-]+[\/].+[\/]$/){push(@dir,    $k[$k] );
		  }elsif($k[$k]=~ /^\/[\w\d\.\-]+[\/]*$/){    push(@dir,    $k[$k] );
		  }elsif($k[$k]=~ /^[\/\w\d\-\.]+\.\w+$/){    push(@file,   $k[$k] );
		  }elsif($k[$k]=~ /\S\/[\/\w\d\-\.]+\.\w+$/){ push(@file,   $k[$k] );
		  }elsif($k[$k]=~/^\w+[\/\\\w\d\.\-]+$/){     push(@string, $k[$k] );
		      # string does not have space, but includes '\', '/', '.'
		  }else{                                      push(@raw_string, $k[$k] );  }

	  }elsif( ref($k[$k]) ){
		  if( ref($k[$k]) eq "SCALAR"){
			 if(${$k[$k]} =~ /^[\-]?([a-zA-Z]\d*) {0,5}$/){ push(@char_opt, $1); $char_opt  .= "$1\,";
				}elsif(${$k[$k]}=~ /^\-([a-zA-Z]+)$/){ push(@char_opt, @char_options);
					$char_opt  .= join("\,", @char_options);  ## as an option string.
				}elsif(${$k[$k]}=~ /^(\w+)\=(\S* *)$/){  $vars{$1}=$2;  $vars .= "$1\,";
				}elsif(${$k[$k]}=~ /^(\-?\d+)$/){ $num_opt .= "$1\,";  push(@num_opt, $1);
			    }elsif(${$k[$k]}=~ /^\d+\.?\d*\-\d+\.?\d*$/){    push(@range,  $k[$k] );
				}elsif(-f ${$k[$k]}){                            push(@file,   ${$k[$k]} );
				}elsif(-d ${$k[$k]}){                            push(@dir,    ${$k[$k]} );
				}elsif(${$k[$k]}=~ /\/[\/\w\d\.\-]+[\/].+[\/]/){ push(@dir,    ${$k[$k]} );
				}elsif(${$k[$k]}=~/^\/[\/\w\d\.\-]+[\/]*$/){     push(@dir,    ${$k[$k]} );
				}elsif(${$k[$k]}=~ /^[\/\w\d\-\.]+\.\w+$/){      push(@file,   ${$k[$k]} );
				}elsif(${$k[$k]}=~/^\w+[\w\d\.\-]+$/){           push(@string, ${$k[$k]} );
				}else{                                           push(@raw_string, ${$k[$k]}); }
		  }elsif(ref($k[$k]) eq "ARRAY"){ my @temp_arr = @{$k[$k]}; push(@array, $k[$k]);
			for ($i=0; $i<@temp_arr; $i++){
			   if(-f $temp_arr[$i]){                            push(@file, $temp_arr[$i]);
			   }elsif($temp_arr[$i]=~/^\d+\.?\d*\-\d+\.?\d*$/){ push(@range,$temp_arr[$i] );
			   }elsif(-d $temp_arr[$i]){                        push(@dir , $temp_arr[$i]);
			   }elsif($temp_arr[$i]=~/\/[\/\w\d\.\-]+[\/].+[\/]/){ push(@dir, $temp_arr[$i] );
			   }elsif($temp_arr[$i]=~/^\/[\/\w\d\.\-]+[\/]*$/){ push(@dir, $temp_arr[$i] );
			   }elsif($temp_arr[$i]=~/^[\/\w\d\-\.]+\.\w+$/){   push(@file,$temp_arr[$i] );
																push(@string,$temp_arr[$i] );
			   }elsif($temp_arr[$i]=~/^\w+[\w\d\.\-]+$/){       push(@string,$temp_arr[$i]);
			   }else{                                           push(@raw_string, $temp_arr[$i]); }
			 }
		  }elsif(ref($k[$k]) eq "HASH"){                             push(@hash,   $k[$k] ); }
	  }
	}
	@raw_string=(@raw_string, @string);
	@file = @{&remove_dup_in_arrayH(\@file)};
	#-----------------------------------------------------
	 sub remove_dup_in_arrayH{  my($i, @nondup, @out_ref, %duplicate, @orig, @out_ref);
	     for($i=0; $i<@_; $i++){  undef(%duplicate);
	        if(ref($_[$i]) eq 'ARRAY'){    @orig = @{$_[$i]};    }
		   @nondup = grep { ! $duplicate{$_}++ } @orig; push(@out_ref, \@nondup);
             }
	     if(@out_ref ==1){ return($out_ref[0]);}
	     elsif(@out_ref >1){  return(@out_ref); }
	 }
	#-----------------------------------------------------
	return(\@hash, \@array, \@string, \@dir, \@file, \@num_opt,
		\@char_opt, \$num_opt, \$char_opt, \@raw_string, \%vars, \@range );
}

#________________________________________________________________________
# Title     : set_debug_option
# Usage     : &set_debug_option;
# Function  : If you put '#' or  '##' at the prompt of any program which uses
#             this sub you will get verbose printouts for the program if the program
#             has a lot of comments.
# Example   : set_debug_option #    <-- at prompt.
# Warning   :
# Keywords  :
# Options   : #   for 1st level of verbose printouts
#             ##  for even more verbose printouts
# $debug  becomes 1 by '#'  or '_'
# $debug2 becomes 1 by '##'  or '__'
#
# Returns   :  $debug
# Argument  :
# Category  :
# Version   : 1.8
#--------------------------------------------------------------------
sub set_debug_option{
    my($j, $i, $level);
    unless( defined($debug) ){
       for($j=0; $j < @ARGV; $j ++){
          if( $ARGV[$j] =~/^(_+)$|^(#+)$/){ # in bash, '#' is a special var, so use '_'
              print __LINE__," >>>>>>> Debug option is set by $1 <<<<<<<<<\n";
              $debug=1;
                       print chr(7);
              print __LINE__," \$debug  is set to ", $debug, "\n";
              splice(@ARGV,$j,1); $j-- ;
              $level = length($1)+1;
              for($i=0; $i < $level; $i++){
                 ${"debug$i"}=1;
                 print __LINE__," \$debug${i} is set to ", ${"debug$i"}, "\n";
              }
          }
       }
    }
}



#__________________________________________________________________________
# Title     : sort_by_E_values
# Usage     : @out=@{&sort_by_E_values(\@input_line_array)};
# Function  : it sorts by the 2nd column(E-value, in msp file), small comes top
# Example   :
# Keywords  : sort_by_2nd_column, sort_by_second_column, sort_by_e_values,
#             sort_by_evalues,
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#----------------------------------------------------------------------------
sub sort_by_E_values{
     my (@in);
     if(ref $_[0] eq 'ARRAY'){
          @in = @{$_[0]};
     }else{
          @in = @_;
     }
     @in= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map {/^ {0,3}\S+ +(\S+)/ && [$_, $1] } @in;
     return(\@in);
}


#__________________________________________________________________________
# Title     : sort_hash_value_by_column
# Usage     : @out=@{&sort_by_column(\%input_line_hash, <column num>)};
# Function  : it sorts values of hash by the given column , small comes top. Unless number is
#             is given, it sorts by the first column.
#             It returnns ARRAY of the keys of the input HASH!!!
#
#             It can handle gzipped file. It called gunzip to open and sort.
#
# Example   : Above will sort the file xxxx.msp by its 3rd column(numerically)
#               small numbers will come to the top.
# Keywords  : sort_by_2nd_column, sort_by_second_column, sort_by_e_values,
#             sort_by_evalues, sort_hash_by_column, sort_value_by_column,
# Options   :
#      s  for sorting stringwise
#      d  for sorting by digit
#      n  for sorting by digit(numerically)
#   numerically  an alias of n
#
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub sort_hash_value_by_column{
      my (%in, $i, $col, $sort_numerically, $sort_non_numerically, @keys);
      $sort_numerically=1;
      if(@_ < 2  ){ print "\n# FATAL: sort_by_column needs 2 arguments\n"; exit }
      for (@_){
          if(ref $_ eq 'HASH'){ %in =%{$_}; }
          elsif( ref $_ eq 'SCALAR'){ $col=${$_}; }
          elsif(/^\d+$/){ $col=$_ }
          elsif(/^ *[nd] *$/i){ $sort_numerically=1; $sort_non_numerically=0; }
          elsif(/^ *n[umerically]* *$/i){ $sort_numerically=1; $sort_non_numerically=0; }
          elsif(/^ *s *$/i){ $sort_non_numerically=1; $sort_numerically=0; }
      }
      $col--;

      @keys= keys %in;
      if($sort_numerically ){
                       @keys= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map { [$_, ($in{$_}=~/(\S+)/g)[$col] ] } @keys;
      }else{ # here let's do the sring sort
                       @keys= map {$_->[0]} sort { $a->[1] cmp $b->[1] } map { [$_, ($in{$_}=~/(\S+)/g)[$col] ] } @keys;
      }
      return(\@keys);
}


#__________________________________________________________________________
# Title     : sort_by_column
# Usage     : @out=@{&sort_by_column(\@input_line_array, <column num>)};
# Function  : it sorts by the given column , small comes top. Unless number is
#             is given, it sorts by the first column.
#
#             It can handle gzipped file. It called gunzip to open and sort.
#
# Example   : sort_by_column.pl 3 xxxx.msp
#               Above will sort the file xxxx.msp by its 3rd column(numerically)
#               small numbers will come to the top.
# Keywords  : sort_by_2nd_column, sort_by_second_column, sort_by_e_values,
#             sort_by_evalues,
# Options   :
#      s  for sorting stringwise
#      d  for sorting by digit
#      n  for sorting by digit(numerically)
# Category  :
# Version   : 1.4
#----------------------------------------------------------------------------
sub sort_by_column{
      my (@in, @M, $col, $sort_numerically, $sort_non_numerically);
      unless(@_ ==2  ){ print "\n# FATAL: sort_by_column needs 2 arguments\n"; exit }
      $sort_numerically=1;
      for (@_){
          if(ref $_ eq 'ARRAY'){ @in =@{$_}; }
          elsif( ref $_ eq 'SCALAR'){ $col=${$_}; }
          elsif(/^\d+$/){ $col=$_ }
          elsif(/^ *[nd] *$/i){ $sort_numerically=1; $sort_non_numerically=0; }
          elsif(/^ *s *$/i){ $sort_non_numerically=1; $sort_numerically=0; }
      }
      $col--;
      if($sort_numerically ){ ## if the first and last elements are digits?
          @in= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map { [$_, ($_=~/(\S+)/g)[$col] ] } @in;
      }else{ # here let's do the sring sort
          @in= map {$_->[0]} sort { $a->[1] cmp $b->[1] } map { [$_, ($_=~/(\S+)/g)[$col] ] } @in;
      }
      return(\@in);
}

#__________________________________________________________________________
# Title     : sort_by_cluster_size
# Usage     : @out=@{&sort_by_cluster_size(\@input_line_array)};
# Function  : it sorts by the 1st digit before '-'  as in 2-183_cluster, 2-140_cluster,
#               etc.
# Example   :
# Keywords  : sort_by_columns, sort_by_text_columns, sort_by_column_numerically
#             sort_by_pattern
# Options   :
# Category  :
# Version   : 1.2
#----------------------------------------------------------------------------
sub sort_by_cluster_size{
    my (@in, @M, $col);
    if(@_ < 1  ){ print "\n# FATAL: sort_by_cluster_size needs 1 argument\n"; exit }
    if(ref $_[0] eq 'ARRAY'){        @in = @{$_[0]};      }else{     @in = @_;    }
    $col=0;
    @in= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map { [$_, ($_=~/^(\S+)\-/)[$col] ] } @in;
    return(\@in);
}


#__________________________________________________________________________
# Title     : sort_by_column_bigger_first
# Usage     : @out=@{&sort_by_column_bigger_first(\@input_line_array, 1)};
# Function  : it sorts by the 2nd column(E-value, in msp file), small comes top
#             by the help of  ts <decoux@moulon.inra.fr>
# Example   :
# Keywords  : sort_by_columns, sort_by_text_columns, sort_by_column_numerically
#
# Options   :
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub sort_by_column_bigger_first{
    my (@in, @M);
    unless(@_ ==2  ){ print "\n# FATAL: sort_by_column_bigger_first needs 2 arguments\n"; exit }
    if(ref $_[0] eq 'ARRAY'){        @in = @{$_[0]};      }else{     @in = @_;    }
    if(ref $_[1] eq 'SCALAR'){       $col=${$_[1]};       }else{     $col=$_[1];  }
    $col--;
    @in= map {$_->[0]} sort { $b->[1] <=> $a->[1] } map { [$_, ($_=~/(\S+)/g)[$col] ] } @in;
    return(\@in);
}

#______________________________________________________________________________
# Title     : make_6_frame_dna_sequences
# Usage     : %six_dna_frame_seqs=%{&make_6_frame_dna_sequences(\%input_seq)};
# Function  : It makes 3 different types of frames for input seq
#               Then it reverses the input seq and makes another 3 frames!!
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub make_6_frame_dna_sequences{
    my(%six_frame_seqs, %original_input_seq, $orig_name,
       $frame_fr1_name, $frame_fr2_name, $frame_fr3_name,
       $frame_rf1_name, $frame_rf2_name, $frame_rf3_name,
       $frame_fr1, $frame_fr2, $frame_fr3);
    %original_input_seq=%{$_[0]};
    ($orig_name, $frame_fr1)=%original_input_seq;
    $frame_fr1_name="$orig_name\_fr1";
    $frame_fr2_name="$orig_name\_fr2";
    $frame_fr3_name="$orig_name\_fr3";
    $frame_rf1_name="$orig_name\_rf1";
    $frame_rf2_name="$orig_name\_rf2";
    $frame_rf3_name="$orig_name\_rf3";

    $frame_fr1     =$frame_fr1;
    $frame_fr2     =substr($frame_fr1, 1);
    $frame_fr3     =substr($frame_fr1, 2);

    $frame_rf1 =reverse($frame_fr1);
    $frame_rf2 =substr($frame_rf1, 1);
    $frame_rf3 =substr($frame_rf1, 2);

    %six_frame_seqs=($frame_fr1_name, $frame_fr1,
                     $frame_fr2_name, $frame_fr2,
                     $frame_fr3_name, $frame_fr3,
                     $frame_rf1_name, $frame_rf1,
                     $frame_rf2_name, $frame_rf2,
                     $frame_rf3_name, $frame_rf3 );

    return(\%six_frame_seqs);
}



#_____________________________________________________________________
# Title     : make_scrambled_seq_database
# Usage     : &make_reverse_seq_database(\@input_database_fasta_file);
# Function  :
# Example   :
# Warning   :
# Keywords  : scramble_seq_database, create_scrambled_seq_database
# Options   :
# Category  :
# Version   : 1.1
#-------------------------------------------------------------------
sub make_scrambled_seq_database{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
        \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
        \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
     my (%seqs, %scrambled_seqs, $fasta_file_for_DB,$base,$ext,$out_file_name );

     for($i=0; $i< @file; $i++){
          $fasta_file_for_DB =$file[$i];
          $base=${&get_base_names($fasta_file_for_DB)};
          #~~~~~~~~~~~~~~~~~~~ To prevent growing of _sc_sc_sc... ~~~~~~~~~~~~`
          if($base=~/^(\S+)_sc[\_sc]*$/){ $base=$1 }

          $ext =${&get_file_extensions($file[$i])};
          if($ext=~/\S/){
                 $out_file_name="$base\_sc\.$ext";
                 $out_bak_file ="$base\_sc_bak\.$ext";
          }else{
             $out_file_name="$base\_sc\.fa";
                 $out_bak_file ="$base\_sc_bak\.fa";
                 print "\n# There was no file ext for $base, attaching \"fa\" as default\n";
          }
          if(-s $out_file_name){
                 print "\n# $out_file_name already exists, moving it to $out_bak_file\n";
          }
          %seqs=%{&open_fasta_files(\$fasta_file_for_DB)};
          %reversed_seqs=%{&scramble_sequences(\%seqs)};
          &write_fasta(\%reversed_seqs, $out_file_name );
          if(-s $out_file_name){
                 print "\n# make_scrambled_seq_database: Supposedly wrote new file: $out_file_name\n";
          }else{
                 print "\n# make_scrambled_seq_database: Error in writing: $out_file_name\n";
          }
     }
     print "\n# make_scrambled_seq_database sub finished \n";
}


#__________________________________________________________________________
# Title     : make_2D_identity_matrix_array
# Usage     : @matrix=@{&make_2D_identity_matrix(\@seq1, \@seq2)};
# Function  : @matrix is like  $matrix[1][2]=1;
#             This assigns number 1 to array element
#             If one array is given, it makes self to self matrix.
#             When 2 are given, make matrix for the 2
# Example   :
# Keywords  : make_matrix
# Options   :
#    $skip_gap_char = g  for skipping gap char (any special char)
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#----------------------------------------------------------------------------
sub make_2D_identity_matrix_array{
     my (@matrix, $skip_gap_char, $k, $l, @seq_1, @seq_0);
     for($i=0; $i< @_; $i++){
         if($_[$i]=~/g/){
              $skip_gap_char='g';
              splice (@_, $i, 1);
              $i--;
         }elsif(ref($_[$i]) eq 'ARRAY'){
              push(@seqs, $_[$i]);
         }
     }
     @seq_0=@{$seqs[0]};
     @seq_1=@{$seqs[1]};
     unless(@seq_1){ @seq_1=@seq_0; };
     for($k=0; $k< @seq_0; $k++){
        for($l=0; $l< @seq_1; $l++){
               if($seq_1[$l] =~/\W/ and $skip_gap_char){ next };
               if($seq_0[$k] eq $seq_1[$l]){
                      $matrix[$k][$l]=1;
                      print "# X\[$k\] Y\[$l\] = 1 \n";
               }
        }
     }
     return(\@matrix);
}


#__________________________________________________________________________
# Title     : make_2D_aa_residue_matrix_array
# Usage     : @matrix=@{&make_2D_aa_residue_matrix_array(\@seq)};
# Function  : @matrix is like  $matrix[1][2]='A'; when aa residue is identical
#             This assigns identical residue to array element
#             If one array is given, it makes self to self matrix.
#             When 2 are given, make matrix for the 2
# Example   :
# Keywords  : make_matrix
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub make_2D_aa_residue_matrix_array{
      my @seq=@{$_[0]};
      my @seq2=@{$_[1]};
      if(@_ == 1){ @seq2=@seq };

      my (@residue_matrix, $k, $l);
      for($k=0; $k< @seq; $k++){
         for($l=0; $l< @seq2; $l++){
             if($seq[$k] eq $seq2[$l]){
                 $residue_matrix[$k][$l]="$seq[$k]";
                 print "# $seq[$k] = $l \n";
             }
         }
      }
      return(\@residue_matrix);
}


#__________________________________________________________________________
# Title     : make_2D_identity_matrix
# Usage     : @matrix=@{&make_2D_identity_matrix(\$seq, [\$seq2] )};
# Function  : @matrix is like  $matrix[1][2]=1;
#             This assigns number 1 to array element
# Example   :
# Keywords  : make_matrix, make_identity_matrix
# Options   :
#        s  for show axis
# Returns   :
# Argument  :
# Category  :
# Version   : 1.3
#----------------------------------------------------------------------------
sub make_2D_identity_matrix{

     #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
     my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
     my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
     my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
     my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
     my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
     if($debug==1){print "\n\t\@hash=\"@hash\"
          \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
          \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
     #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
     my (@matrix, @seq1, @seq2 );
     if(@_ > 1){     @seq1=split(//, $string[0]);   @seq2=split(//, $string[1]);
     }else{      @seq2=@seq1= split(//, $string[0]); }

     if($char_opt=~/s/){
        @matrix = map {  $m = $_; "$m  ".join('', map {$_ eq $m or ' '} @seq1 )."\n";  } @seq2;
     }else{
        @matrix = map {  $m = $_; join('', map {$_ eq $m or ' '} @seq1 )."\n";  } @seq2;
     }
     foreach (@matrix) {        print ;      }
     return(\@matrix);
}

#________________________________________________________________________________
# Title     : amino_acid_homology_matrix
# Usage     : $yes_no=${&amino_acid_homology_matrix('E', 'D')};
# Function  :
# Example   :
# Keywords  : are_they_homologous, amino_acid_homology_table, compare_amino_acid_homology
# Options   :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------------------------
sub amino_acid_homology_matrix{
    my ($amino_acid1, $amino_acid2,  $hydrophobic_group, $neural_polar,
        $acidic_group, $basic_group, $proline);
    $amino_acid1=${$_[0]} || $_[0];
    $amino_acid2=${$_[1]} || $_[1];
    $hydrophobic_group='LIFV'; # A excluded by me
    $neural_polar ='STCNQ'; # M excluded by me
    $acidic_group='ED';
    $basic_group='KRH';
    $proline='P';
    @groups=($hydrophobic_group, $neural_polar, $acidic_group, $basic_group);
    for($i=0; $i< @groups; $i++){
        if($groups[$i] =~/$amino_acid1/ and $groups[$i] =~/$amino_acid2/){
            return(\1);
        }
    }
    return(\0);
}

#______________________________________________________________________________
# Title     : write_reverse_seq_files
# Usage     :
# Function  :
# Example   :
# Keywords  : write_rev_seq_files, write_reverse_msf_files
# Options   :
# Author    : jong@salt2.med.harvard.edu
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub write_reverse_seq_files{
    my $inputfile=$_[0];
    my $base=${&get_base_names(\$inputfile)};
    my $ext=${&get_extension_names(\$inputfile)};
    my $out_rev_file="$base\_rv\.$ext";
    my ($hash_out, $order_array)=&open_msf_files(\$inputfile, 'o');
    my %seq=%{$hash_out};
    my @order=@{$order_array};
    my @file_written=@{&write_msf(\%seq, \$out_rev_file, \@order)};
    print "\n# (INFO)  \@file_written: @file_written\n";
    return(\@file_written);
}



#_____________________________________________________________________
# Title     : make_reverse_seq_database
# Usage     : &make_reverse_seq_database(\@input_database_fasta_file);
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub make_reverse_seq_database{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
        \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
        \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    $| = 1;
    for($i=0; $i< @file; $i++){
        my $fasta_file_for_DB =$file[$i];
        my $base=${&get_base_names($fasta_file_for_DB)};
        my $ext =${&get_file_extensions($file[$i])};
        my($out_file_name, %seqs, %reversed_seqs);
        if($ext=~/\S/){
               $out_file_name="$base\_rv\.$ext";
        }else{
           $out_file_name="$base\_rv\.fa";
        }
        %seqs=%{&open_fasta_files(\$fasta_file_for_DB)};
        %reversed_seqs=%{&reverse_sequences(\%seqs)};
        &write_fasta(\%reversed_seqs, $out_file_name );
        if(-s $out_file_name){
               print "\n# make_reverse_seq_database: Supposedly wrote: $out_file_name\n";
        }else{
               print "\n# make_reverse_seq_database: Error in writing: $out_file_name\n";
        }
    }
    print "\n# make_reverse_seq_database sub finished \n";
}


#__________________________________________________________________________
# Title     : make_hmm_from_alignment
# Usage     : @out_hmm_file_names=@{&make_hmm_from_alignment(\@file, "$over_write")};
# Function  :
# Example   :
# Keywords  : HMM, hidden markov model, make_HMM_from_alignment,
#             make_hmm_from_msf_file, create_hmm_from_alignment,
#             create_hmm_from_msf_file,
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub make_hmm_from_alignment{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	local (@out_hmm_files, $base, $out_hmm_file);
	for($i=0; $i< @file; $i++){
	   if($file[$i]=~/\.msf/){
               $base=${&get_base_names($file[$i])};
               $out_hmm_file= "$base\.hmm";
               if($char_opt=~/o/ or !(-s $out_hmm_file) ){
                      system("hmmb -P BLOSUM62 -B 200 -w $out_hmm_file $file[$i]");
                      push(@out_hmm_files, $out_hmm_file);
               }else{
                      print "\n# The $out_hmm_file file already exists. To overwrite use -o opt\n";
               }
	   }
	}
	if(@out_hmm_files > 1){
	   return(\@out_hmm_files);
	}else{
	   return(\$out_hmm_files[0]);
	}
}



#__________________________________________________________________
# Title     : get_false_positive_seq_matches
# Usage     : %seq=%{&get_false_positive_seq_matches(\%msp_1, \%msp2)};
# Function  : gets sequences which are wrongly matched from intermediate seq search
# Example   :
#
#  OUTPUT looks like the following;
#	d1dvh__=d1fcdc1     7.1e-08
#	d1fcdc1=d1dvh__     7.1e-08
#	d5cytr_=d351c__     5.3e-08
#	d351c__=d5cytr_     5.3e-08
#	d1cyi__=d2mtac_     9.1e-06
#	d2mtac_=d1cyi__     9.1e-06
#	d1cyi__=d5cytr_     0.00045
#	d5cytr_=d1cyi__     0.00045
#
# Warning   : The default is to show the best E value(lowest that is)
#
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
#
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub get_false_positive_seq_matches{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(%iss_input)= %{$hash[0]};
	my(%final_table_Evalue, %final_table_score, $inter_seq_seq_name, $inter_seq_score,
	   $inter_seq_E_value, $match_seq_seq_name, $match_seq_score, $match_seq_E_value,
	   $name_combi1, $name_combi2, $each_iss_line, $all_enquiry_seqs);
	@iss_lines = sort values %iss_input;

	if(@array > 0){ ## When the names of enquiry was given as an array, use it!
		$all_enquiry_seqs=join(' ', sort @{$array[0]} );
	}else{    ## otherwise, detect yourself.
            for($i=0; $i< @iss_lines; $i++){
               $each_iss_line=$iss_lines[$i];
               if($each_iss_line=~/^ *(\S+) +/){
                      $all_enquiry_seqs{$1}++;
               }
            }
            $all_enquiry_seqs=join(' ', sort keys %all_enquiry_seqs );
	}

	for($i=0; $i< @iss_lines; $i++){
	   $each_iss_line=$iss_lines[$i];
	   if($each_iss_line=~/^ *(\S+) +(\S+)\((\d+)\)\((\S+)\) +(\S+)\((\d+)\)\((\S+)\)/){
                $inter_seq_seq_name= $2;
                $inter_seq_score   = $3;
                $inter_seq_E_value = $4;
                $match_seq_seq_name= $5;
                $match_seq_score   = $6;
                $match_seq_E_value = $7;
                $name_combi1="$1\=$match_seq_seq_name";
                $name_combi2="$match_seq_seq_name\=$1";
                if($all_enquiry_seqs !~/$match_seq_seq_name/){
                       $false_positive_matches{$name_combi1}="$inter_seq_score $inter_seq_E_value";
                       next;
                }
	   }
	}
	if($char_opt=~/v/){
	   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	   # Wrting the false positives
	   #__________________________________
	   @keys_false=sort keys %false_positive_matches;
	   print "\n\n# False positives: " if @keys_false > 0;
	   for $key_false (@keys_false){
                if($key_false =~/\S+/){
                       printf ("\n%-30s %-30s",  $key_false,  $false_positive_matches{$key_false});
                }
	   }
	   print "\n";
	}
	return(\%false_positive_matches);
}


#__________________________________________________________________
# Title     : make_sequence_match_table
# Usage     : %sequence_match_table=%{&make_sequence_match_table(\%msp_1, \%msp2)};
# Function  : makes a table of match with the values for E values.
# Example   :
#
#  INPUT looks like this: (the iss file format), first column is key
#
#   d1ten__(110)(0.00031)     d1fna__    d1fna___1-91(578)(6.9e-37)       d1ten__(110)(0.00031)
#   d1cfb_2(255)(7.8e-16)     d1cfb_2    HSU55258_741-838(255)(5.6e-12)   d1cfb_2(255)(7.8e-16)
#
#  OUTPUT looks like the following;
#   d1dvh__=d1fcdc1    Correct: 7.1e-08
#	d1fcdc1=d1dvh__    Correct: 7.1e-08
#	d5cytr_=d351c__    Correct: 5.3e-08
#	d351c__=d5cytr_    Correct: 5.3e-08
#	d1cyi__=d2mtac_    Wrong:   9.1e-06
#
# Keywords  : make_sequence_match_Evalue_table, Evalue_table, make_Evalue_table
#             make_iss_sequence_match_table
# Options   : _  for debugging.
#             #  for debugging.
#             s  for skip SELF to SELF match entries
#             w  for Smith-Waterman score result out than E value out
#             r  for reflexive output
#
# Reference : http://sonja.acad.cai.cam.ac.uk/perl_for_bio.html
# Returns   :
# Argument  :
# Category  :
# Version   : 1.5
#-------------------------------------------------------------------------------
sub make_sequence_match_table{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(%iss_input)= %{$hash[0]};
	my(%final_table_Evalue, %final_table_score, $inter_seq_seq_name, $inter_seq_score,
	   $inter_seq_E_value, $match_seq_seq_name, $match_seq_score, $match_seq_E_value, $key_seq,
	   $each_iss_line, $all_enquiry_seqs, $name_combi1, $name_combi2, @sorted_names, $name_sorted,
	   %final_table_interm_and_matched_score, %final_table_interm_and_matched_Evalue, %scop_bugs);
	@iss_lines = sort values %iss_input;

	if($char_opt=~/v/){ print "\n# make_sequence_match_table: \$char_opt is $char_opt\n" ; }

	if($char_opt=~/r/){ $non_reflexive=0;
	}else{    $non_reflexive=1; } # default , not to print result in two ways

	if(@array > 0){ ## When the names of enquiry was given as an array, use it!
            $all_enquiry_seqs=join(' ', sort @{$array[0]} );
	}else{    ## otherwise, detect yourself.
            for($i=0; $i< @iss_lines; $i++){
               $each_iss_line=$iss_lines[$i];
               if($each_iss_line=~/^ *(\S+) +/){  $all_enquiry_seqs{$1}++;             }
            }
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # $all_enquiry_seqs contains all the sequences in the group
            #____________________________________________________________________
            $all_enquiry_seqs=join(' ', sort keys %all_enquiry_seqs );
	}

	for($i=0; $i< @iss_lines; $i++){
	   my $each_iss_line=$iss_lines[$i];
	   if($each_iss_line=~/^ *(\S+) +(\S+)\((\d+)\)\((\S+)\) +(\S+)\((\d+)\)\((\S+)\)/){
		  $key_seq=$1;
		  $inter_seq_seq_name= $2;
		  $inter_seq_score   = $3;
		  $inter_seq_E_value = $4;
		  $match_seq_seq_name= $5;
		  $match_seq_score   = $6;
		  $match_seq_E_value = $7;
		  if( $key_seq eq $match_seq_seq_name and $char_opt=~/s */ ){ next } ## avoiding self self match
		  @sorted_names=sort ($1, $match_seq_seq_name);
		  $name_combi1="$1\=$match_seq_seq_name";
		  $name_combi2="$match_seq_seq_name\=$1";
		  $name_sorted="$sorted_names[0]\=$sorted_names[1]";
		  if($all_enquiry_seqs !~/$match_seq_seq_name/){
                      if($non_reflexive){
                              $false_positive_matches{$name_sorted}="$inter_seq_score $inter_seq_E_value : $match_seq_score $match_seq_E_value";
                      }else{
                              $false_positive_matches{$name_combi1}="$inter_seq_score $inter_seq_E_value : $match_seq_score $match_seq_E_value";
                      }
                      next;
		  }elsif($final_table_score{$name_combi1} < $inter_seq_score or
		      $final_table_score{$name_combi2} < $inter_seq_score or
		      $final_table_score{$name_sorted} < $inter_seq_score){
                      $final_table_score{$name_combi1}=$inter_seq_score;
                      $final_table_score{$name_combi2}=$inter_seq_score;
                      $final_table_Evalue{$name_combi1}=$inter_seq_E_value;
                      $final_table_Evalue{$name_combi2}=$inter_seq_E_value;

                      if($non_reflexive){
                          $final_table_interm_and_matched_score{$name_sorted} = "$inter_seq_score $match_seq_score";
                          $final_table_interm_and_matched_Evalue{$name_sorted} = "$inter_seq_E_value $match_seq_E_value";
                      }else{
                          $final_table_interm_and_matched_score{$name_combi1} = "$inter_seq_score $match_seq_score";
                          $final_table_interm_and_matched_score{$name_combi2} = "$inter_seq_score $match_seq_score";
                          $final_table_interm_and_matched_Evalue{$name_combi1} = "$inter_seq_E_value $match_seq_E_value";
                          $final_table_interm_and_matched_Evalue{$name_combi2} = "$inter_seq_E_value $match_seq_E_value";
                      }
		  }
	   }
	}
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# printing out to screen
	#__________________________________________________________
	if($char_opt =~/w/){ ## returning Smith-waterman score than E value
	   @keys = sort keys %final_table_interm_and_matched_Evalue;
	   for $key (@keys){
               if($key =~/\S+/){
                      printf ("\n%-30s Correct: %-50s",  $key, $final_table_interm_and_matched_Evalue{$key});
               }
	   }
	}else{
	   @keys = sort keys %final_table_interm_and_matched_score;
	   for $key (@keys){
               if($key =~/\S+/){
                      printf ("\n%-30s Correct: %-50s",  $key,  $final_table_interm_and_matched_score{$key});
               }
	   }
	}
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Writing the false positives
	#__________________________________
	@keys_false=sort keys %false_positive_matches;
	#print "\n\n# False positives: " if @keys_false > 0;
	for $key_false (@keys_false){
	   if($key_false =~/\S+/){
	      if($scop_bugs{$key_false}){
	          printf ("\n%-30s Correct: %-50s",  $key_false,  $false_positive_matches{$key_false});
	      }else{
			  printf ("\n%-30s Wrong:   %-50s",  $key_false,  $false_positive_matches{$key_false});
		  }
		  %scop_bugs=qw(d2kauc1=d2kauc  1 d1pkya2=d1pkya1 1 d1pbe_1=d1pbe_2 1
                                d1dih_1=d1dih_2 1 d2ohxa2=d2ohxa1 1 d1poxa3=d1pvda2 1
                                d1efga1=d1efga2 1 d1bct__=d1brd__ 1 d1qora1=d1qora2 1
                                d2ohxa1=d2ohxa2 1);
	   }
	}
	print "\n";

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#  Returning the hash result
	#_____________________________________
	if($char_opt =~/w/){ ## returning Smith-waterman score than E value
	   return(\%final_table_interm_and_matched_score);
	}else{
	   return(\%final_table_interm_and_matched_Evalue);
	}
}



#__________________________________________________________________
# Title     : write_iss_file
# Usage     : &write_iss_file(\%msp1, \%msp2);  ## for 2 msp_x file input
# Function  : writes the intermediate sequence search file.
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : write_interm_seq_search_file
#             v  for showing the output in STDOUT
# Reference : http://sonja.acad.cai.cam.ac.uk/perl_for_bio.html
# Category  :
# Version   : 1.2
#---------------------------------------------------------------------------
sub write_iss_file{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 my(%msp_1, %msp_2, %merged_1, %merged_2);

	 %msp_1=%{$hash[0]};
	 %msp_2=%{$hash[1]};

	 @msp1_keys=sort keys  %msp_1;
	 @msp2_keys=sort keys  %msp_2;

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
	 # removing the empty matches and merging matches according to the enquiry seqs.
	 #  Following input will become;
	 # ..
	 # xxxxx
	 # xxxxx  YYYYY(xx)(yy)
	 # xxxxx  zzzzz(xx)(yy) ttttt(xx)(yy)
	 # ..
	 #  ->  xxxxx  'YYYYY(xx)(yy) zzzzz(xx)(yy) ttttt(xx)(yy)'
	 #____________________________________________________________________________________
	 for($i=0; $i< @msp1_keys; $i++){
             $enquiry_seq = $msp1_keys[$i];
             #my ($seq_name,  $sw_score, $evalue)=$enquiry_seq=~/(\S+)\((\S+)\)\((\S+)\)/;

             #-- if $msp_1{$enquiry_seq} is not empty, assigns name, score, evalue etc to vars, or next
             if($msp_1{$enquiry_seq}=~/\S+/){
                    ($seq_name, $sw_score, $evalue)=$enquiry_seq=~/(\S+)\((\S+)\)\((\S+)\)/;
                     $merged_msp1{$seq_name} .=$msp_1{$enquiry_seq};
             }else{
                    next;
             }
	 }
	 for($i=0; $i< @msp2_keys; $i++){
             $enquiry_seq = $msp2_keys[$i];

             #-- if $msp_2{$enquiry_seq} is not empty, assigns name, score, evalue etc to vars, or next
             if($msp_2{$enquiry_seq}=~/\S+/){
                     $merged_msp2{$enquiry_seq} .=$msp_2{$enquiry_seq};
             }else{
                    next;
             }
	 }

	 @merged_msp1_keys=sort keys  %merged_msp1;
	 @merged_msp2_keys=sort keys  %merged_msp2;

	 for($i=0; $i< @merged_msp1_keys; $i++){
	  $enquiry_seq=$merged_msp1_keys[$i];
	  @intermediate_seqs=sort split(/ +/, $merged_msp1{$enquiry_seq});
	  for($j=0; $j< @intermediate_seqs; $j++){

		 $intermediate_seq=$intermediate_seqs[$j];

		 ($inter_seq_name, $sw_score, $evalue)=$intermediate_seq=~/(\S+)\((\S+)\)\((\S+)\)/;
		 @final_matches=sort split(/ +/,  $merged_msp2{$inter_seq_name});
		 for($k=0; $k < @final_matches; $k ++){
		     $final_matched_seq = $final_matches[$k];
		     if($char_opt=~/v/){
			 printf ("%-18s %-40s %-38s\n", $enquiry_seq, $intermediate_seq, $final_matched_seq);
		     }
		     $final_out{$final_matched_seq}=
	             sprintf ("%-18s %-40s %-38s\n", $enquiry_seq, $intermediate_seq, $final_matched_seq);
		 }
		 #print "\n";
	  }
	  #print "\n";
	 }
	 #print "\n";
	 return(\%final_out);
}

#______________________________________________________________________________
# Title     : get_perl_keywords
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_perl_keywords{
    my(%perl_keywords);
    my @keywords=qw( AUTOLOAD BEGIN CORE DESTROY END abs accept alarm and atan2 bind binmode bless caller chdir chmod chop chown chr chroot
       close closedir cmp connect continue cos crypt dbmclose dbmopen defined delete die do dump each else elsif endgrent endhostent endnetent endprotoent endpwent endservent
       eof eq eval exec exit exp fcntl fileno flock for foreach fork format formline ge getc getgrent getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin
       getnetbyaddr getnetbyname getnetent getpeername getpgrp getppid getpriority getprotobyname getprotobynumber getprotoent getpwent getpwnam getpwuid getservbyname
       getservbyport getservent getsockname getsockopt glob gmtime goto grep gt hex if index int ioctl join keys kill last lc lcfirst le length link listen local localtime log lstat
       lt m mkdir msgctl msgget msgrcv msgsnd my ne next no not oct open opendir or ord pack package pipe pop print printf push q qq quotemeta qw qx rand read readdir readline
       readlink readpipe recv redo ref rename require reset return reverse rewinddir rindex rmdir s scalar seek seekdir select
       semctl semget semop send setgrent sethostent setnetent setpgrp setpriority setprotoent setpwent setservent setsockopt shift
       shmctl shmget shmread shmwrite shutdown sin sleep socket socketpair sort splice split sprintf sqrt srand stat
       study sub substr symlink syscall sysread system syswrite tell telldir tie time times tr truncate uc ucfirst
       umask undef unless unlink unpack unshift untie until use utime values vec wait waitpid wantarray
       warn while write x xor y
     );
     foreach(@keywords){
        $perl_keywords{$_}=$_;
     }
     return(\%perl_keywords);
}




#______________________________________________________________________________
# Title     :  get_homology_info_of_seq_pairs
# Usage     :
# Function  :
# Example   : %seq_pair_homology_table=%{&get_homology_info_of_seq_pairs(\%pairs_excluded,
#                                         \%pdbg_hash_table)};
#
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_homology_info_of_seq_pairs{
   my($i, %pairs_to_be_checked,%seq_pairs_homology_table, @pairs,
      $homology_info, %pdbg_hash_table);
   for($i=0; $i< @_; $i++){
      my %in_hash=%{$_[$i]};
      my @seq_names=keys %in_hash;
      if($in_hash{$seq_names[0]}=~/^\S+[\t ]+\S+$/){
          %pairs_to_be_checked=%in_hash; %in_hash=();
      }elsif($in_hash{$seq_names[0]}=~/^\S+$/){
          %pdbg_hash_table=%in_hash; %in_hash=();
      }
   }

   @pairs=keys %pairs_to_be_checked;

   for($i=0; $i< @pairs; $i++){
      if($pairs[$i]=~/^(\S+)[\t ]+(\S+)/){
          $homology_info=${&check_homology_of_seq_pair(\$pairs[$i], \%pdbg_hash_table)};
          $seq_pairs_homology_table{$pairs[$i]}=$homology_info;
          print "\n#>> $pairs[$i] $homology_info" if $verbose;
      }
   }
   return(\%seq_pairs_homology_table);
}



#________________________________________________________________________________
# Title     : get_overlapping_seq_match_size
# Usage     : $ovlapsize=${&get_overlapping_seq_match_size($st1, $en1, $st2, $en2)
# Function  :
# Example   :
# Keywords  : CF: get_overlapping_range, get_overlapping_seq_match
# Options   :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------------------
sub get_overlapping_seq_match_size{
    my($start1, $end1, $start2, $end2, $overlapping_region_matched);
    if(@_ == 4){
             $start1=$_[0]; $end1 =$_[1];  $start2=$_[2]; $end2  =$_[3];
    }elsif(@_==2){
         if( $_[0]=~/(\d+)\-(\d+)/ ){
                         $start1=$1;      $end1  =$2;
         }elsif($_[1]=~/(\d+)\-(\d+)/ ){
                         $start2=$1;      $end2  =$2;
         }else{
               print "\n# (ERROR) get_overlapping_seq_match_size: I need 2 or 4 arguments for regions\n";
               print "   They look like ($start1, $end1, $start2, $end2) or ('10-100', '20-211')\n";
               print "   You got it, Sarah?? Try again my dear!\n";
         }
    }else{
         print "\n# (ERROR) get_overlapping_seq_match_size: I need 2 or 4 arguments for regions\n";
         print "   They look like ($start1, $end1, $start2, $end2) or ('10-100', '20-211')\n";
         print "   You got it, Sarah?? Try again my dear!\n";
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #     ---------
    #  ------
    #___________________________________
    if($start1 >= $start2 and $end1 >= $end2){
             $overlapping_region_matched=$end2-$start1;
    }
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # ---------
    #     ----------
    #___________________________________
    elsif($start1 <= $start2 and $end1 <= $end2){
             $overlapping_region_matched=$end1-$start2;
    }
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #      -----
    #    ----------
    #___________________________________
    elsif($start1 >= $start2 and $end1 <= $end2){
             $overlapping_region_matched=$end1-$start1;
    }
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #  ---------
    #    ----
    #___________________________________
    elsif($start1 <= $start2 and $end1 >= $end2){
             $overlapping_region_matched=$end2-$start2;
    }
    return(\$overlapping_region_matched);
}

#______________________________________________________________________________
# Title     : get_unix_shell_name
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu, On commercial use issue, Email me.
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_unix_shell_name{
    my($shell_env);
    if($ENV{'SHELL'}=~/\/(\w+)$/ or $ENV{'SHELL'}=~/(\w+)$/){
        $shell_env=$1
    }else{
        print "\n# (ERROR) SHELL env setting is not on, I can not give you SHELL type\n";
        exit;
    }
    return(\$shell_env);
}


#______________________________________________________________________________
# Title     : get_stat_FASTA_search_result_in_msp_0_files
# Usage     : &get_stat_FASTA_search_result_in_msp_0_files(\@file);
# Function  :
# Example   :
# Keywords  : get_stat00_result, get_stat_msp0_files, get_stat_single_search_result
# Options   :
#
#  $E_value= by e=
#  $verbose=v by v
#  $show_options=o by o
#  $step   =  by s=
#  $score_thresh1=   by t1=
#  $score_thresh2=   by t2=
#  $E_mult_factor1 = by m1=
#  $E_mult_factor2 = by m2=
#
# Category  : statistics, search, bio
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_stat_FASTA_search_result_in_msp_0_files{
		my($num_enq_seq, $pdbg_file, %input_file_base, $score_thresh1, $score_thresh2,
				$E_mult_factor1, $E_mult_factor2, @seqs, @pdbg_seqs, @MSP0, @array,
				$E_value, %final_stat_big_hash, @bases, $i, $j, $k);
		my $leng_thresh=10;
		$score_thresh1 = 73;
		my $simple_pdbg_read_opt='b';
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Arguments handling
		#_____________________________________
		for($i=0; $i< @_; $i++){
			 if($_[$i]=~/e=(\S+)/){          $E_value=$1;
					splice(@_, $i, 1); $i--;
			 }elsif(ref($_[$i]) eq 'ARRAY'){
					 @array=@{$_[$i]};
					 for($j=0; $j < @array; $j++){
							 if($array[$j]=~/(\S+)\.pdbg$/){         $input_file_base{$1}=$1;
							 }elsif($array[$j]=~/(\S+)\.msp_?0$/){   $input_file_base{$1}=$1;
							 }
					 }
			 }elsif($_[$i]=~/(\S+)\.pdbg$/){
					 $input_file_base{$1}=$1;
			 }elsif($_[$i]=~/(\S+)\.msp_?0$/){
					 $input_file_base{$1}=$1;
			 }elsif($_[$i]=~/m1=(\S+)/){          $E_mult_factor1=$1;
					splice(@_, $i, 1); $i--;
			 }elsif($_[$i]=~/m2=(\S+)/){          $E_mult_factor2=$1;
					splice(@_, $i, 1); $i--;
			 }elsif($_[$i]=~/t1=(\S+)/){          $score_thresh1=$1;
					splice(@_, $i, 1); $i--;
			 }elsif($_[$i]=~/t2=(\S+)/){          $score_thresh2=$1;
					splice(@_, $i, 1); $i--;
			 }elsif($_[$i]=~/o=(\S+)/){          $show_options=$1;
					splice(@_, $i, 1); $i--;
			 }
		}
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Handling options
		#_________________________________
		if($E_value=~/^ *$/){
			 $E_value=5;
			 print "\n# WARNING: you did not set 'e=x.xxx' option default $E_value used\n";
		}
		if($E_mult_factor1 !~/\S/){  $E_mult_factor1 =1; }
		if($E_mult_factor2 !~/\S/){  $E_mult_factor2 =1; }
		if($show_options=~/o/){
			 print "\n#---- \$step          : $step";
			 print "\n#---- \$score_thresh1 : $score_thresh1";
			 print "\n#---- \$score_thresh2 : $score_thresh2\n";
		}


		@bases=keys %input_file_base;
		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Main loop
		#__________________________________________
		for($i=0; $i< @bases; $i++){
				my(%msp_0, %msp_00, $score, $evalue, $enquiry, @keys0, @keys2, $j,
					 %stat, %stat2, $sum_correct, $sum_false, @non_dup, $base, $pdbg_file,
					 @seqs, @pdbg_seqs, $msp_0_file, @MSP0, @keys, $k, %correct );
				$base=$bases[$i];
				$pdbg_file ="$base\.pdbg";
				$msp_0_file="$base\.msp0";
				print "\n# $base $pdbg_file $msp_0_file\n" if $verbose;

				unless(-s $pdbg_file or -s $msp_0_file){
					 print "\n", __LINE__, "# file is missing. I need xxx.pdbg, xxx.msp_0\n\n";
				}

				@seqs=@pdbg_seqs= keys %{&open_pdbg_files($pdbg_file, $simple_pdbg_read_opt)};

				if(@pdbg_seqs < 2){
					 print "\n# too little sequneces @pdbg_seqs $pdbg_file\n";
					 exit;
				}

				print "\n# Result of open_pdbg_files, \@seqs are        : @seqs \n" if $verbose;
				open(MSP0, "$msp_0_file");
				@MSP0=<MSP0>;
				close(MSP0);

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# opening each msp0 file
				#______________________________________
				for(@MSP0){
						if(/^(\S+) +(\S+) +\S* *(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+)/){
								#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
								# Length checking
								#__________________________________________________
								$seq_leng1=$4-$3;
								$seq_leng2=$7-$6;
								if($seq_leng1 < $leng_thresh or $seq_leng2 < $leng_thresh){
									 if($verbose){
											 print "\n# LENG $seq_leng1, $seq_leng2: $seq_leng1 $seq_leng2  $5 => $8 $1 $2 skipping\n";
											 next;
									 }
								}

								$score=$1;
								$evalue=$2;
								$enquiry=$5;
								$match_seq=$8;
								#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
								#  Evalue checking
								#__________________________________________________
								if($evalue > $E_value ){
										if($enquiry=~/^(\S+)_\d+\-\d+/){
											 $msp_0{"$1"} ="" unless $msp_0{"$1"};
											 next;
										}else{
											 $msp_0{"$enquiry"} ="" unless $msp_0{"$enquiry"};;
											 next;
										}
								}

								if($score < $score_thresh1){     next;     }
								if($enquiry=~/^(\S+)_\d+\-\d+/){
										$msp_0{"$1"} .="$match_seq ";
								}else{
										$msp_0{"$enquiry"} .="$match_seq ";
										$msp_00{join(' ', sort($enquiry, $match_seq))} = " $score $evalue";
								}
						}
				}
				%stat=%msp_0;

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
				# filtering duplicates
				#____________________________________________
				@keys=keys %stat;
				for($k=0; $k< @keys; $k++){
						@split=split(/ +/,$stat{$keys[$k]});
						@non_dup=@{&remove_dup_in_array(\@split)};
						for($j=0; $j<@non_dup; $j++){
								if($non_dup[$j]=~/^ *$/){
										splice(@non_dup, $j, 1);       $j--;
										next;
								}
								if($non_dup[$j] eq $keys[$k]){
										splice(@non_dup, $j, 1);       $j--;
										next;
								}
						}
						$stat2{$keys[$k]}=join(' ', @non_dup);
				}

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# Showing the actual matched sequences
				# %stat has following contents
				#    d1ash__ d1bam__ d1mba__ d2lhb__
				#    d1baba_ d1flp__ d1hbg__ d1hlb__ d1mba__ d1mbd__ d2lhb__ d3aaha_ d3sdha_
				#    d1cpca_ d1cpcb_ d1gof_1 d2ts1_1
				#______________________________________________________________________________
				if($verbose=~/v/){
					 @keys= sort keys %stat2;
					 for($k=0; $k< @keys; $k++){
							print "$keys[$k]: $stat2{$keys[$k]}\n";
					 }
				}

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# Getting statistics
				#_________________________________________
				$evalue=$s;
				$E_mult_factor1=1;
				@output=@{&get_isearch_result_stat(\%stat2, \@pdbg_seqs, \$evalue,
									\$base, \$E_mult_factor1,  $leng_thresh, \%msp_00)};
				%correct=%{$output[3]};
				%final_stat_big_hash=(%final_stat_big_hash, %correct);
				if($verbose){
						@keys=sort keys %correct;
						for($k=0; $k< @keys; $k++){
							 print "$keys[$k] $correct{$keys[$k]}\n";
						}
				}
		}
		return(\%final_stat_big_hash);
}


#________________________________________________________________________________
# Title     : get_scop_correcting_pairs
# Usage     : %correct=%{&get_scop_correcting_pairs()};
# Function  :
# Example   :
# Keywords  : get_pdb_correcting_pairs , correct_pairs_in_scop, correct_homology_pairs
# Options   :
# Category  :
# Version   : 1.4
#--------------------------------------------------------------------------------
sub get_scop_correcting_pairs{
    my (%correcting_pairs, @correcting_pairs);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # %correcting_pairs is a correcting table for old pdb40d file classi
    #_____________________________________________________________________
    @correcting_pairs=(  # should be pairs
        'd2kauc1 d2kauc2', 'd1pkya1 d1pkya2',
        'd1pvda2 d1trka1', 'd1pbe_1 d1pbe_2',
        'd1poxa3 d1pvda2', 'd1efga1 d1efga2',
        'd1dsba1 d1dsba2', 'd2gsta1 d2gsta2',
        'd1bct__ d1brd__', 'd1qora1 d1qora2',
        'd2ohxa1 d2ohxa2', 'd1efga2 d1eft_1',
        'd1tada1 d1tada2', 'd1gsea1 d1gsea2',
        'd1gesa2 d2tmda3', 'd1lvl_2 d2tmda3',
        'd2tmda3 d2tpra2', 'd1tde_1 d2tmda3',
        'd1nhp_2 d2tmda3', 'd1gesa1 d2tmda3',
        'd1lvl_1 d2tmda3', 'd2tmda3 d2tpra1',
        'd1fcda1 d2tmda3', 'd1nhp_1 d2tmda3',
        'd1tde_2 d2tmda3', 'd1pbe_1 d2tmda3',
        'd1ebha1 d1ebha2', 'd1gesa2 d2dlda2', ## 3.4.1  with
        'd1gesa2 d1psda2', 'd1nhp_2 d2dlda2',
        'd1ldm_1 d1tde_2', 'd1coy_1 d1ldb_1',
        'd1lvl_2 d1psda2', 'd1psda2 d1tde_2',
        'd1hyha1 d1tde_2', 'd1fcda1 d1ldm_1',
        'd1hdca_ d1nhp_2', 'd1fcda1 d1hlpa1',
        'd1llda1 d1lvl_2', 'd2dlda2 d2tpra2',
        'd1ldm_1 d1nhp_2', 'd1llda1 d1pbe_1',
        'd1gdha2 d2tpra1', 'd1ldb_1 d1nhp_2',
        'd1gesa2 d1scua2', 'd1fcda1 d1hyha1',
        'd1gesa1 d1hlpa1', 'd1gdha2 d1gesa2',
        'd1lvl_2 d2dlda2', 'd1gesa1 d2dlda2',
        'd1nhp_2 d2ohxa2', 'd1tde_2 d2dlda2', # 3.4.1. with 3.18.1, 3.17.1.
        'd1nhp_1 d2cmd_1', 'd1fcda1 d1ldb_1',
        'd1lvl_1 d2ohxa2', 'd1nhp_2 d2naca2',
        'd1pbe_1 d2ohxa2', 'd1gdha2 d1nhp_2',
        'd2cmd_1 d2tpra1', 'd1tde_1 d2cmd_1',
        'd1llda1 d1nhp_2', 'd1hlpa1 d1nhp_2',
        'd1nhp_1 d2dlda2', 'd1hyha1 d1nhp_2',
        'd1nhp_2 d1psda2', 'd1fcda1 d2cmd_1',
        'd1fcda1 d1llda1', 'd1lvl_2 d1udpa_',
        'd1psda2 d2tpra2', 'd1hdca_ d1lvl_2',
        'd1gesa2 d1llda1', 'd1nhp_2 d1qora2',
        'd1ldm_1 d2tpra1', 'd1coy_1 d2dlda2',
        'd2dlda2 d2tpra1', 'd1hdca_ d1pbe_1',
        'd1coy_1 d1gdha2', 'd1nhp_2 d2cmd_1',
        'd1llda1 d1tde_1', 'd1llda1 d1lvl_1',
        'd1bdma1 d2tpra1', 'd1gd1o1 d2tpra2',
        'd1ldb_1 d1lvl_1', 'd1hlpa1 d1tde_2',
        'd1coy_1 d1psda2', 'd1nhp_2 d1udpa_',
        'd1llda1 d1tde_2', 'd1tde_2 d2cmd_1',
        'd1llda1 d2tpra2', 'd1ldb_1 d1tde_1',
        'd1coy_1 d1hlpa1', 'd1coy_1 d2cmd_1',
        'd1bdma1 d1gesa2', 'd1hyha1 d2tpra2',
        'd1gesa2 d1hyha1', 'd1gesa2 d2ohxa2',
        'd1ldb_1 d1tde_2', 'd1hlpa1 d1pbe_1',
        'd1ldm_1 d2tpra2', 'd2ohxa2 d2tpra1',
        'd1ldb_1 d2tpra2', 'd1gesa2 d1ldm_1',
        'd1lvl_2 d1qora2', 'd1gesa1 d2naca2',
        'd1coy_1 d1llda1', 'd1coy_1 d1hyha1',
        'd1coy_1 d1ldm_1', 'd1ldm_1 d1lvl_2',
        'd1eny__ d1nhp_2', 'd1pbe_1 d2pgd_2',
        'd1ldb_1 d1pbe_1', 'd1ldb_1 d1lvl_2',
        'd1gesa2 d1hlpa1', 'd1dhr__ d1nhp_2',
        'd1hdca_ d1tde_1', 'd1gesa1 d1psda2',
        'd1pbe_1 d2cmd_1', 'd1tde_2 d1udpa_',
        'd1pbe_1 d2dlda2', 'd1hdca_ d1tde_2',
        'd1gesa2 d1ldb_1', 'd1psda2 d2tpra1',
        'd1gdha2 d1lvl_2', 'd1tde_1 d2dlda2',
        'd1ldm_1 d1pbe_1', 'd1pbe_1 d1scua2',
        'd1gesa1 d2ohxa2', 'd1lvl_2 d2naca2',
        'd1gd1o1 d1lvl_1', 'd1fvl__ d1kst__',
        'd1kst__ d2ech__', 'd1hsaa2 d1std__', ## d1hsaa.. is NOT homol, but to fix a problem in E_100_e_0.0005_j30_segged_2092
        'd1afp__ d1hfi__'
        );


     for($i=0; $i< @correcting_pairs; $i++){
                     $correcting_pairs{$correcting_pairs[$i]}=$correcting_pairs[$i];
     }
     return(\%correcting_pairs);
}

#__________________________________________________________________
# Title     : get_isearch_result_stat
# Usage     : &get_self_isearch_stat(\%stat2, \@pdbg_seqs, \$evalue);
# Function  :
# Example   : Following input (hash eg: %stat2, input with the first word as key)
#              will become columnar output.
#
#    d1ash__ d1bam__ d1mba__ d2lhb__
#    d1baba_ d1flp__ d1hbg__ d1hlb__ d1mba__ d1mbd__ d2lhb__ d3aaha_ d3sdha_
#    d1cpca_ d1cpcb_ d1gof_1 d2ts1_1
#
#    Will become:
#      ....
#      d1ash__ d2lhb__ Homolog: G1   98 0.012
#      d1baba_ d1flp__ Homolog: G1   82 0.072
#      d1baba_ d1hbg__ Homolog: G1   79 0.13
#      d1baba_ d2lhb__ Homolog: G1   228 8e-12
#      d1baba_ d3aaha_ Nomolog: G1   74 2
#      d1baba_ d3sdha_ Homolog: G1   92 0.012
#      d1cola_ d1hbg__ Nomolog: G1   79 0.59
#      d1cpca_ d1cpcb_ Homolog: G1   176 4.9e-08
#      ....
#
# Keywords  : get_stat_interm_search, get_intermediate_search_stat
# Options   : _  for debugging.
#             #  for debugging.
# Package   : Bio
# Reference : http://sonja.acad.cai.cam.ac.uk/perl_for_bio.html
# Returns   : [$av_correct, $num_enq_seq]
# Tips      :
# Argument  :
# Todo      :
# Author    : A Scientist
# Category  :
# Version   : 2.2
#-----------------------------------------------------------------------------
sub get_isearch_result_stat{
	my (@keys, $num_enq_seq, @pdbg_seqs_ori, $c, $d, $i, %correct_pairs,
	    $sum_correct, $sum_false, $match_seq, $percent_correct, $correct, @correct,
	    $av_correct, $av_false, $actual_e_value, $correct_matched,
	    %correcting_pairs, @correcting_pairs, %correct);

	my %seqs=%{$_[0]};
	my @pdbg_seqs=@{$_[1]};
	my $evalue=${$_[2]};
	my $pdbg_base=${$_[3]} || $ARGV[3];
	my $E_mult_factor1=${$_[4]};
	my $E_mult_factor2=${$_[4]};
        if(ref($_[5])){  $leng_thresh =${$_[5]}  }else{ $leng_thresh=$_[5]; }
	my %msp_0=%{$_[6]};
	my %msp_00=%{$_[7]};

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# %correcting_pairs is a correcting table for old pdb40d file classi
		#_____________________________________________________________________
		@correcting_pairs=(  # should be pairs
                                'd2kauc1 d2kauc2',              'd1pkya1 d1pkya2',
                                'd1pvda2 d1trka1',                              'd1pbe_1 d1pbe_2',
				'd1poxa3 d1pvda2',				'd1efga1 d1efga2',
				'd1dsba1 d1dsba2',				'd2gsta1 d2gsta2',
				'd1bct__ d1brd__',				'd1qora1 d1qora2',
				'd2ohxa1 d2ohxa2',				'd1efga2 d1eft_1',
				'd1tada1 d1tada2',				'd1gsea1 d1gsea2',
				'd1gesa2 d2tmda3',				'd1lvl_2 d2tmda3',
				'd2tmda3 d2tpra2',				'd1tde_1 d2tmda3',
				'd1nhp_2 d2tmda3',				'd1gesa1 d2tmda3',
				'd1lvl_1 d2tmda3',				'd2tmda3 d2tpra1',
				'd1fcda1 d2tmda3',				'd1nhp_1 d2tmda3',
				'd1tde_2 d2tmda3',				'd1pbe_1 d2tmda3',
				'd1ebha1 d1ebha2',				'd1gesa2 d2dlda2', ## 3.4.1  with 3.15.1
				'd1gesa2 d1psda2',				'd1nhp_2 d2dlda2',
				'd1ldm_1 d1tde_2',				'd1coy_1 d1ldb_1',
				'd1lvl_2 d1psda2',				'd1psda2 d1tde_2',
				'd1hyha1 d1tde_2',				'd1fcda1 d1ldm_1',
				'd1hdca_ d1nhp_2',				'd1fcda1 d1hlpa1',
				'd1llda1 d1lvl_2',				'd2dlda2 d2tpra2',
				'd1ldm_1 d1nhp_2',				'd1llda1 d1pbe_1',
				'd1gdha2 d2tpra1',				'd1ldb_1 d1nhp_2',
                                'd1gesa2 d1scua2',              'd1fcda1 d1hyha1',
                                'd1gesa1 d1hlpa1',              'd1gdha2 d1gesa2',
                                'd1lvl_2 d2dlda2',              'd1gesa1 d2dlda2',
                                'd1nhp_2 d2ohxa2',              'd1tde_2 d2dlda2',
                                'd1nhp_1 d2cmd_1',              'd1fcda1 d1ldb_1',
                                'd1lvl_1 d2ohxa2',              'd1nhp_2 d2naca2',
                                'd1pbe_1 d2ohxa2',              'd1gdha2 d1nhp_2',
                                'd2cmd_1 d2tpra1',              'd1tde_1 d2cmd_1',
                                'd1llda1 d1nhp_2',              'd1hlpa1 d1nhp_2',
                                'd1nhp_1 d2dlda2',              'd1hyha1 d1nhp_2',
                                'd1nhp_2 d1psda2',              'd1fcda1 d2cmd_1',
                                'd1fcda1 d1llda1',              'd1lvl_2 d1udpa_',
                                'd1psda2 d2tpra2',              'd1hdca_ d1lvl_2',
                                'd1gesa2 d1llda1',              'd1nhp_2 d1qora2',
                                'd1ldm_1 d2tpra1',              'd1coy_1 d2dlda2',
                                'd2dlda2 d2tpra1',              'd1hdca_ d1pbe_1',
                                'd1coy_1 d1gdha2',              'd1nhp_2 d2cmd_1',
                                'd1llda1 d1tde_1',              'd1llda1 d1lvl_1',
                                'd1bdma1 d2tpra1',              'd1gd1o1 d2tpra2',
                                'd1ldb_1 d1lvl_1',              'd1hlpa1 d1tde_2',
                                'd1coy_1 d1psda2',              'd1nhp_2 d1udpa_',
                                'd1llda1 d1tde_2',              'd1tde_2 d2cmd_1',
                                'd1llda1 d2tpra2',              'd1ldb_1 d1tde_1',
                                'd1coy_1 d1hlpa1',              'd1coy_1 d2cmd_1',
                                'd1bdma1 d1gesa2',              'd1hyha1 d2tpra2',
                                'd1gesa2 d1hyha1',              'd1gesa2 d2ohxa2',
                                'd1ldb_1 d1tde_2',              'd1hlpa1 d1pbe_1',
                                'd1ldm_1 d2tpra2',              'd2ohxa2 d2tpra1',
                                'd1ldb_1 d2tpra2',              'd1gesa2 d1ldm_1',
                                'd1lvl_2 d1qora2',              'd1gesa1 d2naca2',
                                'd1coy_1 d1llda1',              'd1coy_1 d1hyha1',
                                'd1coy_1 d1ldm_1',              'd1ldm_1 d1lvl_2',
                                'd1eny__ d1nhp_2',              'd1pbe_1 d2pgd_2',
                                'd1ldb_1 d1pbe_1',              'd1ldb_1 d1lvl_2',
                                'd1gesa2 d1hlpa1',              'd1dhr__ d1nhp_2',
                                'd1hdca_ d1tde_1',              'd1gesa1 d1psda2',
                                'd1pbe_1 d2cmd_1',              'd1tde_2 d1udpa_',
                                'd1pbe_1 d2dlda2',              'd1hdca_ d1tde_2',
                                'd1gesa2 d1ldb_1',              'd1psda2 d2tpra1',
                                'd1gdha2 d1lvl_2',              'd1tde_1 d2dlda2',
                                'd1ldm_1 d1pbe_1',              'd1pbe_1 d1scua2',
                                'd1gesa1 d2ohxa2',              'd1lvl_2 d2naca2',
                                'd1gd1o1 d1lvl_1'
                                );


		for($i=0; $i< @correcting_pairs; $i++){
				$correcting_pairs{$correcting_pairs[$i]}=$correcting_pairs[$i];
		}
	if($E_mult_factor1=~/^ *$/){ $E_mult_factor1=1; };


	@keys=sort keys %seqs;
	@keys=@{&strip_sequence_ranges(\@keys)};
	@keys=@{&remove_dup_in_array(\@keys)};
	@pdbg_seqs_ori=@pdbg_seqs;
	$num_enq_seq=@pdbg_seqs;
	print "\n# In get_isearch_result_stat: PDBG seqs $num_enq_seq \n=> @pdbg_seqs\n\n" if $verbose;

	#@pdbg_seqs=@{&strip_sequence_ranges(\@pdbg_seqs)};
	#@pdbg_seqs=@{&remove_dup_in_array(\@pdbg_seqs)};

	if($num_enq_seq < 2){ print "\n# \$num_enq_seq is less than 2 @pdbg_seqs $base\n"; exit; }

	for($c=0; $c < @keys; $c++){
	   my($enq_seq, $correct, $false_positive);
	   $num_of_matched=@match_seqs=split(/ +/, $seqs{$keys[$c]});
			 $enq_seq=$keys[$c];

	   for($d=0; $d< @match_seqs; $d++){
                my($correct_matched, @sorted);

                $match_seq=$match_seqs[$d];
                                      $sorted=join(' ', sort ($enq_seq, $match_seq) );

                for($i=0; $i< @pdbg_seqs; $i++){
                     if($match_seq =~/d?$pdbg_seqs[$i]/i or $correcting_pairs{$sorted} ){
                          print "\n# \$match_seq = $match_seq, \$pdbg_seqs $pdbg_seqs[$i] \$enq_seq: $enq_seq\n"  if $verbose;
                          $correct++;
                          $correct_matched=1;
                          unless($correct{$sorted}){
                                   $correct_group{$base} .="Homolog: $sorted $base  $msp_0{$sorted}\n";
                          }
                          $correct{$sorted} = "Homolog: $base  $msp_0{$sorted}";
                     }
                }
                if($correct_matched !=1){
                     $false_positive++;
                     unless( $correct{$sorted} ){
                          $correct_group{$base} .="Nomolog: $sorted $base  $msp_0{$sorted}\n";
                     }
                     $correct{$sorted} = "Nomolog: $base  $msp_0{$sorted}";
                }
	   }
           if(@match_seqs == 0){ @match_seqs=1; $percent_correct=0; }
	   $sum_correct += $correct;
	   $sum_false   += $false_positive;
	}
	$av_correct = $sum_correct/$num_enq_seq;
	$av_false   = $sum_false  /($num_enq_seq);

	#### $actual_e_value becomes whatever $E_mult_factor1 defined ~~~~~~~~~~~~
	if($E_mult_factor1 != 1){
	   $actual_e_value=$evalue * $E_mult_factor1;
	}elsif($E_mult_factor2 != 1){
	   $actual_e_value= $evalue * $E_mult_factor2;
	}else{ $actual_e_value=$evalue }

	$num_enq_seq--;
	$sum_correct_for_additional = $num_enq_seq+1;
	$match_count=$sum_correct_for_additional * $av_correct;
	#$sum_correct= $sum_correct_for_additional;
	if($verbose){
           printf ("%-10s %-12s %-13f %-13f %-7s %-7s %-7s %-7s %-4s\n", $pdbg_base,
		$actual_e_value, $av_correct, $av_false, $num_enq_seq,
		$sum_correct_for_additional, $sum_false, $match_count, $leng_thresh);
	}

	@correct_new=@{&remove_dup_in_array(\@correct_new)};
	for($i=0; $i< @correct_new; $i++){
	    print "\n# correct new: $correct_new[$i]" ;
	}
	$num_correct=$match_count/2;

	print "Num of non-reflective correcct:  $num_correct  Nomolog: $sum_false  \n\n" if $verbose;
	return([$av_correct, $sum_correct, $num_enq_seq, \%correct, \%correct_group]);
}



#__________________________________________________________________
# Title     : strip_sequence_ranges
# Usage     :
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : remove_sequence_ranges, remove_sequence_name_ranges,
#             remove_ranges_in_sequences, strip_sequence_name_ranges,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub strip_sequence_ranges{
    my (@out, $i);
    my @in=@{$_[0]} or @in=@_;
    for($i=0; $i< @in; $i++){
        if($in[$i]=~/^(\S+)_\d+\-\d+/){
	     push(@out, $1);
	}else{
	     push(@out, $in[$i]);
        }
    }
    return(\@out);
}



#__________________________________________________________________________
# Title     : open_sequence_index_files
# Usage     : open_sequence_index_files(<indexfilename>, <sequencename>);
# Function  : returns seqname with its seek pos in fasta sequence db file.
# Example   : %index=%{&open_sequence_index_files(\@INDEX_FILE, \@input_seq_names)};
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : open_seq_index_files, open_seq_idx_files, open_idx_files,
#             get_sequence_index, get_seq_index, get_sequence_with_index
# Options   : _ or # for debugging
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#--------------------------------------------------------------------------
sub open_sequence_index_files{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
        \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
        \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my( %final_index, %long_index);

    for($i=0; $i< @file; $i++){
        open(INDEX, "$file[$i]");
	while(<INDEX>){
           if(/^(\S+) +(\S+)$/){
		$long_index{$1}=$2;
	   }
	}
	for($j =0; $j < @string; $j++){ #<<<< @string has the sequence NAMEs >>>>
            if($input_seq_names[$j]=~/^(\S+)_\d+\-\d+/){
                   $seq_with_index{$input_seq_names[$j]}=$long_index{$1};
            }else{
                   $seq_with_index{$input_seq_names[$j]}=$long_index{$input_seq_names[$j]};
            }
	}

    }
    return(\%final_index);
}

#__________________________________________________________________
# Title     : do_intermediate_sequence_search
# Usage     : &do_intermediate_sequence_search(\%pdb_seq, $owl_db_fasta, $ARGV[0], $single_msp, $over_write,
#                    "u=$upper_expect_limit", "l=$lower_expect_limit", "k=$k_tuple" );
#
# Function  :
# Example   : &do_intermediate_sequence_search(\%pdb_seq, $owl_db_fasta, $ARGV[0], $single_msp, $over_write,
#                    "u=$upper_expect_limit", "l=$lower_expect_limit", "k=$k_tuple" );
#
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   :
#             Query_seqs=  for enquiry sequences eg)  "Query_seqs=$ref_of_hash"
#             DB=   for target DB  "DB=$DB_used"
#             File= to get file base(root) name.  "File=$file[0]"
#             m  for MSP format directly from FASTA or Ssearch result than through sso_to_msp to save mem
#             s  for the big single output (msp file output I mean)
#             o  for overwrite existing xxxx.fa files for search
#             c  for create SSO file (sequence search out file)
#             R  for adding ranges to the enquiry sequences as well.
#             k= for k-tuple value. default is 1 (ori. FASTA prog. default is 2)
#             u= for $upper_expect_limit
#             l= for $lower_expect_limit
#             a= for choosing either fasta or ssearch algorithm
#
# Returns   : the names of files created (xxxxx.msp, yyy.msp,,)
# Argument  :
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------------------
sub do_intermediate_sequence_search{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (@final_out, $add_range, $single_big_msp, $base_name, $create_sso, @nondup,
	   $Single_msp_out_file, %duplicate, $Evalue_thresh, $Score_thresh, @SSO, $sequence_DB,
	   @sso, @temp, $algorithm, $margin, $out_msp_file, @MSP, @final_msp_file_names_out,
	   $upper_expect_limit, $lower_expect_limit, $k_tuple, %seq_input, %MSP, $add_range_to_enquiry );
	my ($E_val) = 5;  ## default 5 <<<<<<<<<<<<<<<<<<<<<

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# DEFAULTS
	#________________________________________
	$k_tuple=1;
	$algorithm='fasta';
	$upper_expect_limit=10;
	$lower_expect_limit=0;
	$Score_thresh      =75;
	$margin            =0;
	$add_range         ='';
	$sequence_DB       =$ENV{'PDB40D_FASTA'};

	if($vars{'a'}=~/\S+/){ $algorithm          = $vars{'a'}            };
	if($vars{'u'}=~/\d+/){ $upper_expect_limit = $vars{'u'}            };
	if($vars{'l'}=~/\d+/){ $lower_expect_limit = $vars{'l'}            };
	if($vars{'k'}=~/\d+/){ $k_tuple            = $vars{'k'}            };
	if($vars{'t'}=~/\d+/){ $Score_thresh       = $vars{'t'}            };
	if($vars{'m'}=~/\d+/){ $margin             = $vars{'m'}            };
	if($vars{'r'}=~/\S+/){ $add_range          = 'r'                   };
	if($vars{'s'}=~/\S+/){ $single_big_msp     = 's'                   };
	if($vars{'DB'}=~/\S+/){ $sequence_DB       = $vars{'DB'}           };
	if($vars{'File'}=~/\S+/){ $input_file_name = $vars{'File'}         };
	if($vars{'Query_seqs'}=~/\S+/){ %seq_input = %{$vars{'Query_seqs'}}};
	if($vars{'e'}         =~/\S+/){ $E_val     = $vars{'e'}            };

	if($char_opt=~/r/){    $add_range            = 'r' }
	if($char_opt=~/R/){    $add_range_to_enquiry = 'R'  }
	if($char_opt=~/c/){    $create_sso           = 'c' }
	if($char_opt=~/s/){    $single_big_msp       = 's'; print "\n# Single file opt is set\n"; }
	if($char_opt=~/m/){    $msp_directly_opt     = 'm' }
	if($char_opt=~/i/){    $do_intermediate_search   = 'i' }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
	 #  When, you didn't use "DB=$XXX" and "File=$FXXX" format, first file input is DB etc
	 #_______________________________________________________________________________________
	 if($input_file_name=~/^$/){  $input_file_name=$file[1];
	  print "\n# You did not use \"File=\$XXX\" format\n"  };
	 if($sequence_DB=~/^$/){      $sequence_DB    =$file[0];
	  print "\n# You did not use \"DB=\$XXX\" format\n"   };
	 print "\n# Finished writing the enquiry fasta files from \%seq_input by write_fasta_seq_by_seq";
	 print "\n# I am in do_sequence_search sub, Target database used :  $sequence_DB \n";

	 my $base = ${&get_base_names($input_file_name)};
         $out_msp_file="$base\.msp";
	 @temp=`$algorithm -m 10 -H  -E $E_val $input_file_name $sequence_DB $k_tuple`;
	 if(@temp < 40){	  print "\n# There must be error , \@temp is too small\n\n";   }
	 my @msp_hashes_from_temp = @{&open_sso_files(\@temp, $add_range,
												"u=$upper_expect_limit",
												"l=$lower_expect_limit",
												$add_range_to_enquiry)};
	 my @msp_from_temp= values %{$msp_hashes_from_temp[0]};
	 $MSP{$out_msp_file} = \@msp_from_temp;
	 open(MSPOUT, ">$out_msp_file");
	 for($i=0; $i< @msp_from_temp; $i++){
             print MSPOUT $msp_from_temp[$i];
             print $msp_from_temp[$i];
	 }
	 close MSPOUT;
	 return(\$out_msp_file);
}

#____________________________________________________________________________________
# Title     : do_sequence_search
# Usage     : &do_sequence_search("Query_seqs=\%pdb_seq", "DB=$sequence_db_fasta",
#  		         "File=$ARGV[0]", $single_msp, $over_write,
# 	        	 "u=$upper_expect_limit", "l=$lower_expect_limit",
#       		 "k=$k_tuple", $No_processing );
# Function  : do FASTA, SSEARCH or BLASTPGP(psi-blast) search
# Example   : &do_sequence_search(\%pdb_seq, $owl_db_fasta, $ARGV[0], $single_msp, $over_write,
#                    "u=$upper_expect_limit", "l=$lower_expect_limit", "k=$k_tuple" );
#
# Keywords  : sequence_search
# Options   :
#             Query_seqs=  for enquiry sequences eg)  "Query_seqs=$ref_of_hash"
#             DB=   for target DB  "DB=$DB_used"
#             File= to get file base(root) name.  "File=$file[0]"
#             m  for MSP format directly from FASTA or Ssearch result than through sso_to_msp to save mem
#             s  for the big single output (msp file output I mean)
#             s= for the single big msp file name
#             o  for overwrite existing xxxx.fa files for search
#             c  for create SSO file (sequence search out file)
#             d  for very simple run and saving the result in xxxx.gz format in sub dir starting with one char
#             r  for reverse the query sequence
#             R  for attaching ranges of sequences
#             k= for k-tuple value. default is 1 (ori. FASTA prog. default is 2)
#             u= for $upper_expect_limit
#             l= for $lower_expect_limit
#             a= for choosing either fasta or ssearch algorithm
#             d= for defining the size of subdir made. 2 means it creates
#                    eg, DE while 1 makes D
#             d  for $make_gz_in_sub_dir_opt, putting resultant sso files in gz format and in single char subdir
#             D  for $make_msp_in_sub_dir_opt, convert sso to msp and put in sub dir like /D/, /S/
#             n  for new format to create new msp file format with sso_to_msp routine
#          PVM=  for PVM run of FASTA (FASTA only)
#             M  for machine readable format -m 10 option
#             M= for machine readable format -m 10 option
#             N  for 'NO' do not do any processing but, do the searches only.
#       FILE_AGE for defining the age of file in days to be overwritten.
#             L  for Lean output(removes xxxx.fa query seq file)
#
# Returns   : the names of files created (xxxxx.msp, yyy.msp,,)
# Version   : 5.4
#----------------------------------------------------------------------------------------
sub do_sequence_search{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
        \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
        \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my (@final_out, $add_range, $single_big_msp, $base_name, $create_sso, @nondup,
       $Single_msp_out_file, %duplicate, $Evalue_thresh, $Score_thresh, @SSO, $sequence_DB,
       @sso, @temp, $algorithm, $margin, $out_msp_file, @MSP, @final_msp_file_names_out,
       $upper_expect_limit, $lower_expect_limit, $k_tuple, %seq_input, %MSP, $No_processing,
       $new_format, $PVM_FASTA_run, $over_write, $sub_dir_size, $age_in_days_of_out_file,
       $over_write_by_age, $Lean_output, $gzipped_msp_file, $gzipped_sso_file,
       $defined_all_ok, $make_msp_in_sub_dir_opt );
    my ($E_val) = 5;  ## default 5 <<<<<<<<<<<<<<<<<<<<<

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # DEFAULTS
    #________________________________________
    $k_tuple           =1;  # 1 or 2, 1 is more sensitive
    $algorithm         ='fasta';
    $sub_dir_size      =2;  # the default char number taken from seq name to make sub dirs
    $upper_expect_limit=1;
    $lower_expect_limit=0;
    $Score_thresh      =75; # FASTA or SSSEARCH score
    $margin            =0;  # sequence region margin. If it is 2, 2 more edged residues will be added
    $add_range         ='';
    $pwd               =`pwd`; chomp($pwd);
    $age_in_days_of_out_file=1000; ## If the files being tested is older than this, let's make anew

    if($vars{'a'}=~/\S+/){ $algorithm          = $vars{'a'}            };
    if($vars{'u'}=~/\d+/){ $upper_expect_limit = $vars{'u'}            };
    if($vars{'l'}=~/\d+/){ $lower_expect_limit = $vars{'l'}            };
    if($vars{'k'}=~/\d+/){ $k_tuple            = $vars{'k'}            };
    if($vars{'t'}=~/\d+/){ $Score_thresh       = $vars{'t'}            };
    if($vars{'m'}=~/\d+/){ $margin             = $vars{'m'}            };
    if($vars{'d'}=~/\d+/){ $sub_dir_size       = $vars{'d'}            };
    if($vars{'r'}=~/\S+/){ $add_range          = 'r'                   };
    if($vars{'s'}=~/\S+/){ $single_big_msp     = 's'                   };
    if($vars{'DB'}=~/\S+/){            $sequence_DB=$vars{'DB'} ;
        if(-s $sequence_DB){
        }elsif(-s "../$sequence_DB"){  $sequence_DB= "../$sequence_DB"
        }elsif(-s "../../$sequence_DB"){  $sequence_DB= "../../$sequence_DB";
        }else{
            print "\n# (E) do_sequence_search: You set DB param, but I can\'t find $sequence_DB\n";
            exit;
        }
    }else{  print "\n# (E) do_sequence_search: I need DB param defined, sorry, aborting\n"; }

    if($vars{'FILE'}=~/\S+/){ $input_file_name = $vars{'FILE'}; push(@file,$input_file_name) };
    if($vars{'File'}=~/\S+/){ $input_file_name = $vars{'File'}; push(@file,$input_file_name) };
    if($vars{'FILE_AGE'}=~/\S+/){ $age_in_days_of_out_file= $vars{'FILE_AGE'};  };
    if($vars{'Query_seqs'}=~/\S+/){ %seq_input = %{$vars{'Query_seqs'}}};
    if($vars{'Query'}=~/\S+/){      %seq_input = %{$vars{'Query'}}};
    if($vars{'u'}    =~/\S+/){ $E_val          = $vars{'u'}            };
    if($vars{'PVM'}  =~/\S+/){ $PVM_FASTA_run  = $vars{'PVM'}; print "\n# PVM opt is set\n";     };
    if($vars{'M'}  =~/\S+/){ $machine_readable = $vars{'M'};           };

    if($char_opt=~/r/){    $add_range          = 'r' }
    if($char_opt=~/L/){    $Lean_output        = 'L' }
    if($char_opt=~/o/){    $over_write         = 'o' }
    if($char_opt=~/c/){    $create_sso         = 'c' }
    if($char_opt=~/s/){    $single_big_msp     = 's'; print "\n# Single file opt is set\n"; }
    if($char_opt=~/m/){    $msp_directly_opt   = 'm' }
    if($char_opt=~/M/){    $machine_readable   = 'M' }
    if($char_opt=~/d/){    $save_in_gz_in_sub_dir  = 'd' }
    if($char_opt=~/D/){$make_msp_in_sub_dir_opt= 'D' } # for simple search and storing msp file
    if($char_opt=~/N/){    $No_processing      = 'N'; $create_sso='c'; }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
    # When no %seq is given, but files
    #___________________________________________
    if(@hash==0 and @file > 0){
       print "\n# (i) do_sequence_search: You did not put sequences as in \%seq, but raw sequence file @file!\n";
       print "        I will run \'open_fasta_files\' sub to fetch sequences to store in \%seq_input\n";
       %seq_input=%{&open_fasta_files(\@file)};
    }else{
       print "\n# (i) do_sequence_search: I will use given seqs in \%seq_input from \%\{\$hash\[0\]\}\n";
       %seq_input=%{$hash[0]};
    }
    my (@seq_names) = keys %seq_input;
    $base_name = ${&get_base_names($input_file_name)};
    print "\n# (i) line:",__LINE__, ", do_sequence_search, \$algorithm => $algorithm, \$base_name:$base_name
                              $input_file_name <--> $sequence_DB\n";

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
   # If one of the files is missing, ask if it is for self self search and
   #  make query=target, else die
   #_______________________________________________________________________
   $defined_all_ok=&check_if_defined($input_file_name, $sequence_DB);
   unless($defined_all_ok){
       print "\n  Did you want to do self self search? ->(y/n) ";
       $answer_for_self_self=getc;
       if($answer_for_self_self =~/y/i){
           if($input_file_name){  $sequence_DB=$input_file_name }
           else{ $input_file_name=$sequence_DB }
       }else{
           print "\n# You seemed made a mistake, O.K., I will kill myself!\n\n";
           print chr(7);  exit;
       }
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # (0) If blast is chosen run Blast
   #_________________________________________________________
   if($algorithm=~/[psi\-]*[pb][last]*/i){
      print "\n# (i) Doing PSI search with @file\n";
      @final_out=@{&do_psi_blast_search(\@file, "d=$source_DB_file",
                           "i=$input_seq_file",  $over_write,
                           $make_msp_in_sub_dir_opt, $Lean_output)};
      return(\@final_out); #<<<<<<----------- F I N I S H
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # (1) Controlling which kind of search it should do. Do save_in_gz_in_sub_dir first if d is set
   #______________________________________________________________________________________________
   if( $make_msp_in_sub_dir_opt ){  ## convert sso to msp and put in sub dir like /D/, /S/

       for($x=0; $x < @seq_names; $x++){
          my ($over_write_sso_by_age, $over_write_msp_by_age,  %single_seq,
              $out_file_sso_gz_name, $out_file_msp_name, $out_file_gz_name, $existing_sso);
          my ($seq_name, $seq)= ($seq_names[$x], $seq_input{$seq_names[$x]});
          my $first_char= substr("\U$seq_name", 0, $sub_dir_size);
          mkdir ("$first_char", 0777) unless -d $first_char;
          chdir("$first_char");
          print "\n# (i) do_sequence_search: You set \'d\' or \'D\' opt\n";
          print "# (i) making subDIRs ($first_char) with $seq_name $sequence_DB to store MSP files\n";

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # Let's make each fasta file for each seq to be used in searching
          #_____________________________________________________________________
          my $temp_file_name="$seq_name.fa";
          %single_seq=($seq_name, $seq_input{$seq_name});
          &write_fasta(\%single_seq, $temp_file_name ); ## e for writing each file

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # Making output file name according to the option given
          #_______________________________________________________________________
          if($machine_readable and $algorithm=~/[fastassearch]+/){
                 $out_file_sso_name="$seq_name\.msso";
          }else{ $out_file_sso_name="$seq_name\.sso";      }
          $out_file_sso_gz_name="$out_file_sso_name\.gz";
          $out_file_msp_name="$seq_name\.msp";
          $out_file_gz_name="$seq_name\.msp\.gz";

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # Check if SSO file already there
          #_______________________________________________________________________
          if(-s $out_sso_file){ $existing_sso=$out_file_sso_name }
          elsif(-s $out_sso_gz_name){ $existing_sso=$out_file_sso_gz_name }
          if(-s $out_msp_name){ $existing_msp=$out_file_msp_name }
          elsif(-s $out_gz_name){ $existing_msp=$out_file_gz_name }

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # If the dates of files created are long ago, overwrite to refresh
          #____________________________________________________________________
          if(  (localtime(time- (stat($existing_sso))[9]))[3] > $age_in_days_of_out_file ){
               $over_write_sso_by_age='o';
          }
          if(  (localtime(time- (stat($existing_msp))[9]))[3] > $age_in_days_of_out_file ){
               $over_write_msp_by_age='o';
          }

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          #  To check if the target seq DB is in ../
          #________________________________________________
          if(-s $sequence_DB){
              print "\n# (i) Good, target \$sequence_DB $sequence_DB is in this working dir\n";
          }elsif( -s "../$sequence_DB"){ $sequence_DB="../$sequence_DB"; }

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # (2) Searching: Making MSP files directly,  MSP file format is the major format used in geanfammer!
          #_________________________________________________________________________________________
          if($char_opt =~/D/){ #### To make MSP file
               if( !(-s $out_file_gz_name or -s $out_file_msp_name) or $over_write or $over_write_msp_by_age){
                    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                    # (2.1) Running  run_fasta_sequence_search !!
                    #_______________________________________________________
                    print "\n# (i) Running  run_fasta_sequence_search !!\n";
                    $gzipped_msp_file=${&run_fasta_sequence_search(
                                       "a=$algorithm",
                                       "O=$out_file_msp_name",
                                       "File=$temp_file_name", "e=$E_val",
                                       "DB=$sequence_DB", "k=$k_tuple", "$machine_readable")};
                    $gzipped_sso_file=${&compress_files_by_gzip($out_file_sso_name)};
               }else{
                   print "\n#  Line No. ", __LINE__,", $out_file_gz_name already exists or
                                                  \$over_write is set or NOT older than $age_in_days_of_out_file\n";
               }
          }
          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # To make gzipped SSO files and MSP files
          #_______________________________________________
          elsif($create_sso or $char_opt=~/m/){ ### To make gzipped SSO files
               if( !(-s $out_file_sso_name or -s $out_file_sso_gz_name ) or $over_write or $over_write_sso_by_age){
                   print "\n# (i) Running  run_fasta_sequence_search with \"\$create_sso option\"!!\n\n";
                   $gzipped_msp_file=${&run_fasta_sequence_search(
                                       "a=$algorithm",
                                       "O=$out_file_msp_name", "$create_sso",
                                       "File=$temp_file_name", "e=$E_val",
                                       "DB=$sequence_DB", "k=$k_tuple", "$machine_readable")};

                   $gzipped_sso_file=${&compress_files_by_gzip($out_file_sso_name)};
               }else{
                   print "\n#  Line No. ", __LINE__,", $out_file_gz_name already exists or
                                                  \$over_write is set or NOT older than $age_in_days_of_out_file\n";
               }
          }else{
               if( !(-s $out_file_sso_name or -s $out_file_sso_gz_name ) or $over_write or $over_write_sso_by_age){
                   system(" $algorithm -m 10 -H  -E $E_val $temp_file_name $sequence_DB $k_tuple > $out_file_sso_name");
                   system("gzip $out_file_sso_name");
               }else{
                   print "\n#  Line No. ", __LINE__,", $out_file_gz_name already exists or
                                                  \$over_write is set or NOT older than $age_in_days_of_out_file\n";
               }
          }
          unlink("$seq_name.fa") if -s "$seq_name.fa";
          unlink("$first_char/$seq_name\.fa") if -s "$first_char/$seq_name\.fa";
          print "\n# Sub dir $first_char and $seq_name\.msp has been made, finishing do_sequence_search\n";
          chdir ('..');
      }
      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # F I N I S H
      #________________________________________
      goto EXIT;
   } # if ($char_opt =~/[dD]/){  is finished


   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # (2) Writing on PWD. This is the big single MSP output
   #____________________________________________________________
   $Single_msp_out_file="$base_name\.msp" if($single_big_msp eq 's');
   if(-s $Single_msp_out_file and !$over_write ){
       print "\n# (i) $Single_msp_out_file exists, and no \$over_write is set, skipping \n";
       push(@final_out, $Single_msp_out_file);
   }else{  $over_write  ='o';  }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # Check if it is necessary to write each sequences.fa files
   #______________________________________________________
   if( $over_write ){  &write_fasta_seq_by_seq(\%seq_input, 'e'); } ## e for writing each seq file

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
   #  When, you didn't use "DB=$XXX" and "File=$FXXX" format, first file input is DB etc
   #_______________________________________________________________________________________
   $defined_all_ok=&check_if_defined($input_file_name, $sequence_DB);
   if(!$defined_all_ok){ print "\n# (E) FATAL: do_sequence_search: You did not use \"DB=\$XXX\" format\n"; exit   };

   print "\n# Finished writing the enquiry fasta files from \%seq_input by write_fasta";
   print "\n# I am in do_sequence_search sub, Target database used :  $sequence_DB with seqs of \'@seq_names\'\n";


   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   #  Main search with given @seq_names
   #______________________________________________________________
   for($j=0; $j< @seq_names; $j++){  # @seq_names has sequence names coming from  (@seq_names) = keys %seq_input;
       my ($over_write_sso_by_age, @temp, $existing_sso, $out_gz_name,
           $over_write_msp_by_age, $existing_msp, $out_msp_file, $seq_name);
       $seq_name=$seq_names[$j];
       $each_seq_fasta="$seq_name\.fa";
       $out_msp_file="$seq_name\.msp";
       $out_gz_name="$seq_name\.msp\.gz";
       $out_msso_file="$seq_name\.msso";

       &die_if_file_not_present($each_seq_fasta);

       print "\n# (i) :-) Found $each_seq_fasta is searched against $sequence_DB\n";
       if($algorithm=~/fasta/){       $out_sso_file="$seq_name\.fsso";
       }elsif($algorithm=~/ssearch/){ $out_sso_file="$seq_name\.ssso"; }
       $out_sso_gz_name="$out_sso_name\.gz";

       if(-s $out_sso_file){ $existing_sso=$out_sso_file }
       elsif(-s $out_sso_gz_name){ $existing_sso=$out_sso_gz_name }
       if(-s $out_msp_file){ $existing_msp=$out_msp_file }
       elsif(-s $out_gz_name){ $existing_msp=$out_gz_name }
       if(  (localtime(time- (stat($existing_sso))[9]))[3] > $age_in_days_of_out_file ){
            $over_write_sso_by_age='o';
       }
       if(  (localtime(time- (stat($existing_msp))[9]))[3] > $age_in_days_of_out_file ){
            $over_write_msp_by_age='o';
       }

       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       #  To check if the target seq DB is in ../
       #________________________________________________
       if(-s $sequence_DB){ print "\n# (i) \$sequence_DB $sequence_DB exists, Good\n";
       }elsif( -s "../$sequence_DB"){ $sequence_DB="../$sequence_DB";
       }elsif( -s "../../$sequence_DB"){ $sequence_DB="../../$sequence_DB"; }

       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # If MSP file already exist
       #_____________________________________________________________
       if( -s $out_msp_file and !$over_write_msp_by_age and !$over_write ){
            print "\n# (i) File: $out_msp_file exists, skipping, to overwrite use \'o\' opt or set days";
            push(@final_out, $out_msp_file);
       }else{  ## -E is for e value cutoff. -b is for num of seq fetched
           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~~~~~~~~~~~~~~
           #  K-tuple is 1 by default. If xxxx.sso exsts, skip running fasta or ssearch
           #________________________________________________________________________________
           if(-s $out_sso_file and !$over_write ){ ## If SSO is already present, JUST READ IT!
                print "\n# (i) Just opening existing $out_sso_file $out_sso_file $out_msp_file $over_write_msp_by_age $over_write\n";
                open(SSO_ALREADY, "$out_sso_file");
                @temp=<SSO_ALREADY>;
                print "\n# (i) \@temp has ", scalar(@temp), " lines\n";
                close(SSO_ALREADY);
                &compress_files_by_gzip($out_sso_file);
           }else{ ## Run FASTA HERE
                print "\n# (i) Running \"run_fasta_sequence_search\" ";
                $gzipped_msp_file=${&run_fasta_sequence_search(
                                   "a=$algorithm",
                                   "O=$out_msp_file", "$create_sso",
                                   "File=$each_seq_fasta", "e=$E_val",
                                   "DB=$sequence_DB", "k=$k_tuple", "$machine_readable")};
                push(@final_out, $gzipped_msp_file) if -s $gzipped_msp_file ;
                unlink($each_seq_fasta) if $Lean_output;
           }
       }
       if($machine_readable and $create_sso and -s $out_sso_file){ &cp($out_sso_file, $out_msso_file); }
   } # end of for($j=0; $j< @seq_names; $j++){
   return(\@final_out);
   EXIT:

} # do_sequence_search



#__________________________________________________________________________
# Title     : do_hmm_sequence_search
# Usage     : &do_hmm_sequence_search(\@file, "method=$default_search_method",
#								$over_write, "DB=$pdbd40_seq_fasta");
#
# Function  : does hmm sequence search using Sean Eddy's HMMER (hmmls, hmmfs)
# Example   :
# Keywords  : do_seq_search_with_hmm, do_hmmt_sequence_search
# Options   :
#    "method=ls"  for turning hmmls search option on (default)
#    "method=fs"  for turning hmmfs search option on
#    method= by method=
#   o  for overwriting existint xxxxx.hmm files
#   E=Enguiry_name    for specifying enquiry seq name rather than 'HMM', the default
#   t=20  for score thresh at the level of hmmls. Default of hmmls is 0. example showed has 15
#   $evalue_cutoff= by e=
#   $over_write = o by -o o
# Returns   :
# Argument  :
# Version   : 1.6
#----------------------------------------------------------------------------
sub do_hmm_sequence_search{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
        \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
        \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my($version_number, @out_hmm_file_names, $evalue_cutoff);
    my $score_thresh=5; # default threshold

    $evalue_cutoff=3;
    $default_search_method='hmmsearch';
    $version_number=2; ## default

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # checking the existence of hmm search file
    #_____________________________________________________
    if(&check_file_exists_in_path("hmmsearch")){
        $default_search_method='hmmsearch';
        $version_number=2;
    }elsif(&check_file_exists_in_path("hmmls")){
        $default_search_method='hmmls';
        $version_number=1;
    }else{
        print "\n# (ERROR) $0 can not find hmmsearch or hmmls, Please put them in the PATH\n\n";
        if($vars{'method'}=~/ls/){
        }elsif( $vars{'method'}=~/fs/){ $default_search_method='hmmfs';
        }else{ exit; }
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
    # Checking the target DB file
    #______________________________________________________________
    if( $vars{'DB'} =~/\S/ and -s $vars{'DB'}){  $target_DB = $vars{'DB'}
    }else{  print "\n# (WARN) I need target DB to search for hmmls-fs. Use: DB=xxxx.fa form";
        print "\n#     or Default PDB40D_FASTA ENV setting will be used for DB\n";
        $target_DB= $ENV{'PDBD40_SEQ_FASTA'};
        unless(-s $target_DB){
           print "\n# (ERROR) Even the default DB setting $target_DB does not exist, check path/file\n\n";
           exit;
        }
    }
    if($vars{'E'}=~/\S/){ $enquiry_name        =$vars{'E'} }
    if($vars{'t'}=~/\S/){ $score_thresh        =$vars{'t'} }
    if($vars{'e'}=~/\S/){ $evalue_cutoff       =$vars{'e'} }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # running hmm with @file (hmmb files) against target_DB
    #____________________________________________________________
    for($i=0; $i< @file; $i++){
        print "\n# Running do_hmm_sequence_search with $file[$i], options are: $char_opt\n";
        if($vars{'E'}=~/\S/){ $base=$enquiry_name;   # When $enquiry_name is given, it uses for output name
        }else{
            $base=${&get_base_names($file[$i])};
        }

        if($default_search_method=~/hmmfs/){
            $output_hmm_result = "$base\.hmmfs";
        }elsif($default_search_method=~/hmmsearch/ or $default_search_method=~/hmmls/){
            $output_hmm_result = "$base\.hmmls";
        }
        if($char_opt=~/o/ or !(-s $output_hmm_result) ){
            if($version_number==2){
                print "Running: $default_search_method -T $score_thresh -E $evalue_cutoff $file[$i] $target_DB \> $output_hmm_result\n";
                system("$default_search_method -T $score_thresh -E $evalue_cutoff $file[$i] $target_DB > $output_hmm_result");
            }else{
                print "Running: $default_search_method -t $score_thresh $file[$i] $target_DB \> $output_hmm_result\n";
                system("$default_search_method -t $score_thresh $file[$i] $target_DB > $output_hmm_result");
            }
        }else{
            print "\n# The $out_hmm_file file already exists. To overwrite use -o opt\n";
        }
        push(@out_hmm_file_names, $output_hmm_result);
    }
    if(@out_hmm_file_names > 1){
       return(\@out_hmm_file_names);
    }else{
       return(\$out_hmm_file_names[0]);
    }
}



#_______________________________________________________________________
# Title     : divide_clusters
# Usage     : &divide_clusters(\@file);
# Function  : This is the main funciton for divclus.pl
#               divides complex single linkage cluster into smaller duplication
#               module level sub clusters.
# Example   : &divide_clusters(\@file, $verbose, $range, $merge, $sat_file,
# 	                $dindom, $indup, "T=$length_thresh", "e=$evalue", $over_write,
#                   $optimize, "s=$score", "f=$factor");
#
# Keywords  : divicl, divclus, div_clus, divide clusters
# Options   : _  for debugging.
#   f=<digit>   for determing the factor in filtering out non-homologous
#                  regions, 7 = 70% now!!
#   l=<digit>   for seqlet(duplication module) length threshold
#   t=<digit>   for seqlet(duplication module) length threshold
#                  (same as l opt, confusing, huh? )
#   s=<digit>   for score threshold
#   e=<digit>   for evalue threshold
#   z           for activating remove_similar_sequences, rather than remove_dup....
#   o           for overwriting
#   v           for verbose printout (infor)
#   D           for dynamic factor
#   S  $short_region=  S by S -S  # taking shorter region overlap in removing similar reg
#   L  $large_region=  L by L -L  # taking larger  region overlap in removing similar reg
#   A  $average_region=A by A -A  # taking average region overlap in removing similar reg
#   o  for $over_write
#
# Version   : 2.9
#------------------------------------------------------------------------
sub divide_clusters{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
        \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
        \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my($merge, $verbose, $sat_file, $length_thresh, $factor, $indup, $indup_percent,
       $score, @temp_show_sub, $optimize, $file, $evalue, $over_write, $din_dom,
       $sum_seq_num, $base_1, $output_clu_file, $short_region, $large_region,
       $average_region, $dynamic_factor, @sub_clustering_clu_files);

    $factor=7; # default factor is 7 for 70%
    $length_thresh=30;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Dealing with options
    #_________________________________________
    if($char_opt=~/m/){        $merge='m';
    }if($char_opt=~/v/){       $verbose='v'; # for showing debugging information
    }if($char_opt=~/i/){       $indup='i';
    }if($char_opt=~/z/){       $optimize='z';
    }if($char_opt=~/o/){       $over_write='o';
    }if($char_opt=~/d/){       $din_dom='d';
    }if($char_opt=~/s/){       $sat_file='s';
    }if($char_opt=~/y/){       $dynamic_factor='y';
    }if($char_opt=~/S/){       $short_region  ='S';
    }if($char_opt=~/L/){       $large_region  ='L';
    }if($char_opt=~/A/){       $average_region='A';
    }if($vars{'T'}=~/\d+/){    $length_thresh= $vars{'T'};
    }if($vars{'l'}=~/\d+/){    $length_thresh= $vars{'l'}; ## synonym of 't'
    }if($vars{'f'}=~/\S+/){    $factor= $vars{'f'};
    }if($vars{'s'}=~/\d+/){    $score = $vars{'s'};
    }if($vars{'e'}=~/\d+/){    $evalue= $vars{'e'};
    }if($vars{'E'}=~/\d+/){    $evalue= $vars{'E'}; # synonym of e
    }

   $percent_fac=$factor*10;

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # (0) When one file input was given (yes, divclus can handle multiple files, Sarah!)
   #________________________________________________________________________________
   if(@file == 1){  #<=== @file has xxxx.msp, yyyy.msp  zzzz.msp ....,
        print "\n# (1) divide_clusters: One single file was given=> \"@file\"\n" if $verbose;
        $file=$file[0];
        $base_1=${&get_base_names($file)};

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # (2) Define the output cluster file name:  eg, 3-232_cluster_F7.clu , F7 means factor used is 7
        #______________________________________________________________________________________________
        $output_clu_file="$base_1\_F${factor}\.clu";

        if( !$over_write and -s $output_clu_file){
            print "\n# $output_clu_file Already EXISTS, skipping. Use \'o\' opt to overwrite\n"; exit;
        }
        print "# (2) divide_clusters: processing ONE single file \"@file\" with merge_sequence_in_msp_file\n" if $verbose;

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # (3) merge_sequence_in_msp_file does not do much. Just filtering and producing
        #     sequences in ISPA_PBS_21-215 VPR_PBS_160-354 format from msp format
        #________________________________________________________________________________
        @grouped_seq_names=@{&merge_sequence_in_msp_file(\@file, "s=$score", $optimize, $din_dom, $sat_file,
                $optimize, "T=$length_thresh", "e=$evalue", "f=$factor", "$range", "$merge", $verbose,
                $short_region, $large_region, $average_region, $over_write, $dynamic_factor)};

        if($verbose){
            print "\n\n# (3) divide_clusters: finished running \"merge_sequence_in_msp_file\"\n  ==>";
            for($i=0; $i< @grouped_seq_names; $i++){
               print "\n-->> $grouped_seq_names[$i]";
            }
        }

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # (4) This is critical seqlet merging step
        #________________________________________________________________________________
        @out=@{&cluster_merged_seqlet_sets(\@grouped_seq_names, $dynamic_factor,
               "f=$factor", $short_region, $large_region, $average_region, $optimize)};

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # (5) This is showing the result in clu file format
        #________________________________________________________________________________
        @temp_show_sub=&show_subclusterings(\@out, $file, $sat_file, $dindom, $indup,
						   "e=$evalue", "p=$percent_fac", "f=$factor" );
        $good_bad       = $temp_show_sub[0];
        $indup_c        = $temp_show_sub[1];
        $sum_seq_num   += $temp_show_sub[2];
        push(@sub_clustering_out_files, @{$temp_show_sub[3]});

        if($good_bad==1){      push(@good, $file);
        }else{                 push(@bad, $file);       }

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # (6) Final write up stage (unecessary)
        #_______________________________________________________________
	&write_good_bad_list_in_divide_clusters(\@good, \@bad);

   }
   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # when more than one single file input is given
   #____________________________________________________________
   elsif(@file >1 ){
       my (@good, @bad);
       if($indup =~/i/i){   open (INDUP, ">indup_stat\.txt");  } # this is not essential.

       for($i=0; $i< @file; $i++){
            my (@grouped_seq_names, @temp_show_sub);
            my $indup_c=0;
            my $big_msp_file=$file[$i];
            unless(-s $big_msp_file){ print "\n# (E) \$big_msp_file does not exist\n"; exit }

            $base_1=${&get_base_names($big_msp_file)};
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # (1) Define the output cluster file name:  eg, 3-232_cluster_F7.clu , F7 means factor used is 7
            #______________________________________________________________________________________________
            $output_clu_file="$base_1\_F${factor}\.clu";

            print "\n# DIVCLUS: just before merge_sequence_in_msp_file, \$output_clu_file is $output_clu_file from input file $big_msp_file" if $verbose;
            if( !$over_write and -s $output_clu_file){
                print "\n# $output_clu_file Already EXISTS, skipping. Use \'w\' opt to overwrite\n";
                next;  }

            print "\n# (1)  divide_clusters: processing file \"$big_msp_file\" for $output_clu_file" if $verbose;

            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            #  (2) If clu file(eg 2-1618_ss.clu ) is in pwd, tries to skip
            #____________________________________________________________
            if((-s $output_clu_file) > 10 and $over_write !~/o/){
                print "# $output_clu_file exists, skipping, use \"o\" option to overwrite\n";  next;
            }

            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # (3) merge_sequence_in_msp_file does not do much. Just filtering and producing
            #     sequences in ISPA_PBS_21-215 VPR_PBS_160-354 format of STRING from msp format
            #     $big_msp_file is an MSP file
            #________________________________________________________________________________
            print "\n# (i) divide_clusters : I am merging seq in $big_msp_file file for $output_clu_file\n" if $verbose;
            @grouped_seq_names=@{&merge_sequence_in_msp_file(\$big_msp_file, "s=$score", $din_dom, $sat_file, $optimize,
                                "T=$length_thresh", "e=$evalue", "f=$factor", "$range", "$merge", $verbose, $over_write,
                                 $short_region, $large_region, $average_region, $dynamic_factor )};

            if($verbose){
                print "\n# \@file has more than one input file\n # The result of \"merge_sequence_in_msp_file\"\n";
                print "@grouped_seq_names";
            }

            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            #  (4) Clustering the sets of merged seqlets => CORE algorithm
            #____________________________________________________________

            @out=@{&cluster_merged_seqlet_sets(\@grouped_seq_names, "f=$factor", $optimize, $dynamic_factor,
                   $short_region, $large_region, $average_region)};

            @temp_show_sub=&show_subclusterings(\@out, $big_msp_file, $sat_file, $dindom, $indup,
                                                    "e=$evalue", "p=$percent_fac", "f=$factor");
                        $good_bad       = $temp_show_sub[0];
                        $indup_c        = $temp_show_sub[1];
                        $sum_seq_num   += $temp_show_sub[2];
            push(@sub_clustering_out_files, @{$temp_show_sub[3]});

            if($good_bad==1){          push(@good, $big_msp_file);
            }else{         push(@bad, $big_msp_file);       }

          }
          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
          &write_good_bad_list_in_divide_clusters(\@good, \@bad);
          sub write_good_bad_list_in_divide_clusters{
               my  (@good, @bad, $i); @good=@{$_[0]}; @bad=@{$_[1]};
               open(GOODBAD, ">good_bad.list");
               print GOODBAD "GOOD: all link    : 000\n";
               for($i=0; $i< @good; $i++){  print GOODBAD "$good[$i]\n";  }
               print GOODBAD "BAD : Not all link: 000\n";
               for($i=0; $i< @bad; $i++){   print GOODBAD "$bad[$i]\n";   }
               close(GOODBAD);
          }
          #_______________________________________________________________

   }
   return(\@sub_clustering_out_files); # contains (xxxx.clu, yyy.clu,, )
}



#______________________________________________________________________________
# Title     : remove_file_extension
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub remove_file_extension{
    my (@modified_files, $i, @files);
    @files=@_;
    for($i=0; $i< @files; $i++){
        $base=${&get_base_names($files[$i])};
        rename($files[$i], $base);
        push(@modified_files, $base);
    }
    return(\@modified_files);
}

#______________________________________________________________________________
# Title     : remove_small_files
# Usage     : @files_removed=@{&remove_small_files(@ARGV)};
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub remove_small_files{
    my($file_size_cut_line, $size, @files_removed, $i, @files);
     $file_size_cut_line=$_[0];
     $file_size_cut_line_bigger_than=$ARGV[1];

     @files=@{&read_file_names_only('.')};

     for($i=0; $i< @files; $i++){
        $size= -s $files[$i];
        if($size <= $file_size_cut_line and $size >= $file_size_cut_line_bigger_than){
            unlink($files[$i]);
            push(@files_removed, $files[$i]);
            print "\n# (i) $files[$i] is removed , size= $size byte";
        }
     }
     print "\n$0 finished. You might have killed some useful files :-)  \n\n\n\n";
     return(\@files_removed);
}



#______________________________________________________________________________
# Title     : remove_mail_header_in_files
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub remove_mail_header_in_files{
    my (@modified_files, $i, @files);
    @files=@_;
    for($i=0; $i< @files; $i++){
        my ($line_count);
        &cp("$files[$i]", "$files[$i]\.bak");
        open(FILE_BAK, "$files[$i]\.bak");
        open(FILE, ">$files[$i]");
        while(<FILE_BAK>){
           $line_count++;
           if($line_count > 30){
               print FILE $_; next;
           } # No point in looking very far down !
           if(/^From +\S/){               next;
           }elsif(/^Date\: +/){           next;
           }elsif(/^From: +/){            next;
           }elsif(/^To: +/){              next;
           }elsif(/^Subject: +/){         next;
           }else{                   print FILE;
           }
        }
        close(FILE_BAK);
        close(FILE);
        if(-s $files[$i] > ( $original_file_size - 400) ){
            push(@modified_files,$files[$i]);
            print "\n# (i) $files[$i] has real size, I am removing $files[$i]\.bak\n";
        }else{
            print "\n# (i) The file size of new $files[$i] is a bit small, I am leaving $files[$i]\.bak\n";
        }
    }
    return(\@modified_files);
}



#_____________________________________________________________________________
# Title     : remove_similar_seqlets
# Usage     : @seqlets=@{&remove_similar_seqlets(\@split)};
# Function  : merges(gets average starts and ends ) of similar
#             seqlets to reduce them into smaller numbers. This can also handle
#              names like XLBGLO2R_8-119_d1hlm__.
#
# Example   : @seqlets=@{&remove_similar_seqlets(\@mrg1, $mrg2, \@mrg3)};
#               while @mrg1=qw(M_2-100 M_2-110 M_8-105 M_4-108 N_10-110 N_12-115);
#                     $mrg2='Z_3-400 Z_2-420';
#                     @mrg3=('X_2-300 X_3-300', 'X_2-300', 'X_5-300 X_2-301' );
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : merge_sequence_names, merge_seq_names, merge_sequence_ranges
#             merge_seq_ranges
# Options   : _  for debugging.
#             #  for debugging.
#             f= for factor
#             S  for shorter region matched is used
#             A  for average region matched is used
#             L  for larger region matched is used
# Category  :
# Version   : 2.0
#-------------------------------------------------------------------------------
sub remove_similar_seqlets{
	 my ($i, $j, $seq1, $smaller_leng, $leng1, $leng2, $start1, $end1, $seq2, $start2,
	   $av_diff, $num_of_seq, $av_end, $av_start, $end2, @seqlets,
	   @array_input, @seqlet, $tail1, $tail2, $shorter_region, $larger_region,
	   $average_region);
	 my $factor=7;  ## !!! This var makes big difference in the final clustering
	 $average_region = 'A'; ## default is to get the average of comparing regions

	 for($i=0; $i< @_; $i++){
	   if(ref($_[$i]) eq 'ARRAY'){
		   @array_input=@{$_[$i]};
		   for($j=0; $j<@array_input; $j++){
			   @seqlet=split(/ +/, $array_input[$j]);
							 push(@seqlets, @seqlet);
		   }
		   if($verbose){
							 print "\n# remove_similar_seqlets: ARRAY ref is given as input\n";
							 print "#  They are: @seqlets, Sarah\n";
					 }
	   }elsif($_[$i]=~/f=(\S+)/){ $factor=$1
	   }elsif($_[$i]=~/^(S) *$/){     $shorter_region=$1 ; $average_region=0;
	   }elsif($_[$i]=~/^(L) *$/){     $larger_region =$1 ; $average_region=0;
	   }elsif($_[$i]=~/^(A) *$/){     $average_region=$1 ; $shorter_region=$larger_region=0;
	   }elsif($_[$i]=~/\S+\_\d+\-\d+/){
		   push(@seqlets, split(/ +/, $_[$i]) );
	   }elsif(ref($_[$i]) eq 'SCALAR' and ${$_[$i]}=~/\S+\_\d+\-\d+/){
	       push(@seqlets, split(/ +/, ${$_[$i]}) );
	   }
	 }
	 print "\n# remove_similar_seqlets : I am using \$factor : $factor\n" if $verbose;

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # Sorting is necessary as I am not doing the real thorough comparison
	 #______________________________________________________________________
	 $num_of_seq=@seqlets=sort @seqlets;
	 if($verbose){
			 print "\n# (1) remove_similar_seqlets: Num of seq to merge: $num_of_seq (from \@seqlets)";
			 print "\n# (2) remove_similar_seqlets: \@seqlets are  @seqlets\n";
	 }
	 my ($short_start, $large_start, $short_end, $large_end);

	 for($i=0; $i< @seqlets; $i++){
	  if($seqlets[$i]=~/^ *(\S+)_(\d+)\-(\d+)(\S*)/){  ## last (\S*) is necessary for XLBGLO2R_8-119_d1hlm__
		 my($seq1, $start1, $end1, $tail1)=($1, $2, $3, $4);
	     if($seqlets[$i+1]=~/^(\S+)_(\d+)\-(\d+)(\S*)/){
			($seq2, $start2, $end2, $tail2)=($1, $2, $3, $4);
			if($seq1 eq $seq2){
			   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			   $diff_start=abs($start1 - $start2);
			   $diff_end  =abs($end1   - $end2  );
			   $leng1=$end1-$start1;
		       $leng2=$end2-$start2;

			   if($leng1 >= $leng2){ $smaller_leng=$leng2; }else{ $smaller_leng=$leng1; }
			   if( ($diff_start+$diff_end)/2 <= $smaller_leng*($factor/10) ){

				   if($average_region){
					   $av_start=int(($start1+$start2) / 2);
					   $av_end  =int(($end1 + $end2) / 2);
											 $seqlets[$i]="$seq1\_$av_start\-${av_end}$tail1";  # $tail1 is for names like XLBGLO2R_8-119_d1hlm__
											 print "\n# new seqlet : $seqlets[$i]\n" if $verbose;
					   splice(@seqlets, $i+1, 1);
					   $i--;
				   }else{
					   if($start1 < $start2){
							$short_start=$start2; $large_start=$start1;  ## note that short start should be $start2 if $start2 is bigger
					   }else{
							$short_start=$start1; $large_start=$start2;
					   }
					   if($end1 < $end2){
							$short_end=$end1;  $large_end=$end2;
					   }else{
							$short_end=$end2;  $large_end=$end1;
					   }
					   if($shorter_region){
						   $seqlets[$i]="$seq1\_$short_start\-${short_end}$tail1";
					   }elsif($larger_region){
						   $seqlets[$i]="$seq1\_$large_start\-${large_end}$tail1";
											 }

					   splice(@seqlets, $i+1, 1);
					   $i--;
			       }
			   }
			}
		 }
	  }
	 }
	 print "\n# (3) remove_similar_seqlets: The final out are: @seqlets\n" if $verbose;
	 return(\@seqlets);
}



#__________________________________________________________________________
# Title     : show_subclusterings
# Usage     : &show_subclusterings(\@out);
# Function  : This is the very final sub of divclus.pl
# Example   : @temp_show_sub=&show_subclusterings(\@out, $file, $sat_file, $dindom, $indup);
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : print_subclusterings, sum_subclusterings, write_subclustering
#             show_clusterings, display_subclusterings
# Options   :
#             f  for file output, eg: xxxxxxx.sat
# Category  :
# Version   : 2.7
#-------------------------------------------------------------------------
sub show_subclusterings{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
        my ($max_size, $sat_file_name, $clu_file_name,
        $ori_cluster_size, $ori_cluster_num, $good_bad, @keys, $percentage_fac,
        $indup, @sizes, $sum_seq_num, $indup_percent, $indup_count, %tem4,
        @sub_clustering_out_files);  # clusall_1e-5_clu_14-324_ss.sat
	my @out=@{$array[0]};
	$indup_count=0;

	if($char_opt=~/d/){	    $dindom=1;	}
	if($char_opt=~/i/){		$indup=1;	}
	if($vars{'f'}=~/\S+/){     $factor= $vars{'f'}; }
	if($vars{'p'}=~/\d+/){ $percentage_fac= int($vars{'p'}); }
	if($vars{'s'}=~/\d+/){	   $score = $vars{'s'};	}
	if($vars{'e'}=~/\d+/){	   $evalue= $vars{'e'};	}

	#print "\n# (1) show_subclusterings : \@file has : @file\n";
	if( $file[0]=~/([\S+_]*?(\d+)\-(\d+)[_\w]*)\.msp/  or
		$file[0]=~/([\S+_]*?(\d+)\-(\d+)[_\w]*)\.sat/   ){
		 $ori_cluster_size=$2;
		 $ori_cluster_num =$3;
		 $base=$1;
		 $sat_file_name="$base\.sat";
		 $clu_file_name="$base\.clu";
	}else{
				 print "\n# (2) LINE:",__LINE__," The \@file input to show_subclusterings is not the right format, dying\n";
				 print "\n     Sarah!, right format looks like: 13-234.msp or 8-420_cluster.msp \n";  exit;
	}

        open(CLU, ">$clu_file_name") or die "\n# (ERROR) show_subclusterings failed miserably to open \"$clu_file_name\" \n";
        push(@sub_clustering_out_files, $clu_file_name);


	@out=@{&sort_string_by_length(\@out)};

	for($i=0; $i< @out; $i++){ # @out has ( 'YAL054C_98-695 YBR041W_90-617', 'YBR115C_230-842 YBR222C_16-537 YER015W_121-686', etc)
	   my $count+=$i+1;
	   my ( $int_dup_number, $sub_clu_size, $seq_with_range, @sp, $new_clus_NAME,
	        %tem, %tem2, %tem3, $j, @keys, $num_seq);
	   if($out[$i]=~/^ *$/){ next }
	   @sp=sort split(/ +/, $out[$i]);

	   for($j=0; $j < @sp; $j++){
		  $seq_with_range=$sp[$j];
		  if($seq_with_range=~/^((\S+)_((\d+)\-(\d+)))/){
			 $tem{$2}++;
			 $tem2{$2}.=sprintf("%-15s ", $1);
			 $tem3{$2} =$3;
			 $tem4{$2} =$5-$4;
		  }
	   }

	   @keys=sort keys %tem;
	   $num_seq=$sub_clu_size=@keys;

	   if($max_size < $sub_clu_size){
		  $max_size=$sub_clu_size; ## This is to collect the sizes of clusters to see if it is good.
	   }
	   $indup_count= &print_summary_for_divclus(
		         $count, \%tem2, \%tem,
		         $ori_cluster_num,
		         $ori_cluster_size,
		         $dindom,
		         $clu_file_name,
								 \%tem3, \%tem4,
								 $indup, );

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # Local subroutine
           #_______________________________________________________________
	   sub print_summary_for_divclus{ #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               my(@keys, $indup_count, $x, $m, $percentage_fac);
               my $count=$_[0]; # count of cluster
	       my %tem2=%{$_[1]};	my $num_seq=@keys=sort keys %tem2;
	       my %tem=%{$_[2]};	my $ori_cluster_num=$_[3];
	       my $new_clus_NAME=$ori_cluster_num.'0'.$count.'0'.$num_seq;
	       my $ori_cluster_size=$_[4];
	       my $dindom=$_[5];	my %tem3=%{$_[7]};
	       my $indup=$_[9];	my (%internal_dup);
	       my %tem4=%{$_[8]};
               #~~~~~~~~~~ Domain Inside Domain ~~~~~~~~~~~~~~~~~
	       if($dindom){
	          for($x=0; $x <@keys; $x++){
                       @domain_inside_domain=@{&get_domain_inside_domain($tem2{$keys[$x]})};
                       @domain_inside_domain=@{&remove_dup_in_array(\@domain_inside_domain)};
                       for($m=0; $m< @domain_inside_domain; $m++){ print "  # Dindom: $m : $domain_inside_domain[$m]\n";   }
                       print "\n";
		  }
               }
               #==========================================================================================

	       #~~~~~~~~~~ Internal duplication  ~~~~~~~~~~~~~~
	       if($indup==1){
		   # @keys is the same as sub cluster size,
		   for($x=0; $x < @keys; $x++){
                             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                             # Checks each sequence for duplication
                             #___________________________________________________
                             my %internal_dup=%{&get_internal_dup_in_a_cluster( $tem2{$keys[$x]} )};
                             my @dup_keys=keys %internal_dup;
                             if(@dup_keys > 0){
                                     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                                     #  This calculates the actual duplicated number rather than jus tthe sequences
                                     #______________________________________________________________________________
                                     $indup_count++;
                                     printf ("%-14s %-12s %-4s", $keys[$x], $new_clus_NAME, $num_seq);
                                     for($m=0; $m< @dup_keys; $m++){
                                             printf ("%-19s=> %s\n", $dup_keys[$m], $internal_dup{ $dup_keys[$m] } );
                                     }
                             }
                    }
                 }

                #~~~~~~~~~~ Summary ~~~~~~~~~~~~~~~~~~~~~~~~~~~
                print  CLU  "Cluster size $num_seq\n";
                                        printf CLU ("Cluster number %-12s # E:%-5s Factor:%-2s P:%-2s, Ori size:%-4s Sub:%-4s From:%-12s\n",
                                          $new_clus_NAME, $evalue, $factor, $percentage_fac,
                                          $ori_cluster_size, $num_seq, $ori_cluster_num);
                print       "Cluster size $num_seq\n";
                printf     ("Cluster number %-12s # E:%-5s Factor:%-2s P:%-2s, Ori size:%-4s Sub:%-4s From:%-12s\n",
                              $new_clus_NAME, $evalue, $factor, $percentage_fac,
                              $ori_cluster_size, $num_seq, $ori_cluster_num);
                for($x=0; $x <@keys; $x++){
                   printf CLU ("   %-4s %-5s %-17s %-10s %-3s leng: %-s\n",
                               $num_seq, $ori_cluster_num, $keys[$x], $tem3{$keys[$x]}, $tem{$keys[$x]}, $tem4{$keys[$x]});
                   printf ("   %-4s %-5s %-17s %-10s %-3s leng: %-s\n",
                          $num_seq, $ori_cluster_num, $keys[$x], $tem3{$keys[$x]}, $tem{$keys[$x]}, $tem4{$keys[$x]});
                }
                return($indup_count);
	   }
	}
		close(CLU); ## this is a bug fix

	if($max_size == $ori_cluster_size){   $good_bad=1;
	}else{	                              $good_bad=0;	}

       print "\n# Sarah, Do you think the subclusterings are O.K.?" if $verbose;
       print "\n#   Tell me, if you feel suspicious, jong\@salts.med.harvard.edu\n\n" if $verbose;
       return($good_bad, $indup_count, $ori_cluster_size, \@sub_clustering_out_files);
}





#__________________________________________________________________________
# Title     : exchange_query_with_match_in_msp
# Usage     : @exchanged_msp=@{&exchange_query_with_match_in_msp(\@file)};
# Function  :
# Example   :
# Keywords  : swap_query_with_match_in_msp, invert_query_with_match_in_msp,
#             swap_query_seq_with_match_seq_in_msp,
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub exchange_query_with_match_in_msp{

	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(%exchanged_msp, @sorted_by_query_seq_names, @new_msp_lines);
	$open_msp_files_x_opt = 'x';
	if($char_opt=~/n/){ $names_only='n' }
	%exchanged_msp=%{&open_msp_files(@file, $open_msp_files_x_opt, $names_only )};

	@new_msp_lines=values %exchanged_msp;
	@sorted_by_query_seq_names=
	   map{ $_->[0] } sort {$a->[1] cmp $b->[1]} map {/^\d+ +\S+ +\d+ +\d+ +(\S+)/ && [$_, $1] } @new_msp_lines;
	return(\@sorted_by_query_seq_names);
}



#______________________________________________________________
# Title     : get_internal_dup_in_a_cluster
# Usage     :
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_internal_dup_in_a_cluster{
	$cluster_line=$_[0] || ${$_[0]};
	my($i, $j, @seq, %out, $seq_name);
	my $overlap_factor=40;
	my $min_inside_dom_size=40;
	@seq=split(/ +/, $cluster_line);  ## These sequence are single seq with different regions
	@seq= map{$_->[0]} sort{$a->[1] cmp $b->[1] or $a->[2] <=> $b->[2] }
			             map {/^((\S+)_(\d+)\-(\d+) *.*)$/ && [$1, $2, $3, $4]} @seq;

	F1:for($i=0; $i< @seq; $i++){
	   $seq1=$seq[$i];
	   if($seq1=~/^(\S+)_(\d+)\-(\d+)/){
		  $seq_name=$1;
		  $start1=$2;
		  $end1=$3;
	   }
	   F:for($j=1; $j< @seq; $j++){
		  $seq2=$seq[$j];
		  if($seq1 eq $seq2){ next } ### Skip IDENTICAL ones (xxxx_1-10, xxxx_1-10)
		  if($seq2=~/^(\S+)_(\d+)\-(\d+)/){
			 $start2=$2;
			 $end2=$3;
		  }
		  $leng2=$end2-$start2;
		  $margin=$leng2/12;   ## 8% overlap is regarded as not overlapping

		  if(( ($start1+$margin) > $end2)||
		    ( ($start2+$margin) > $end1)){ # skips non overlapping seqlets

			$out{"$start1\-$end1"}.="$start2\-$end2 ";

			splice(@seq, $j, 1);
			$j--;
		  }
	   }
	}
	#@out=sort (@out);
	#@out=@{&remove_dup_in_array(\@out)};
	#@out=@{&remove_similar_seqlets(\@temp, "f=2")};
	return(\%out);
}

#______________________________________________________________
# Title     : get_domain_inside_domain
# Usage     :
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : find_dindoms, domain_inside_domain, domain_in_domain
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_domain_inside_domain{
	$cluster_line=$_[0] || ${$_[0]};
	my($i, $j, @seq, @out);
	my $overlap_factor=40;
	my $min_inside_dom_size=40;
	@seq=split(/ +/, $cluster_line);
	F1:for($i=0; $i< @seq; $i++){
	   $seq1=$seq[$i];
	   if($seq1=~/^(\S+)_(\d+)\-(\d+)/){
		  $seq_name=$1;
		  $start1=$2;
		  $end1=$3;
	   }
	   F:for($j=0; $j< @seq; $j++){
		  $seq2=$seq[$j];
		  if($seq1 eq $seq2){ next } ### Skip IDENTICAL ones (xxxx_1-10, xxxx_1-10)
		  if($seq2=~/^(\S+)_(\d+)\-(\d+)/){
			 $start2=$2;
			 $end2=$3;
		  }
		  if(($start1 > $end2)||($start2 > $end1)){ # skips non overlapping seqlets
			 next;
		  }
		  if(($start1 > $start2)&&($end1 < $end2)){  #   -----
			 $leng_seq1=$end1-$start1;               # ----------
			 $leng_seq2=$end2-$start2;
			 if(( ($leng_seq2/2) >= $leng_seq1 )&&
			    ($leng_seq1 > $min_inside_dom_size) ){   # if seq1 is less than 60% of seq2, it is a hidden domain
				push(@out, "$seq2\($seq1\)");
			 }
		  }elsif(($start1 < $start2)&&($end1 > $end2)){  # -----------
			 $leng_seq1=$end1-$start1;                   #   ------
			 $leng_seq2=$end2-$start2;
			 if(( ($leng_seq1/2) >= $leng_seq2)&&
			    ($leng_seq2 > $min_inside_dom_size) ){   # if seq1 is less than 60% of seq2, it is a hidden domain
				push(@out, "$seq1\($seq2\)");
			 }
		  }
	   }
	}
	return(\@out);
}



#______________________________________________________________
# Title     : scale_for_horizontal_histogram
# Usage     :
# Function  : used to make things like:
#
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub scale_for_horizontal_histogram{
	my @query=@{$_[0]};
	if(@query > 2400){
	$condense_factor=20;
	}elsif(@query > 2200){
	$condense_factor=18;
	}elsif(@query > 1900){
	$condense_factor=16;
	}elsif(@query > 1600){
	$condense_factor=15;
	}elsif(@query > 1400){
	$condense_factor=14;
	}elsif(@query > 1200){
	$condense_factor=12;
	}elsif(@query > 1000){
	$condense_factor=10;
	}elsif(@query > 800){
	$condense_factor=9;
	}elsif(@query > 630){
	$condense_factor=8;
	}elsif(@query > 440){
	$condense_factor=6;
	}elsif(@query> 220){
	$condense_factor=4;
	}elsif(@query > 120){
	$condense_factor=3;
	}else{
	$condense_factor=2;
	}
	return(\$condense_factor);
}



#______________________________________________________________
# Title     : get_added_matched_regions_in_msp
# Usage     :
# Function  : This reads MSP file regions matched for a target seq
#             and adds things up to plot horizontally.
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_added_matched_regions_in_msp{
	 my @lines=@{$_[0]};
	 for($i=0; $i< @lines; $i++){
	  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  #                   $1                 $2     $3    $4      $5     $6    $7     $8
	  #                   171     41.18      6      73  HI1690    9      76  HI0736 sodium...
	  #,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
	  if($lines[$i]=~/^ *(\d+) +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+) *(.*)/){
		 if($4 eq $7){
			$query_name=$4;
			$query_leng=$3;
			for($j=0; $j<$query_leng; $j++){ $query[$j]=0; }
			next;
		 }else{
			if($match_name ne $7){ push(@matched_members, $7); }
			$query_start=$2;
			$query_end  =$3;
			$query_seq  =$4;
			$match_start=$5;
			$match_end  =$6;
			$desc       =$8;
			$match_name =$7;
			for($k= $query_start; $k<$query_end; $k++){
			   $query[$k]++;
			}
		 }
	  }
	 }
	 return(\@query);
}




#______________________________________________________________
# Title     : cluster_merged_seqlet_sets
# Usage     : @out=@{&cluster_merged_seqlet_sets(\@lines)};
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
#  $short_region=  S by S -S  # taking shorter region overlapped in removing similar regions
#  $large_region=  L by L -L  # taking larger  region overlapped in removing similar regions
#  $average_region=A by A -A # taking average region overlapped in removing similar regions
#
# Version   : 1.8
#--------------------------------------------------------------
sub cluster_merged_seqlet_sets{
   #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
   my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
   my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
   my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
   my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
   my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
   if($debug==1){print "\n\t\@hash=\"@hash\"
      \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
      \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
   #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my ($optimize, @splited1, @splited2, $verbose, $link_or_not);
   my @seq_names_in_clu=@{$array[0]};
   $link_or_not=0;
   my $factor=7; # 7 means 70% now

   if($vars{'f'}=~/(\S+)$/){ $factor=$1 }
   if($char_opt=~/o/){ $optimize=1 }
   if($char_opt=~/S/){ $short_region='S'; }
   if($char_opt=~/L/){ $large_region='L';   }
   if($char_opt=~/A/){ $average_region='A'; }
   if($char_opt=~/v/){ $verbose=1 }

   if($verbose){ print "\n# (1) cluster_merged_seqlet_sets: Checking linkage and merging <<<<<>>>>>\n@seq_names_in_clu\n";   }

   F1: for($i=0; $i< @seq_names_in_clu; $i++){
      @splited1=split(/ +/, $seq_names_in_clu[$i]);
      for($j=0; $j< @seq_names_in_clu; $j++){
        if($seq_names_in_clu[$i] eq $seq_names_in_clu[$j]){ next  }
        @splited2=split(/ +/, $seq_names_in_clu[$j]);

        $link_or_not=${&check_linkage_of_2_similar_seqlet_sets(\@splited1, \@splited2, "f=$factor")};
        print "\n +++++ \$link_or_not is  $link_or_not +++" if $verbose;
        if($link_or_not==1){
             if($verbose){
                 print "\n# (2) cluster_merged_seqlet_sets: \n $seq_names_in_clu[$i]  \n and $seq_names_in_clu[$j] \n are linked \n";
             }

             if($optimize){ ##---- This will also remove similar seqlets, not only identical ones
                $seq_names_in_clu[$i]=join(' ', sort @{&remove_similar_seqlets( [@splited1, @splited2],
                                            $short_region, $large_region, $average_region)} );
             }else{
                $seq_names_in_clu[$i]=join(' ', sort @{&remove_dup_in_array( [@splited1, @splited2])} );
             }
             splice(@seq_names_in_clu, $j,1);
             $j--; $i--;
             next F1;
		 }
	  }
   }
   return(\@seq_names_in_clu);
}

#______________________________________________________________________________
# Title     : check_homology_of_seq_pair
# Usage     :
# Function  :
# Example   : $homology_info=${&check_homology_of_seq_pair(\$pairs[$i], \%pdbg_hash_table)};
#
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub check_homology_of_seq_pair{
    my($i, %pdbg_hash_table,%input_hash, $seq_name1, $seq_name2,
       $classification1, $classification2);
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Argument handling
    #___________________________________
    for($i=0; $i< @_; $i++){
       if(ref($_[$i]) eq 'SCALAR' and ${$_[$i]}=~/^(\S+)[\t ]+(\S+)$/){
           ($seq_name1, $seq_name2)=($1,$2);
           splice(@_, $i, 1); $i--;
       }elsif(ref($_[$i]) eq 'ARRAY' and @{$_[$i]} eq 2){
           ($seq_name1, $seq_name2)=@{$_[$i]}; splice(@_, $i, 1); $i--;
       }elsif(ref($_[$i]) eq 'HASH'){
           %input_hash=%{$_[$i]};
           my @keys=keys %input_hash;
           if($keys[0]=~/\S+/ and $input_hash{$keys[0]}=~/(\d+\.\d+\.\d+)/){
               %pdbg_hash_table=%input_hash; splice(@_, $i, 1); $i--;
           }
           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # When seq pair name is given in HASH=(seq1, seq2)
           #___________________________________________________
           elsif($keys[0]=~/^\S+$/ and $input_hash{$keys[0]}=~/^\S+$/){
               ($seq_name1, $seq_name2)= ($keys[0], $input_hash{$keys[0]});
               splice(@_, $i, 1); $i--;
           }elsif($keys[0]=~/^(\S+)[\t ](\S+)$/){
               ($seq_name1, $seq_name2)= ($1, $2); splice(@_, $i, 1); $i--;
           }
       }
    }

    if($pdbg_hash_table{$seq_name1}=~/^(\d+\.\d+\.\d+)/){
       $classification1=$1;
    }else{
       print "\n\t# (W) \'$pdbg_hash_table{$seq_name1}\' is NOT FOUND in \%pdbg_hash_table, One member group??";
    }
    if($pdbg_hash_table{$seq_name2}=~/^(\d+\.\d+\.\d+)/){
       $classification2=$1;
    }else{
       print "\n\t# (W) \"$pdbg_hash_table{$seq_name2}\" is NOT FOUND in \%pdbg_hash_table, One member group??";
    }
    if($classification1 and $classification1 eq $classification2){
       print "\n\t# (i) $seq_name1 $classification1 == $seq_name2 $classification2";
       return(\'Homolog');
    }else{
       print "\n\t# (i) $seq_name1 $classification1 =X= $seq_name2 $classification2";
       return(\'Nomolog');
    }
}



#______________________________________________________________
# Title     : check_linkage_of_2_similar_seqlet_sets
# Usage     :
# Function  : connects two clusters of seqlets if they share
#              identical or near identical seqlets
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : check_link, check_relation, check_relatedness
# Options   : _  for debugging.
#  $factor = by f=  # eg)  "f=$factor" in the higher level sub
#
# Returns   :
# Argument  :
# Category  :
# Version   : 1.7
#--------------------------------------------------------------
sub check_linkage_of_2_similar_seqlet_sets{
	 my ($seq1, $name1, $start1, $end1, $seq2,
	   $leng1, $leng2, $name2, $start2, $end2, $diff_start,
	   $diff_end);
	 my @splited1=@{$_[0]};
	 my @splited2=@{$_[1]};

	 my $link_or_not=0;
	 my $factor=7;  # this means 70% sequence region overlap of the intermediate is chosen

	 if($_[2]=~/f=(\S+)/i){
	  $factor=$1;
	 }

	 F1: for($s=0; $s<@splited1; $s++){
	  if($splited1[$s]=~/^ *((\S+)_(\d+)\-(\d+))/){
		  $seq1=$1;
		  $name1=$2;
		  $start1=$3;
		  $end1=$4;
	  }
	  F2: for($t=0; $t< @splited2; $t++){
		 if($splited2[$t]=~/^ *((\S+)_(\d+)\-(\d+))/){
			 $seq2=$1;
			 $name2=$2;
			 $start2=$3;
			 $end2=$4;
		 }
		 if($seq1 eq $seq2){ $link_or_not=1; return(\$link_or_not) }
		 if($name1 ne $name2){
			 next F2;
		 }elsif($name1 eq $name2){ ## ~~~~~~~~~~~~~ THIS is the MOST IMP CORE PART ~~~~~~~~~~~~~
			 $leng1=$end1-$start1;
		     $leng2=$end2-$start2;
			 if($leng1 >= $leng2){ $smaller_leng=$leng2; }else{ $smaller_leng=$leng1; }
			 $diff_start=abs($start1-$start2);
			 $diff_end  =abs($end1  -$end2  );
			 if((($diff_start+$diff_end)/2) <= ($smaller_leng/$factor) ){
			 	$link_or_not=1;
				return(\$link_or_not);
			 }
		 }## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	  }
	 }
	 return(\$link_or_not);
}


#__________________________________________________________________________
# Title     : merge_arrays_by_common_elements
# Usage     :  @out=@{&merge_arrays_by_common_elements(\@ref_of_arrays)}
# Function  : merges arrays if there are common array elements.
#             if @A has (1,2,3) and @B has (2, 4, 5), they share 2, so
#             they are merged to be (1,2,3,4,5)
# Example   :
# Keywords  : cluster_arrays_by_common_elements, merge_arrays_if_common_elements
#             merge_array_if_common_elements, merge_arrays_when_common_elements_occur
#             merge_arrays
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#----------------------------------------------------------------------------
sub merge_arrays_by_common_elements{
	my ($i, @mother_array);

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#  Handling input array
	#______________________________________
	if( @_==1 and ref($_[0]) eq 'ARRAY'){  @mother_array=@{$_[0]};
	}elsif(@_ > 1){   @mother_array=@_;
	}else{
	   print "\n# The input for merge_arrays_by_common_elements needs one ref of array or multiple refs of array\n";
	}

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#  Main algo
	#______________________________________
	for($i=0; $i< @mother_array; $i++){
	   my @merged=(@{$mother_array[$i]}, @{$mother_array[$i+1]});
	   my ($common_or_not, %merged_hash, $j);

	   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	   #  Checks if there is a common element
	   #______________________________________
	   for($j=0; $j< @merged; $j++){
		  $merged_hash{$merged[$j]}++;
		  if($merged_hash{$merged[$j]} > 1){ $common_or_not=1 }
	   }
	   my @non_redundant=keys %merged_hash;

	   if($common_or_not==1){
		   $mother_array[$i]=\@non_redundant;
		   splice(@mother_array, ($i+1), 1);
		   $i--;
	   }
	}
	return(\@mother_array);
}


#________________________________________________________________________________
# Title     : check_parf_files
# Usage     : $number_of_parf=${&check_parf_files(@input)};
# Function  : checks if given file(s) is a parf file and returns the number of
#              identified parf file. If you check 2 files and both are parf, you
#               will get (\$num_of_parf_file) value of 2.
# Example   :
#    PARF file looks like this>
#   d1nsca_   d3nn9__   Homolog -664.92 2.43.1.1.3  2.43.1.1.2
#   d1dppa_   d2olba_   Homolog -617.41 3.68.1.1.6  3.68.1.1.1
#   d2ach.1a1 d9api.1a1 Homolog -556.38 5.2.1.1.3   5.2.1.1.4
# Keywords  :
# Options   :
# Author    :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------------------------
sub check_parf_files{
     my(@parf_file, $i, $j, @array, $counter, $num_of_parf_file);

     for($i=0; $i< @_; $i++){
          if(ref($_[$i]) eq 'SCALAR' and -f ${$_[$i]}){
               push(@parf_file, ${$_[$i]})
          }elsif(-f $_[$i]){
               push(@parf_file, $_[$i])
          }elsif(ref($_[$i]) eq 'ARRAY'){
               @array=@{$_[$i]};
               for($j=0; $j< @array; $j++){
                   if(-f $array[$j]){
                       push(@parf_file, $array[$j]);
                   }
               }
          }
     }
     print "\n# There were: @parf_file\n" if $verbose;
     for($i=0; $i< @parf_file; $i++){ ## usually @parf_file has only one element!
          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # Check if it is parf file inside the file
          #__________________________________________________________
          open(INPUT_FILE, "<$parf_file[$i]");
          while(<INPUT_FILE>){
               $counter++;
               if(/^ *\S+[\t ]+\S+[\t ]+[HN]omolog[\t ]+\S+[\t ]+\S+[\t ]+\S+/){
                   $num_of_parf_file++;
                   print "\n# $parf_file[$i] is a PARF file\n" if $verbose;
                   last;
               }else{
                   if($counter > 100){  ## giving up, it is not PARF file!
                       print "\n# $0 needs to have PARF files, others are ignored" if $verbose;
                       last;
                   }else{
                       next;
                   }
               }
          }
          close(INPUT_FILE);
     }
     return(\$num_of_parf_file);
}



#__________________________________________________________________________
# Title     : check_common_elements_in_array
# Usage     : &check_common_elements_in_array($mother_array[$i], $mother_array[$i+1]));
# Function  : accepts 1 or 2 refs of arrays and checks if there is any
#             common(repeating) elements between the two (or inside one)
#             The result is either ref of 1, or 0
# Example   :
# Keywords  : is_there_common_element, if_common_elements
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#----------------------------------------------------------------------------
sub check_common_elements_in_array{
	my(@merged_array, %common_elemnts_count, $m);
	if(@_ > 1){
	   @merged_array=(@{$_[0]}, @{$_[1]});
	}else{
	   @merged_array=(@{$_[0]});
	}
	for $m (@merged_array){
	   $common_elemnts_count{$m}++;
	   if($common_elemnts_count{$m} > 1){
		  print "\n# $common_elemnts_count{$m}     $m common!\n";
		  return(\1);
	   }
	}
	return(\0);
}

#__________________________________________________________________________
# Title     : link_ranges
# Usage     : @all_ranges = @{&link_ranges(@all_ranges)};
# Function  : merges ranges(10-20, 11-21 etc) when there is any overlap
#              is present
#             If you put a reverse range like '2000-20', it will
#              complain and reverse the order and do the job after correction.
#
# Example   : INPUT:
#
#   @input=( '1-30 1-40 1-50',
#            '2-49 4-40 2-99'....)
#
# Keywords  : connect_ranges, link_overlapping_ranges, connect_overlapping_ranges
# Options   : _  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------------
sub link_ranges{
	 my (@all_ranges, $new_start, $new_end, @output, $i, $seq1, $start1,
	   $end1, $seq2,
	   $smaller_leng, $start2, $end2, @split, @split1, @split2);
	 my $leng_thresh=30;
	 my $optimize=0;
	 for($i=0; $i< @_; $i++){
	  if(ref($_[$i]) eq 'ARRAY'){
		  @all_ranges=@{$_[$i]};
			}
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  Sorting the ranges by the starting range number.(essential)
	 #______________________________________________________________
	 @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			   map { $_=~/(\d+)\-/ and [$_, $1] } @all_ranges;

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  iterating merger
	 #_________________________________________________________
	 for($i=0; $i< @all_ranges; $i++){
	   if($all_ranges[$i] =~/(\d+)\-(\d+)/){
		   ($start1, $end1)=($1, $2);
		   if($start1 > $end1){
			  print "\n# link_ranges: Error, \$start1 :$start1 is larger than \$end1: $end1\n";
			  print "\n# Exchanging the start and end, and starting it all over again";
			  $all_ranges[$i]="$2\-$1";
			  @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			              map { $_=~/(\d+)\-/ and [$_, $1] } @all_ranges;
			  $i= -1;
			  next;
		   }
	   }
	   if($all_ranges[$i+1] =~/(\d+)\-(\d+)/){
		   ($start2, $end2)=($1, $2);
		   if($start2 > $end2){
			  print "\n# link_ranges: Error, \$start2 :$start2 is larger than \$end2: $end2\n";
			  print "\n# Exchanging the start and end, and starting it all over again";
			  $all_ranges[$i+1]="$2\-$1";
			  @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			              map { $_=~/(\d+)\-/ and [$_, $1] } @all_ranges;
			  $i= -1;
			  next;
		   }

		   if($start1 <= $start2      and $end1 >= $end2){    ## -----------------
			   $new_start=$start1;                             #      --------
			   $new_end =$end1;
			   splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			   $i--;
			   next;
		   }
		   if( $start2 <= $start1     and $end2 >= $end1){      ##    -------
			   $new_start=$start2;                              #  ----------------
			   $new_end =$end2;
			   splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			   $i--;
			   next;
		   }
		   if($start1 <= $start2      and $start2 <= $end1){     #  -----------
			   $new_start=$start1;                               #     -----------
			   $new_end =$end2;
			   splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			   $i--;
			   next;
		   }
		   if($start2 <= $start1      and $start1 <= $end2){    #       -----------
			   $new_start=$start2;                              #  ---------
			   $new_end =$end1;
			   splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			   $i--;
			   next;
		   }
	   }
	 }
	 return(\@all_ranges);
}



#__________________________________________________________________________
# Title     : merge_similar_ranges
# Usage     : @all_ranges = @{&merge_similar_seqlets(@all_ranges)};
# Function  : merges ranges(10-20, 11-21 etc) when there is any overlap
#              is present (resulting in average start and end at each level)
#             If you put a reverse range like '2000-20', it will
#              complain and reverse the order and do the job after correction.
#
# Example   : INPUT:
#
#   @input=( '1-30 1-40 1-50',
#            '2-49 4-40 2-99'....)
#
# Keywords  : merge_similar_regions, merge_ranges, merge_regions,
#              merge_sequence_ranges, merge_overlap_ranges, connect_ranges
#              connect_overlapping_ranges, connect_similar_ranges,
#              remove_similar_ranges
# Options   : f=   for setting factor (0.7 for 70% overlap minimum)
#
# Returns   :
# Argument  :
# Category  :
# Version   : 1.3
#--------------------------------------------------------------------------
sub merge_similar_ranges{
	 my (@all_ranges, $new_start, $new_end, @output, $i, $seq1, $start1,
	   $end1, $seq2, $average_leng,
	   $smaller_leng, $start2, $end2, @split, @split1, @split2);
	 my $factor=0.9;     #  0.8 means 80% overlap
	 my $leng_thresh=30;
	 my $optimize=0;
	 for($i=0; $i< @_; $i++){
	  if(ref($_[$i]) eq 'ARRAY'){
		  @all_ranges=@{$_[$i]};
	  }elsif($_[$i]=~/f=(\S+)/){
					$factor=$factor_ori=$1;
	      #print "\n# merge_similar_ranges: Factor used will be $factor\n\n";
	  }elsif($_[$i]=~/z/i){
	      $optimize=1 }
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  Sorting the ranges by the starting range number.(essential)
	 #______________________________________________________________
	 @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			   map { $_=~/(\d+)\-\d+ *$/ and [$_, $1] } @all_ranges;

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  iterating merger
	 #_________________________________________________________
	 for($i=0; $i< @all_ranges; $i++){
			 $factor=$factor_ori;

	   if($all_ranges[$i] =~/(\d+)\-(\d+) *$/){  ## ranges are at the end
		   ($start1, $end1)=($1, $2);
		   if($start1 > $end1){
			  print "\n# merge_similar_ranges: Error, \$start1 :$start1 is larger than \$end1: $end1\n";
			  print "\n# Exchanging the start and end, and starting it all over again";
			  $all_ranges[$i]="$2\-$1";
			  @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			              map { $_=~/(\d+)\-\d+ *$/ and [$_, $1] } @all_ranges;
			  $i= -1;
			  next;
		   }
	   }
			 if($all_ranges[$i+1] =~/(\d+)\-(\d+) *$/){ ## ranges are at the end
		   ($start2, $end2)=($1, $2);
		   if($start2 > $end2){
			  print "\n# merge_similar_ranges: Error, \$start2 :$start2 is larger than \$end2: $end2\n";
			  print "\n# Exchanging the start and end, and starting it all over again";
			  $all_ranges[$i+1]="$2\-$1";
			  @all_ranges=map {$_->[0]} sort { $a->[1] <=> $b->[1] }
			              map { $_=~/(\d+)\-\d+ *$/ and [$_, $1] } @all_ranges;
			  $i= -1;
			  next;
		   }

					 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
					 $average_leng=(($end2+$end1)/2)-(($start1+$start2)/2);   ## this is protein seq ranges
					 $factor-=$average_leng/5000; ## adjusting $factor according to the size of range.
					 #_____________________________________________________________

					 if($start1 <= $start2      and $end1 >= $end2){        ## -----------------

							 if(($end1-$start1)*$factor  <= ($end2-$start2) ){    #      --------
									$new_start=(($start1+$start2)/2);
			      $new_end  =(($end1+$end2)/2);
			      splice(@all_ranges, $i, 2, "$new_start\-$new_end");
			      $i--;			      next;
			   }
		   }
		   if( $start2 <= $start1     and $end2 >= $end1){       ##    -------
							 if(($end2-$start2)*$factor  <= ($end1-$start1) ){  #  ----------------
									 $new_start=(($start2+$start1)/2);
									 $new_end  =(($end1+$end2)/2);
									 splice(@all_ranges, $i, 2, "$new_start\-$new_end");
									 $i--;              next;
							 }
		   }
		   if($start1 <= $start2      and $start2 <= $end1){     #  -----------
							 if(($end1-$start2) >=  ($end2-$start1)*$factor ){ #      -----------
									$new_start=(($start2+$start1)/2);
									$new_end  =(($end1+$end2)/2);
									splice(@all_ranges, $i, 2, "$new_start\-$new_end");
									$i--;               next;
							 }
		   }
		   if($start2 <= $start1      and $start1 <= $end2){     #       -----------
							 if(($end2-$start1) >=  ($end1-$start2)*$factor ){ #    -----------
									 $new_start=(($start2+$start1)/2);
									 $new_end  =(($end1+$end2)/2);
									 splice(@all_ranges, $i, 2, "$new_start\-$new_end");
									 $i--;              next;
							 }
		   }
	   }
	 }
	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  making everything integer at the last minute to save CPU
	 #___________________________________________________________
	 @all_ranges = map{ /(\S+)\-(\S+) *$/ and int($1).'-'.int($2)  } @all_ranges;
	 return(\@all_ranges);
}




#_________________________________________________________________________
# Title     : merge_similar_seqlets
# Usage     : @all_seqlets = @{&merge_similar_seqlets(@all_seqlets)};
# Function  : merges seqlet sets which have identical
#             sequences and share similar regions by connection factor of 30%
#             This means, if any two seqlets from the same sequences which
#             share more than 70% seqlet regions overlapping are merged
#             This only sees the very first sequence in the seqlets line!!!
#             (so, PARTIAL MERGE !!)
# Example   : INPUT:
#
#   @input=( 'seq1_1-30 seq2_1-40 seq3_1-50',
#            'seq1_2-49 seq3_4-40 seq4_2-99'....)
#
#   @output=('seq1_1-30 seq2_1-45 seq3_2-45 seq4_2-99');
#
# Keywords  : merge_similar_sequences, merge_sequence_names, merge_sequences,
#              merge_sequence_ranges, merge_similar_sequences_with_ranges,
#              merge_seqlets, merge_duplication_modules
# Options   :
#
#   f=<digit>   for determing the factor in filtering out non-homologous
#                  regions, 7 = 70% now!!
#   l=<digit>   for seqlet(duplication module) length threshold
#   z           for activating remove_similar_sequences, rather than remove_dup....
#   S  $short_region=  S by S -S  # taking shorter region overlap in removing similar reg
#   L  $large_region=  L by L -L  # taking larger  region overlap in removing similar reg
#   A  $average_region=A by A -A  # taking average region overlap in removing similar reg
#
# Category  :
# Version   : 2.0
#-------------------------------------------------------------------------------
sub merge_similar_seqlets{
	 my (@all_seqlets, @result_all_seqlets, $i, $j, $k, $seq1, $start1, $end1, $seq2,
	   $smaller_leng, $start2, $end2, @split, @split1, @split2,
			 $short_region, $large_region, $average_region, $overlapping_seq_match_size);
	 my $factor=7;     #  30% sequence mismatch region is allowed(3)
	 my $leng_thresh=30;
	 my $optimize=1;
	 $average_region='A'; # default

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
	 # Sorting (parsing) input to get options and input array
	 #_________________________________________________________
	 for($i=0; $i< @_; $i++){
	   if(ref($_[$i]) eq 'ARRAY'){
					 @all_seqlets=@{$_[$i]};
			 }elsif($_[$i]=~/f=(\S+)/){  $factor=$1;
			 }elsif($_[$i]=~/z/i){       $optimize=1;
			 }elsif($_[$i]=~/l=(\d+)/i){ $leng_thresh=$1;
			 }elsif($_[$i]=~/^S/){       $short_region='S';   $large_region=$average_region='';
			 }elsif($_[$i]=~/^L/){       $large_region='L';   $short_region=$average_region='';
			 }elsif($_[$i]=~/^A/){       $average_region='A'; $short_region=$large_region  =''; }
	 }
	 if(@all_seqlets==1){

			 print "\n# (1) merge_similar_seqlets: \@all_seqlets == 1, returning\n" if $verbose;
			 print "\n#  \@all_seqlets = @all_seqlets\n" if $verbose;
			 return(\@all_seqlets);
	 }else{
			 print "\n# (1) merge_similar_seqlets: \@all_seqlets > 1, Processing\n" if $verbose;
			 if( $verbose){
					 for($i=0; $i< @all_seqlets; $i++){
							 print "\n# $all_seqlets[$i]" ;
					 }
			 }
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 # This is to remove which are identical in @all_seqlets;
	 #_________________________________________________________
	 F1: for($i=0; $i< @all_seqlets; $i++){
			my $merged_two_seqlet_lines;

			if($all_seqlets[$i] eq $all_seqlets[$i+1]){
	      print "\n# (2) merge_similar_seqlets: \$all_seqlets\[$i\] equals \$all_seqlets\[$i+1\]\n" if $verbose;
					splice(@all_seqlets, $i+1, 1);
					$i-- if $i >0;
		  next F1;
	  }else{
					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
					# @split1 and 2 are arrays from different string entry in @all_seqlets
					#______________________________________________________________________
					@split1=sort split(/ +/, $all_seqlets[$i]);
					@split2=sort split(/ +/, $all_seqlets[$i+1]);
			}

	  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
	  #  (3) If the first elements of @split1 and 2 are identical, lets merge the two arrays
	  #________________________________________________________________________________
			if($split1[0] eq $split2[0] or $split1[0] eq $split2[1] or $split1[0] eq $split2[2]){
	      print "\n# (3) \$split1[0] and \$split2[0] are identical, good!\n" if $verbose;
		  @split=(@split1, @split2);
		  if(1){ #~~~~~ optimize option removes similar seqlets
						 $all_seqlets[$i]= join(' ', sort @{&remove_similar_seqlets(\@split,
			                              $short_region, $large_region, $average_region)} );
		  }else{
			 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			 # Only removes exactly identical ones
			 #__________________________________________________________
						 $all_seqlets[$i]=  join(' ', @{&remove_dup_in_array(\@split, 's')} );
		  }
					print "\n# (3) New \$all_seqlets\[\$i\] is \n$all_seqlets[$i]\n" if $verbose;
		  splice(@all_seqlets, $i+1, 1);
		  $i-- if $i >0;
		  next F1;
	  }

	  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			# (4) If the first elements of @split1 and 2 are NOT identical, lets check the sequence ranges
	  #_____________________________________________________________________________________________
			if($verbose){
					print "\n# (4) merge_similar_seqlets: the first elements of \@split1 and \@split2 are NOT identical\n";
					print "# (4) >>>>>>>>> Checking the regions of them, \$factor= $factor\n";
			}

			F2: for($j=0; $j < @split1; $j++){
					if($split1[$j] =~/^ *(\S+)_(\d+)\-(\d+)/){
							 my ($seq1, $start1, $end1)=($1, $2, $3);

							 F3: for($k=0; $k<@split2; $k++){
									 if($split2[$k] =~/^ *(\S+)_(\d+)\-(\d+)/){
											 my($seq2, $start2, $end2)=($1, $2, $3);

											 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~````
											 # Check if the seqs are identicl (from the two arrays), no point to merge which are not identical from the first
											 #__________________________________________________________________________________________
											 if($seq1 eq $seq2){
														if($verbose){
																print "# (5) <<<<< The sequence names are identical with different regions, See if we can merge!\n";
														}
														$diff_start=abs($start1-$start2);
														$diff_end  =abs($end1  -$end2  );
														$leng1=$end1-$start1;
														$leng2=$end2-$start2;
														if($leng1 >= $leng2){
																$smaller_leng=$leng2;
																$larger_leng =$leng1
														}else{
																$smaller_leng=$leng1;
																$larger_leng =$leng2
														}

														#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
														# Checking the minimal seq region leng here
														#______________________________________________________
														if($smaller_leng < $leng_thresh){ next }

														$overlapping_seq_match_size=${&get_overlapping_seq_match_size($start1, $end1, $start2, $end2)};
														$averge_seq_leng_of_2_seqs=($leng1+$leng2)/2;

														#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
														# This is the critically important part
														#_______________________________________________________________
														if($average_region){
																$finally_adjusted_seq_leng=$averge_seq_leng_of_2_seqs*($factor/10);
														}elsif($short_region){
																$finally_adjusted_seq_leng=$smaller_leng*($factor/10);
														}elsif($large_region){
																$finally_adjusted_seq_leng=$larger_leng*($factor/10);
														}

														#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
														# Now let's check if we regard them homologous or not\
														#_______________________________________________________
														if( $overlapping_seq_match_size >=  $finally_adjusted_seq_leng){

																@split= (@split1, @split2);
																if ($verbose){
																		print "# (5) merge_similar_seqlets: \$averge_seq_leng_of_2_seqs=$averge_seq_leng_of_2_seqs";
																		print " $$$$ Merging occurs with \n@split  \n";
																}
																if($optimize){ #~~~~~ $optimize option removes similar seqlets
																		$all_seqlets[$i]= join(' ', sort @{&remove_similar_seqlets(\@split,
			                                  $short_region, $large_region, $average_region)} );
																}else{
																		$all_seqlets[$i]= join(' ', @{&remove_dup_in_array(\@split, 's')} );
																}
																$merged_two_seqlet_lines=1;
																splice(@all_seqlets, $i+1, 1);
																$i-- if $i >0;
																next F1;

														}
														#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
														# We believe they are not homologous
														#____________________________________________
														else{
																if ($verbose){
																		print "\n# (5) merge_similar_seqlets !! MERGING DID NOT occur->
																																				$split1[$j] $split2[$k] \n";
																		print "\n#   \$averge_seq_leng_of_2_seqs= $averge_seq_leng_of_2_seqs, \$overlapping_seq_match_size= $overlapping_seq_match_size \$finally_adjusted_seq_leng= $finally_adjusted_seq_leng\n";
																		print "\n# (5) merge_similar_seqlets, \$all_seqlets[$i]\$all_seqlets[$i+1]\n$all_seqlets[$i]\n$all_seqlets[$i+1]\n";
																}
																next F3;
														}
											 }
									 }
									 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
									 # If there is no range (region) in seq naem, let's skip, as there is no way to check
									 #__________________________________________________________________________________
									 else{ # when split2 does not match xxx_10-20 format
											 next;
									 }
							 }
					}else{ # when split1 does not match xxx_10-20 format
							 next;
					}
			}
			unless($merged_two_seqlet_lines){

			}
	 }
	 print "\n# \@all_seqlets is  @all_seqlets =======\n" if $verbose;
	 return(\@all_seqlets);
}






#______________________________________________________________
# Title     : sort_by_digits_in_string
# Usage     :
# Function  : sorts arrays of strings like
#
#   MJ0228_314-573 MJ1197_348-601
#   MJ0228_451-576 sll0078_502-594 sll1425_489-611
#   MJ0228_479-572 sll0078_502-594
#
#   According to the digits after seq names _314-, _451-, _479-
#    in the above
#   This only looks at the very first sequence in the string
#
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.4
#--------------------------------------------------------------
sub sort_by_digits_in_string{
	 my (@out, $i,  @temp1, @temp2, $old, @T);
	 my @array_of_string=sort @{$_[0]};

	 for($i=0; $i<= @array_of_string; $i++){
	  if($array_of_string[$i]=~/^((\S+)_(\d+)\-(\d+) *.*)$/){
		 unless(defined($old)){
			$old=$2;
			push(@temp1, $1);
			push(@temp2, $3);
		    next;
		 }elsif($2 eq $old){
			push(@temp1, $1);
			push(@temp2, $3);
			next;
		 }elsif( ($2 ne $old)||($i==$#array_of_string) ){
		    &sort_and_put_strings_to_out;
		    push(@temp1, $1);
		    push(@temp2, $3);
			$old  =$2;

			#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			sub sort_and_put_strings_to_out{
			   my ($j, $k, $num);
			   @temp2=sort { $a<=>$b } @temp2; ## sort numerically
			   F1: for($j=0; $j< @temp2; $j++){
				  $num=$temp2[$j];
				  for($k=0; $k< @temp1; $k++){
					 if($temp1[$k]=~/^(\S+)_$num\-/){
						push(@out, $temp1[$k]);
						splice(@temp1, $k, 1);
						$k--;
						splice(@temp2, $j, 1);
						$j--;
						next F1;
					 }
				  }
			   }
			   @temp1=@temp2=();

			}#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	     }
	  }elsif($i > 0){ ## for the very last sort
		  &sort_and_put_strings_to_out;
	  }
	 }
	 return(\@out);
}



#______________________________________________________________
# Title     : sort_words_in_string
# Usage     :
# Function  : sort words in strings sperated by ' ' or "\n"
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : sort_words_in_sequences, sort_sequences_in_string,
#             sort_strings_in_string, sort_string_by_words, sort_elements_in_string
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub sort_words_in_string{
	 my @in=@{$_[0]} || @_;
	 my @OUT;
	 for (@_){
	  push(@OUT, join(' ', sort split(/ +|\n/) ));
	 }
	 return(\@OUT);
}



#__________________________________________________________________________
# Title     : convert_hmmls_to_msp_files
# Usage     : @out=@{&convert_hmmls_to_msp_files(\@file)};
# Function  :
# Example   :
# Keywords  : convert_hmmls_to_msp
# Options   :
#   S=$single_out_file_name   for producing single msp file with all the hmmls contents
#   E=Enguiry_name    for specifying enquiry seq name rather than 'HMM', the default
#   $bit_score_threshold= by t=
# Returns   :
# Argument  :
# Version   : 1.4
#----------------------------------------------------------------------------
sub convert_hmmls_to_msp_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (@all_the_files_written, $written_file, $base,
            $bit_score_threshold, $out_msp_file_name, $input_hmmls_file,
	    %out);
	$bit_score_threshold=10;

        if($vars{'S'}=~/\S/){ $single_out_file_name=$vars{'S'} };
        if($vars{'t'}=~/\S/){ $bit_score_threshold =$vars{'t'} };
	if($vars{'E'}=~/\S/){ $enquiry_name        =$vars{'E'}   # default $enquiry_name is input file base
	}else{ $enquiry_name='HMM' }

	for($i=0; $i< @file; $i++){

              if($vars{'E'}=~/\S/){ $base=$enquiry_name;   # When $enquiry_name is given, it uses for output name
              }else{
                  $base=${&get_base_names($file[$i])};
              }
              $out_msp_file_name="$base\.msp";
              $input_hmmls_file=$file[$i];
              if($vars{'S'}=~/\S/){
                      %out=(%out, %{&open_hmmls_files($input_hmmls_file,
                                       'm', "E=$enquiry_name",
                                       "t=$bit_score_threshold"
                                       )} );
              }else{
                      %out=%{&open_hmmls_files($input_hmmls_file,
                             "t=$bit_score_threshold", 'm')};  # m for msp out
                      $written_file=${&write_msp_files(\%out, $out_msp_file_name)};
                      push(@all_the_files_written, $written_file);
              }
	}

	if($vars{'S'}=~/\S/){
		$written_file=${&write_msp_files(\%out, $single_out_file_name)};
		push(@all_the_files_written, $written_file);
	}
	if(@all_the_files_written > 1){
		return(\@all_the_files_written);
	}else{
		return(\$all_the_files_written[0]);
	}
}


#______________________________________________________________
# Title     : convert_mmp_to_mrg
# Usage     :
# Function  :
# Example   :
#  Example OUT as string
#
#   slr1950 sll1920 sll0672 sll1076 sll1614 slr0797 slr0798 slr0822 slr1729
#   slr1729 sll1076 sll0672 sll1614 sll1920 slr0797 slr0798 slr0822 slr1950
#
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub convert_mmp_to_mrg{
	 my @mmp=@{$_[0]};
	 my($i, $long, $out, @out, $leading_seq);
	 for($i=0; $i< @mmp; $i++){
	 if(($mmp[$i]=~/^ *\d+ +\d+\.?[e\-\d]* +\d+ +\d+ +(\S+) +\d+ +\d+ +(\S+) *$/)&&($1 eq $2)){
		next;
	 }elsif($mmp[$i]=~/^ *\d+ +\d+\.?[e\-\d]* +\d+ +\d+ +(\S+) +\d+ +\d+ +(\S+) *$/){
		$leading_seq=$1;
		$long=$2;
		$long=~s/\,/ /g;
		$out="$leading_seq $long";
		push(@out, $out);
	 }
	 }
	 return(\@out);
}

#_______________________________________________________________________________
# Title     : add_ranges_in_msp_line
# Usage     :
# Function  : this adds ranges to the seqnames of msp files
#             mmp line is msp line with additional sequences at the end
# Example   :
# Keywords  : convert_msp_to_mmp, convert_msp, convert_msp_2_mmp
#             change_msp_to_mmp, add_range_in_msp, convert_msp_line_to_mmp_line
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.5
#-------------------------------------------------------------------------------
sub add_ranges_in_msp_line{
	 my $input_msp=${$_[0]} || $_[0];
	 my($score, $evalue, $long_1, $new_seq1, $new_seq2, $middle,
	  $start1, $end1, $start2, $end2, $seq1, $seq2, $new);

	 if($input_msp=~/^ *(\d+) +(\S+) *\S*[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)/){
	  ($score, $evalue, $start1, $end1, $start2, $end2)=($1, $2, $3, $4, $6, $7);
	  ($seq1, $seq2)=($5, $8);
	  if($seq1=~/(\S+)\_\d+\-\d+/){
		 $new_seq1="$1\_$start1\-$end1";
	  }else{
		 $new_seq1="$seq1\_$start1\-$end1";
	  }
	  if($seq2=~/(\S+)\_\d+\-\d+/){
		 $new_seq2="$1\_$start2\-$end2";
	  }else{
		 $new_seq2="$seq2\_$start2\-$end2";
	  }
	  $new=sprintf("%-6s %-9s %-5s %-5s %-32s %-5s %-5s %-32s",
			$score, $evalue, $start1, $end1, $new_seq1, $start2, $end2, $new_seq2);
	 }
	 return(\$new);
}


#______________________________________________________________
# Title     : convert_msp_line_to_mmp_line
# Usage     :
# Function  : this adds ranges to the seqnames of msp files
#             mmp line is msp line with additional sequences at the end
# Example   :
# Keywords  : convert_msp_to_mmp, convert_msp, convert_msp_2_mmp
#             change_msp_to_mmp, add_range_in_msp
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.5
#--------------------------------------------------------------
sub convert_msp_line_to_mmp_line{
	 my $input_msp=${$_[0]} || $_[0];
	 my($score, $evalue, $long_1, $new_seq1, $new_seq2, $middle,
	  $start1, $end1, $start2, $end2, $seq1, $seq2, $new);

	 if($input_msp=~/^ *(\d+) +(\S+) *\S*[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)/){
	  ($score, $evalue, $start1, $end1, $start2, $end2)=($1, $2, $3, $4, $6, $7);
	  ($seq1, $seq2)=($5, $8);
	  if($seq1=~/(\S+)\_\d+\-\d+/){
		 $new_seq1="$1\_$start1\-$end1";
	  }else{
		 $new_seq1="$seq1\_$start1\-$end1";
	  }
	  if($seq2=~/(\S+)\_\d+\-\d+/){
		 $new_seq2="$1\_$start2\-$end2";
	  }else{
		 $new_seq2="$seq2\_$start2\-$end2";
	  }
	  $new=sprintf("%-6s %-9s %-5s %-5s %-32s %-5s %-5s %-32s",
			$score, $evalue, $start1, $end1, $new_seq1, $start2, $end2, $new_seq2);
	 }
	 return(\$new);
}

#________________________________________________________________________________
# Title     : merge_sequence_alignments
# Usage     : &merge_sequence_alignments(@seq);  while @seq has
#              @seq=(\%hash1, \%hash2);  while %hash1 and %hash2 have
#    %hash1=qw(seq1 ANN-NTMQQRRQQQRKRRRQQQSSSSTTST seq2 --NNN--QQ--QQQ--RRRR--SSSS--);
#    %hash2=qw(seq2 NN-QQQQQ--RRRR----SS--SS---    seq3 -NNXQQQXQRTRRRXTTSTSSMMSSTTT);
#
# Function  :
# Example   :
# Keywords  : combine_sequence_alignment, merge_sequence_alignment_pairs
#             merge_seq_alignment, make_interm_alignment, make_3_way_alignment
#             merge_alignment, combine_alignment
# Options   :
#    l=  for sequence block length by print_seq_in_block subroutine
#    t=  for specifying the length of seq names shown.
#    t   for truncating the seq names in printing out.
#    s   for sorting the final output lines (default anyway for print_seq_in_block)
#
# Category  :
# Version   : 1.3
#--------------------------------------------------------------------------------
sub merge_sequence_alignments{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (@splited1_common, @splited2_common, @splited1_non_common, @splited2_non_common,
	   @gap_pos1, @gap_pos2, $block_length, $sort_seq_names,
	   $truncate_name_to_10_char, $trunc_name_to, $block_range );
	$block_length=60;
	if($vars{'l'}=~/\d+/){  $block_length=$vars{'l'}; }
	if($char_opt=~/t/){    $truncate_name_to_10_char='t' }
	if($vars{'t'}=~/\d+/){ $trunc_name_to=$vars{'t'}; }
	if($char_opt=~/s/){    $sort_seq_names='s' }
	if($vars{'r'}=~/(\d+\-\d+)/){ $block_range= $1 };

	for($i=0; $i< @hash; $i+=2){
		my %hash1=%{$hash[$i]};
		my %hash2=%{$hash[$i+1]};
		my ($pair1_name1, $pair1_name2)=keys  %hash1;
		my ($pair2_name1, $pair2_name2)=keys  %hash2;

		# finding the common entry
		if("$pair1_name1" eq "$pair2_name1"){
			 $common_seq_entry=$pair1_name1;
			 $non_common_seq_entry1=$pair1_name2;
			 $non_common_seq_entry2=$pair2_name2;
		}elsif("$pair1_name1" eq "$pair2_name2"){
			 $common_seq_entry=$pair2_name2;
			 $non_common_seq_entry1=$pair1_name2;
			 $non_common_seq_entry2=$pair2_name1;
		}elsif("$pair1_name2" eq "$pair2_name1"){
			 $common_seq_entry=$pair1_name2;
			 $non_common_seq_entry1=$pair1_name1;
			 $non_common_seq_entry2=$pair2_name2;
		}else{
			 print "\n# merge_sequence_alignments:
			 			 ERROR, I can not find common seq entry: $pair1_name1 $pair2_name1 $pair2_name1 $pair2_name2\n";
			 next;
		}

		%hash1=%{&make_seq_alignment_length_even(\%hash1)};
		%hash2=%{&make_seq_alignment_length_even(\%hash2)};


		#~~~~~~~~~~~~~~~~~~~~~~~`~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
		# Aligning the start of common sequences
		#  i.e. putting pre gap part to a sequence which does not have it.
		#
		#  NKNWKLRAHLC-KHTGEKP---FPCKEEGCDKGFTSLHHLTRHS---ITHTGEKN--FKCDSDKCDLTFTTKANMKKHFNRFH splited1_common
		#  --------------NKNWKLRAHLCKHTGEKPFPCKEEGCDKGFTSLHHLTRHSITHTGEKNFKCDSDKCDLTFTTKANMKKHFNRFH-- splited2_common
		#  becomes->
		#
		#  --------------NKNWKLRAHLC-KHTGEKP---FPCKEEGCDKGFTSLHHLTRHS---ITHTGEKN--FKCDSDKCDLTFTTKANMKKHFNRFH splited1_common
		#  --------------NKNWKLRAHLCKHTGEKPFPCKEEGCDKGFTSLHHLTRHSITHTGEKNFKCDSDKCDLTFTTKANMKKHFNRFH-- splited2_common
		#________________________________________________________________________
		if( $hash1{$common_seq_entry}=~/^(\-+)/){
			$hash2{$common_seq_entry}="$1".$hash2{$common_seq_entry};
			$hash2{$non_common_seq_entry2}="$1".$hash2{$non_common_seq_entry2};
		}elsif($hash2{$common_seq_entry}=~/^(\-+)/){
			$hash1{$common_seq_entry}="$1".$hash1{$common_seq_entry};
			$hash1{$non_common_seq_entry1}="$1".$hash1{$non_common_seq_entry1};
		}

		@gap_pos1=@{&get_gap_positions(\$hash1{$common_seq_entry}, 'p' )}; # p means all positive positions wanted
		@gap_pos2=@{&get_gap_positions(\$hash2{$common_seq_entry}, 'p' )};

		@splited1_common    =split(//, $hash1{$common_seq_entry} );
		@splited1_non_common=split(//, $hash1{$non_common_seq_entry1} );
		@splited2_common    =split(//, $hash2{$common_seq_entry} );
		@splited2_non_common=split(//, $hash2{$non_common_seq_entry2} );

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		#  Core algorithm
		#__________________________________________
		for($i=0; $i< @splited1_common; $i++){
		    if($splited1_common[$i] ne $splited2_common[$i]){
				if($splited1_common[$i]=~/\W/){
					 splice(@splited2_common, $i, 0, '-');
					 splice(@splited2_non_common, $i, 0, '-');
				}elsif($splited2_common[$i]=~/\W/){
				     splice(@splited1_common, $i, 0, '-');
					 splice(@splited1_non_common, $i, 0, '-');
					 if($splited1_common[$i] eq $splited1_non_common[$i]){
						 $homology_line2[$i]=':';
					 }else{
						 $homology_line2[$i]=' ';
					 }
				}
		    }
		}

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Making Homology line
		#_____________________________________________
		for($i=0; $i< @splited1_non_common; $i++){
			if($splited1_non_common[$i] eq '-'){ $homology_line1[$i]=' '; }
			if($splited1_non_common[$i] eq $splited1_common[$i] and $splited1_non_common[$i] eq $splited2_non_common[$i] ){
				$homology_line1[$i]= $splited1_non_common[$i]  unless $splited1_non_common[$i] eq '-';
			}elsif($splited1_non_common[$i] eq $splited1_common[$i] ){
				$homology_line1[$i]=$splited1_non_common[$i] unless $splited1_non_common[$i] eq '-';
			}elsif($splited2_non_common[$i] eq $splited1_non_common[$i]){
				$homology_line1[$i]=':' unless $splited1_non_common[$i] eq '-';
			}elsif( ${&amino_acid_homology_matrix($splited2_non_common[$i], $splited1_non_common[$i])} ){
				$homology_line1[$i]='.' unless $splited1_non_common[$i] eq '-';
			}
			else{ $homology_line1[$i]=' '; }

			if($splited2_non_common[$i] eq '-'){ $homology_line2[$i]=' '; }
			if($splited2_non_common[$i] eq $splited2_common[$i] and $splited2_non_common[$i] eq $splited1_non_common[$i] ){
				$homology_line2[$i]=$splited2_non_common[$i]  unless $splited2_non_common[$i] eq '-';
			}elsif($splited2_non_common[$i] eq $splited2_common[$i]){
				$homology_line2[$i]=$splited2_non_common[$i] unless $splited2_non_common[$i] eq '-';
			}elsif($splited2_non_common[$i] eq $splited1_non_common[$i]){
				$homology_line2[$i]=':' unless $splited2_non_common[$i] eq '-';
			}elsif( ${&amino_acid_homology_matrix($splited2_non_common[$i], $splited1_non_common[$i])} ){
				$homology_line2[$i]='.' unless $splited1_non_common[$i] eq '-';
			}else{ $homology_line2[$i]=' '; }

		}
		if($verbose){
			 print @splited1_non_common, " \t splited1_non_common\n";
			 #print @homology_line1,      " \t homology line\n";
			 #print @splited1_common,    " \t splited1_common\n";
			 print @homology_line1,      " \t homology line\n";
			 print @splited2_common,     " \t splited2_common\n";
			 print @homology_line2,      " \t homology line\n";
			 print @splited2_non_common, " \t splited2_non_common\n";
		}
		$out_hash_issa{"1 $non_common_seq_entry1 "}=join('', @splited1_non_common);
		$out_hash_issa{"3 $common_seq_entry"}=join('', @splited2_common);
		$out_hash_issa{'2 homol_line1'}=join('', @homology_line1);
		$out_hash_issa{'4 homol_line2'}=join('', @homology_line2);
		$out_hash_issa{"5 $non_common_seq_entry2"}=join('', @splited2_non_common);
		&print_seq_in_block(\%out_hash_issa, $sort_seq_names,  "t=$trunc_name_to", "f=defaul_result\.issa",
		                      $truncate_name_to_10_char, "l=$block_length", "r=$block_range");
	}
}

#________________________________________________________________________________________
# Title     : merge_sequence_in_msp_file
# Usage     :
# Function  :
# Example   : INPUT: (MSP file) ===>
#  59     2.6        47    64     d2pia_3        10    30     d1erd___10-30
#  161    1.1e-07    24    91     d2pia_3        16    85     d1frd___16-85
#
#  722    0          1     106    d1put__        1     106    d1put___1-106
#  66     4.9        2     68     d1put__        43    106    d2lbp___43-106
#  69     1.3        12    49     d1put__        81    120    d1cgo___81-120
#
#  60     3.3        13    38     d1frd__        32    57     d1orda1_32-57
#  65     1.7        21    58     d1frd__        40    69     d2mtac__40-69
#
#   ==== OUTPUT ===>
#    d1frd___1-98 d1frd___1-98_1-98 d1frd___16-85 d2pia_3_24-91_24-91
#    d1frd___16-85_16-85 d2pia_3_24-91
#    d1put___1-106 d1put___1-106_1-106
#    d2pia_3_1-98 d2pia_3_1-98_1-98
#
# Keywords  : mergr_seq_in_msp_file, merge_sequence_in_msp, merge_sequences_in_msp_file
# Options   :
#  $dynamic_factor =  y by y -y   # adjusting factor value dynamically(more seq higher factor)
#  $short_region   =  S by S -S  # taking shorter region overlapped in removing similar regions
#  $large_region   =  L by L -L  # taking larger  region overlapped in removing similar regions
#  $average_region =  A by A -A # taking average region overlapped in removing similar regions
#
# Version   : 2.9
#----------------------------------------------------------------------------------------
sub merge_sequence_in_msp_file{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
        \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
        \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my ($msp_value, @all_seqlets, %temp_hash, @msp_chunks, $clu_out, $size_of_all_seqlets,
        $base, $optimize, $mrg_out, @arr, $sat_out, %final_hash_out, @final_pre_hash,
        $length_thresh, $merge, $factor, $evalue, $score, $dynamic_factor, $score_match,
        $eval_match, $query_seq, $query_start, $query_stop, $match_seq, $match_start,
        $short_region, $large_region, $average_region, $original_clu_size, $match_stop);
    $factor=$default_factor=7; #~~~~ default connection factor U, 7 means 70% now!
    $length_thresh=30;
    $evalue=10;
    $score =75;
    if(@file < 1){ print "\n# (E) merge_sequence_in_msp_file needs at least 1 MSP file\n"; exit }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Following changes the defaults with given parameters
    #_____________________________________________________________
    if($char_opt=~/z/i){       $optimize='z';    ## This will cause using remove_similar_seqlets than remove_dup_in_array !
    }if($char_opt=~/m/){       $merge='m';
    }if($char_opt=~/y/){       $dynamic_factor='y';
    }if($char_opt=~/v/){       $verbose='v';
    }if($char_opt=~/S/){       $short_region='S';
    }if($char_opt=~/L/){       $large_region='L';
    }if($char_opt=~/A/){       $average_region='A';
    }if($vars{'T'}=~/\d+/){    $length_thresh=$vars{'T'};
    }if($vars{'f'}=~/\S+/){    $factor=$vars{'f'};  ## Here I give a generous $factor !
    }if($vars{'s'}=~/\d+/){    $score = $vars{'s'};
    }if($vars{'e'}=~/\d+/){    $evalue= $vars{'e'}; }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #  Just to inform what parameters have been chosen
    #_____________________________________________________________
    if($verbose){
        print "\n# (1) merge_sequence_in_msp_file : default \$score      : $score";
        print "\n#                                : default \$evalue     : $evalue";
        print "\n#                                : used \$length_thresh : $length_thresh";
        print "\n#                                : default \$factor     : $default_factor";
        print "\n#                                : used    \$factor     : $factor";
        print "\n#                                : \$dynamic_factor     : $dynamic_factor\n";
    }

    for($c=0; $c< @file; $c++){
       open(MSP, "$file[$c]");
       $base=${&get_base_names($file[$c])};
       $clu_out="$base\_F${factor}.clu"; # <-- This is the most important output. Sarah's program will process this
       $sat_out="$base\_F${factor}.sat";
       print "# (2) merge_sequence_in_msp_file : processing $file[$c] for $clu_out\n" if $verbose;
       my @msp1=<MSP>;

       for($i=0; $i< @msp1; $i++){
          #~~~~~~~~~~ Include range or NOT in the seq name ~~~~~~~~~~~~~~~~~~~~~~~~~~`
          # %temp_hash is just to get the chunk of MSP block. As msp file uses empty line as a delimiter
          #____________________________________________________________________________
          if($char_opt=~/r/){
             if($msp1[$i]=~/^ *(\d+) +(\S+) *\S* +(\d+) +(\d+) +(\S+)[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)/){
                  $score_match=$1;
                  $eval_match=$2;
                  $query_seq=$5;
                  $query_start=$3;
                  $query_stop=$4;
                  $match_seq=$8;
                  $match_start=$6;
                  $match_stop=$7;
                  if($query_seq=~/\S+_\d+\-\d+$/){ $new_seq1=$query_seq }else{ $new_seq1="$query_seq\_$query_start\-$query_stop"; }
                  if($match_seq=~/\S+_\d+\-\d+$/){ $new_seq2=$match_seq }else{ $new_seq2="$match_seq\_$match_start\-$match_stop"; }

                  if($new_seq1 eq $new_seq2){
                     print "# (3.0) merge_sequence_in_msp_file: Skipped = $msp1[$i]"  if $verbose;
                     next
                  };

                  if($score_match < $score or $eval_match > $evalue){
                      print "# (3.1) merge_sequence_in_msp_file: Skipped = $msp1[$i]"  if $verbose;
                      next
                  };
                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  # Modifying $msp1[$i] line !!!
                  #______________________________
                  $msp1[$i]=sprintf("%s %-3s %s %s %s %s %s %s",
                                $score_match, $eval_match, $query_start, $query_stop, $new_seq1, $match_start,
                                $match_stop, $new_seq2);
                  $temp_hash{$query_seq}.="$msp1[$i]\n";
             }
          }else{
             if($msp1[$i]=~/^ *(\d+)[ \t]+(\S+)[ \t]*\S*[ \t]+\d+[ \t]+\d+[ \t]+(\S+)[_\d+\-\d+]? +\d+[\t ]+\d+[ \t]+\S+/){
                  if($1 < $score or $2 > $evalue){
                  print "# (3.0) merge_sequence_in_msp_file: Skipped = $msp1[$i]"  if $verbose;
                  next };
                  $temp_hash{$3}.="$msp1[$i]\n";
             }
          }#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      }
      close(MSP);
    }
    $original_clu_size=@msp_chunks= values(%temp_hash); ## Using temp hash is more than 2 times faster than push

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Determining the dynamic factor here (when 'd' opt is set)
    #____________________________________________________________
    if($dynamic_factor){
        #--> 100 => 10, 1000 => 15, 10000 => 20
        print "\n# ### \$factor: $factor\n";
        $factor += (log($original_clu_size)*5)/10 - 1; ## This is a simplistic.
        if($factor > 9.5){ $factor=9.5 } # this is the very upper limit for any factor.
        print "\n# ### \$factor: $factor\n";
    }

    if($verbose){
        print "# (4) merge_sequence_in_msp_file: The msp chunks used are:\n";
        for($i=0; $i< @msp_chunks; $i++){
            print "$msp_chunks[$i]\n";
        }
    }

    for($i=0; $i< @msp_chunks; $i++){
        print "\n# (5) merge_sequence_in_msp_file: Processing eash chunk with merge_sequence_in_msp_chunk\n" if $verbose;
        @arr=@{&merge_sequence_in_msp_chunk($msp_chunks[$i], $verbose, $optimize,
                  "$merge", "e=$evalue", "s=$score", "f=$factor", "T=$length_thresh",
                  $short_region, $large_region, $average_region)};
        push(@all_seqlets,  @arr);
    }

    #~~~~~~~~~ sorting inner sequences in strings ~~~~~~~~~
    #______________________________________________________
    @all_seqlets=@{&sort_words_in_string(@all_seqlets)}; ## This speeds up about 2 times !!!

    #~~~~~~~ Sort by the _digit-  in seqlet names ~~~~~~~~~
    @all_seqlets= map{$_->[0]} sort{$a->[1] cmp $b->[1] or $a->[2] <=> $b->[2]  }
                  map {/^ *((\S+)_(\d+)\-(\d+).*)/ && [$1, $2, $3, $4]} @all_seqlets;
    if(  $verbose){
        print "\n# (6) merge_sequence_in_msp_chunk: Showing the very final result before merging\n";
        for($i=0; $i< @all_seqlets; $i++){
            print "\n$all_seqlets[$i]";
        }
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # merge sequences in a simple way until there is no change in the array
    #  This is an incomplete merge(merges first seqlets of string ...
    #______________________________________________________________________
    for($i=0; $i< @msp_chunks; $i ++){
         ITERATION_RETURN_POINT:
         $size_of_all_seqlets=@all_seqlets;
         @all_seqlets = @{&merge_similar_seqlets(\@all_seqlets, $optimize,
                                  $short_region, $large_region, $average_region, "f=$factor")};
         if($size_of_all_seqlets > @all_seqlets){
                 @all_seqlets = @{&merge_similar_seqlets(\@all_seqlets, $optimize,
                                $short_region, $large_region, $average_region, "f=$factor")};
                 goto ITERATION_RETURN_POINT;
         }else{
             last;
         }
    }

    if($optimize){
        @all_seqlets=@{&remove_similar_seqlets(\@all_seqlets,
                        $short_region, $large_region, $average_region)};
        @all_seqlets=@{&remove_dup_in_array(\@all_seqlets)};

    }else{
        @all_seqlets=@{&remove_dup_in_array(\@all_seqlets)};
    }
    return(\@all_seqlets);
}




#__________________________________________________________________________
# Title     : merge_sequence_in_msp_chunk
# Usage     :
# Function  : merges sequences which are linked by common regions
#             This filters the sequences by evalue and ssearch score
#             This is the main algorithm of merging similar sequences.
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : connect_sequence_in_msp, link_sequence_in_msp_chunk
#             connect_sequence_in_msp_chunk, link_sequence_in_msp
#             merge_sequence, link_sequence, connect_sequence
# Options   : _  for debugging.
#             #  for debugging.
#             m  for merge file output format (.mrg)
#             t= for threshold of seqlet length eg)  "t=30"
#             f= for overlap factor (usually between 2 to 7 )
#                 2 means, if the two regions are not overlapped
#                  by more than HALF of of the smaller region
#                  it will not regard as common seqlet block
#             s= for ssearch score minimum
#             e= for ssearch e value maximum
#             S  for S -S  # taking shorter region overlapped in removing similar regions
#             L  for L -L  # taking larger  region overlapped in removing similar regions
#             A  for A -A # taking average region overlapped in removing similar regions
#
# Returns   :
# Argument  :
# Version   : 2.4
#--------------------------------------------------------------
sub merge_sequence_in_msp_chunk{
   #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
   my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
   my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
   my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
   my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
   my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
   if($debug==1){print "\n\t\@hash=\"@hash\"
      \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
      \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
   #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my ($ssearch_score2, $evalue_found2, $evalue_found1, $ssearch_score1, $optimize );
   my ($L, %out_hash, @out, $LL, @Final_out, $verbose, $final_factor, $R_diff,
       $short_region, $large_region, $average_region);
   my $factor =4; # default factor for around 30% sequence mis-overlap is the threshold for common block
   #~~~~~~~~~~~~~~ The lower the factor the larger clustering will occur ~~~~~~~~~~~~
   my $score  =75; # default ssearch score. seq below this will be chucked out
   my $evalue =40; # default maximum e value used. Seq higher than this will be thrown out
   my $length_thresh =30; # sequence length threshold. overlap less than this will be ignored

   if($char_opt=~/v/){     $verbose = 'v'
   }if($char_opt=~/z/){    $optimize = 'z'
   }if($char_opt=~/S/){    $short_region='S';
   }if($char_opt=~/L/){	   $large_region='L';
   }if($char_opt=~/A/){	   $average_region='A'; }

   if($vars{'T'}=~/\d+/){
	  $length_thresh=$vars{'T'}; print "\n# merge_sequence_in_msp_chunk: Thresh is $length_thresh\n" if (defined $verbose);
   }if($vars{'f'}=~/\S+/){
	  $factor=$vars{'f'}; print "\n# merge_sequence_in_msp_chunk: Factor is $factor\n" if (defined $verbose);
   }if($vars{'s'}=~/\d+/){
	  $score = $vars{'s'}; print "\n# merge_sequence_in_msp_chunk: Score is $score\n" if (defined $verbose);
   }if($vars{'e'}=~/\d+/){
	  $evalue= $vars{'e'}; print "\n# merge_sequence_in_msp_chunk: Evalue is $evalue\n" if (defined $verbose);
   }
   my @seqlets=split(/\n+/, (${$_[0]} || $_[0]) );
   print "@seqlets" if $verbose;

   F1: for($i=0; $i < @seqlets; $i ++){
	  if($seqlets[$i]=~/^ *((\d+) +(\d+\.?[e\-\d]*) +(\d+) +(\d+) +(\S+) +(\d+) +(\d+)) +(\S+) *(.*)/){
              if($6 eq $9){ splice(@seqlets, $i, 1); $i--; next };
              ($long_match1, $enq_seq1, $mat_seq1, $R_start1, $R_end1 )=($1, $6, $9, $4, $5);
              $R_leng1=$R_end1-$R_start1;  $ssearch_score1= $2;  $evalue_found1 = $3;
	  }
	  if( ($R_leng1 < $length_thresh) || ($ssearch_score1 < $score) ){ splice(@seqlets, $i, 1); $i--; next; }
	  if( $evalue_found1 > $evalue){ splice(@seqlets, $i, 1); $i--; next; }

	  F2: for($j=0; $j < @seqlets; $j ++){
		 if($seqlets[$i] eq $seqlets[$j]){ next };
		 if($seqlets[$j]=~/^ *((\d+) +(\d+\.?[e\-\d]*) +(\d+) +(\d+) +(\S+) +(\d+) +(\d+)) +(\S+) *(.*)/){
			($long_match2, $enq_seq2, $mat_seq2, $R_start2, $R_end2)=($1, $6, $9, $4, $5);
			$R_leng2=$R_end2-$R_start2;			$ssearch_score2=$2;			$evalue_found2= $3;
	     }
		 if( ($R_leng2 < $length_thresh)||($ssearch_score2 < $score) ){ splice(@seqlets, $j, 1); $j--; next; }
		 if( $evalue_found2 > $evalue){ splice(@seqlets, $j, 1); $j--; next; }

		 $R_diff=abs($R_leng1-$R_leng2)/2;   ## <<<---- Note it is div by 2

		 if($R_leng2 < $R_leng1){ $smaller_leng=$R_leng2; }else{ $smaller_leng=$R_leng1; }

		 $Start_diff=abs($R_start1-$R_start2)/2; ## <<<---- Note it is div by 2
		 $final_factor=$smaller_leng/$factor;


		 #~~~~~~~~~~ If average R_diff and average Start_diff are less then 1/7 of the smaller seqlet
		 #~~~~~~~~~~ we regard they are same selqets
		 if(( $R_diff < $final_factor ) &&       ### $Start_diff is essential!
			($Start_diff < $final_factor ) ){  ### if diff is less than around 30% of the smaller length
			if($verbose=~/v/){
			   print "\n\$R_diff:$R_diff \$Start_diff:$Start_diff $smaller_leng $final_factor $factor";
			}
			if($R_leng2 >= $R_leng1){
			       #~~~~~ $mat_seq1 or $mat_seq2 can increase to 'slr1453,sll0238', so you need ',' in the middle only
				   $extended_name="$mat_seq2,$mat_seq1";
				   $L=length($extended_name);
				   $LL=length($long_match2)+2;
				   $seqlets[$i]= sprintf("%-${LL}s %-${L}s", $long_match2, $extended_name);
				   splice(@seqlets, $j, 1);
				   $i-- unless($i==0);
				   $j--;
				   next F1;
			}elsif( $R_leng1 >= $R_leng2){  ## chooses the bigger range seq
				   $extended_name="$mat_seq1,$mat_seq2"; # must be ',' not ' '
				   $L=length($extended_name);
				   $LL=length($long_match1)+2;
				   $seqlets[$i]=sprintf("%-${LL}s %-${L}s", $long_match1, $extended_name);
				   splice(@seqlets, $j, 1);
                   $i-- unless($i <= 0);
				   $j--;
				   next F1;
			}
	     }else{
			next F2;
		 }
	  }
   }
   if($char_opt=~/m/){
      for($i=0; $i< @seqlets; $i++){
         if($seqlets[$i]=~/^ *\d+ +\d+\.?[e\-\d]* +\d+ +\d+ +(\S+) +\d+ +\d+ +(\S+) *$/){
            if($1 eq $2){ next }
            $leading_seq=$1; $long=$2; $long=~s/\,/ /g;
            push(@Final_out, "$leading_seq $long" );
         }
      }
   }
   @Final_out=sort @Final_out;
   print "\n\n\n# \@Final_out\n@Final_out \n=================\n " if $verbose;
   return(\@Final_out);
}


#______________________________________________________________
# Title     : get_overlapping_range
# Usage     : @n1=@{&get_overlapping_range(\@ranges1, \@ranges2)};
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : get_overlapping_range_in_msp, get_overlapping_range_in_msp_file,
#             get_overlapping_seq_match_range, get_overlap_seq_match_range
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_overlapping_range{
	 my (@new_range, $R_start1, $R_start2);
	 ($R_start1, $R_end1)=@{$_[0]}[0..1];
	 ($R_start2, $R_end2)=@{$_[1]}[0..1];

	 if(($R_start1 <= $R_start2)&&        # ------------
	 ( $R_end1 >= $R_end2) ){           #   -------
	   @new_range= ($R_start2, $R_end2);
	 }elsif(($R_start1 <= $R_start2)&&    # -----------
	 ( $R_end1 <= $R_end2) &&           #    -----------
	 ( $R_end1 >  $R_start2) ){
	   @new_range= ($R_start2, $R_end1);
	 }elsif(($R_start1 >= $R_start2)&&    #    -----------
	 ( $R_end1 >= $R_end2  ) &&         # -----------
	 ( $R_end2 >  $R_start1) ){
	   @new_range= ($R_start1, $R_end2);
	 }elsif(($R_start1 >= $R_start2)&&    #   ------
	 ( $R_end1 <= $R_end2) ){           # -----------
	   @new_range= ($R_start1, $R_end1);
	 }else{                                #  ----
	  @new_range=(0,0);                  #        --------
	 }
	 return(\@new_range);
}

#______________________________________________________________________________
# Title     : find_source_perl_library
# Usage     : $source_library=${&find_source_perl_library};
# Function  : gets the default perl sub source library from ENV setenv
# Example   :
# Keywords  :
# Options   :
# Author    :
# Category  :
# Version   : 1.1
#------------------------------------------------------------------------------
sub find_source_perl_library{
     my($source_library);
     print "\n# $0: You did not use \"s=\" option for \$source_library\n";
     print "\n#     I am trying to retrieve your default source lib. \n";

     if( defined( $ENV{'MY_PERL_LIB'} ) ){
            $source_library=$ENV{'MY_PERL_LIB'};
     }elsif( defined( $ENV{'BIO_PERL'} ) ){
            $bioperl_lib=$ENV{'BIO_PERL'};
     }elsif(-e "/gn0/jong/Perl/Bio.pl"){
            $source_library="/gn0/jong/Perl/Bio.pl";
     }elsif(-e "/home/jong/Perl/Bio.pl"){
            $source_library="/home/jong/Perl/Bio.pl";
     }elsif(-e "/Perl/Bio.pl"){
            $source_library="/Perl/Bio.pl";
     }elsif(-e "Bio.pl"){
            $source_library="Bio.pl";
     }elsif(-e "/usr/Perl/Bio.pl"){
            $source_library="/usr/Perl/Bio.pl";
     }elsif(-e "/ss0/sat/Script/Bio.pl"){
            $source_library="/ss0/sat/Script/Bio.pl";
     }elsif(-e "/ss0/agb/Script/Bio.pl"){
            $source_library="/ss0/agb/Script/Bio.pl";
     }else{
            print "\n $0 can not find source library, please set BIO_PERL env\n";
     }
     return(\$source_library);
}

#______________________________________________________________
# Title     : find_central_seq_msp_chunk
# Usage     : This finds the correct msp chunk with given seq name
#             and big original or any msp chunk
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub find_central_seq_msp_chunk{
	 my $central_seq=${$_[0]};
	 my @MSP=@{$_[1]};
	 my ($j, $range, @MSP_1);
	 for($j=0; $j<@MSP; $j++){
	  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  #                   $1                 $2     $3    $4      $5     $6    $7     $8
	  #                   171     41.18      6      73  HI1690    9      76  HI0736 sodium...
	  #,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
	  if($MSP[$j]=~/^ *(\d+) +\d+\.*\d* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+) +(.+)/i){
		 if( ($4 eq $central_seq) && ($4 ne $7) ){
			$range="$2 \- $3";
			push(@MSP_1,  $range);
		 }
	  }
	 }
	 return(\@MSP_1);
}



#______________________________________________________________
# Title     : find_central_sequence
# Usage     :
# Function  : accepts msp file and finds the central sequence.
#             central sequence is in the centre of all the member
#             sequences in a group or cluster
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub find_central_sequence{
	#"""""""""""""""""< handle_arguments{ head Ver 3.9 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	 my (%score, $out, @Keys);
	 for($i=0; $i< @file; $i++){
	  my($input_file) = ${$file[$i]} || $file[$i];

	  if($debug eq 1){ print "\n inputfile is $input_file\n" };
	  unless (-e $input_file){
		  print chr(7);
		  print "\n\n\t This is sub open_msp_files in $0  \n\n";
		  print "\t Fatal: The input file $input_file is not in the directory \n";
	  }
	  open(FILE_1,"$input_file");
	  @MSP=<FILE_1>;

	  for($j=0; $j<@MSP; $j++){
		 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 #                   $1                 $2     $3    $4      $5     $6    $7     $8
		 #                   171     41.18      6      73  HI1690    9      76  HI0736 sodium...
		 #,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
		 if($MSP[$j]=~/^ *(\d+) +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+) +(.+)/){
			if($4 eq $7){
			   $seq_name=$7;
			}elsif( ($4 ne $7) && ( defined($seq_name) ) ){
			   $score{$seq_name}+= $1;
			}
		 }
	  }
	 }
	 @Keys=keys %score;
	 for($i=0; $i< @Keys; $i++){
	  if($score{$Keys[$i]} > $largest){
		 $largest=$score{$Keys[$i]};
		 $out=$Keys[$i];
	  }
	 }
	 return(\$out);
}



#______________________________________________________________
# Title     : write_dof_files
# Usage     : &write_dof_files(\@msps);
#             while @msps means msp file names
# Function  : write Alex's domfam file. it prints out tilde lines
#             if the seqlet matched are below threshold defined.
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
#             v  for verbose STDOUT
#             n  for NO seq start and end number display
#             t= for teshold (eg, t=40  for Blastp(or ssearch) score 40 threshold)
#
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#--------------------------------------------------------------
sub write_dof_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 my $thresh = 0;
	 my @msps=@file;
	 my (@msp_name, $msp_file, @list, $leng, $scale);
	 $thresh=$vars{'t'} if defined( $vars{'t'} ) ;
	 $scale =$vars{'s'};

	 for($i=0; $i < @msps; $i++){ ### @msps should have msp file names
	  my($x, $leng, $line_size, $o, $I_len, $SC, %count);
	  my @msp_name=split(/\./, $msps[$i]);
	  my $base=$msp_name[0];
	  my $out_dof_file="$base\_$thresh\.dof";
	  open(DOF, ">$out_dof_file");
	  open(MSP, "$msps[$i]");
	  my @output=<MSP>;

	  ###### Getting automatic $scale ~~~~~~~~~~~~~~~~~~~~~~~
	  unless($scale=~/\d+/){
		 for($j=0; $j< @output; $j++){
			if($output[$j]=~/^ *\S+[\t ]+\S+[\t ]+1[\t ]+(\d+)[\t ]+\S+/){
			   $leng=$1 if ($1 > $leng);
			}
		 }
		 if($leng > 1300){ $scale = 20;
		 }else{
		    $scale=int($leng / (log($leng)*10) );
		 }
		 if($scale < 5){
			$scale=5;
		 }
	  }

	  if($output[0]=~/^$/){ splice(@output, 0, 1); }
	  if($remove=~/r/){ shift(@output); }  ## removing the first line

	  #######======  Drawing the top line ###########
	  $line_size=int($leng/$scale);

	  #######======  SCALE writing =======###########
	  print DOF "\n NAME       LENG  FROM- TO  ";
	  print     "\n NAME       LENG  FROM- TO  "  if ($char_opt=~/v/);
	  my $div=int($leng/$scale);
	  my $Scaled=int($div/$scale);
	  for($x=1; $x< $leng; $x+=$div){
		 $I_len=length($x);
		 $SC=$Scaled-$I_len;
		 print DOF $x."."x$SC;
		 print     $x."."x$SC if ($char_opt=~/v/);
	  }
	  #####~~~~ Processing MSP file lines ~~~~~~~~
	  for ($o=0; $o< @output; $o++){
		 my $each_msp_line=$output[$o];
		 my ($line,$score, $start1, $end1, $query, $start2, $end2,
		     $put_blank_line, $no_num, $target_seq,$first_time,
		     $S2L, $E2L, $L);
		 if($each_msp_line =~/^$/){
			print DOF "\n";
			print "\n"  if ($char_opt=~/v/);
			$first_time=1;
			next;
		 }elsif($each_msp_line =~/^ *(\d+)[ \t]+\S+[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\S+)/){
			$score=$1;
		    #if($score < $thresh){
		    #   print "\n>$4 score too low"; next;
		    #}
			$start1    =$2;
			$end1      =$3;
			$query     =$4;
			$start2    =$5;
			$end2      =$6;
			$target_seq=$7;
			$target_leng=length($fasta{$target_seq});
			if($count{$target_seq} >= 1){
			   $first_time=0;
			   $put_blank_line=0;
			}elsif($count{$target_seq} < 1){
			   $first_time=1;
			}
			$count{$target_seq}++;
		 }
		 my $S=int($start1/$scale);
		 my $E=int($end1/$scale);
		 $L=$E-$S+1;
		 if($char_opt=~/n/i){
			  if($score < $thresh){
				 $line=" "x$S."\~"x$L;
			  }else{
				 $line=" "x$S."\-"x$L;
			  }
		 }else{
			  $S2L=length($start2);
			  $E2L=length($end2);
			  $L=$L-$S2L-$E2L;
			  if($L < 1){ $L=1 }
			  if($score < $thresh){
				 $line=" "x$S.$start2."\~"x$L.$end2;
			  }else{
				 $line=" "x$S.$start2."\-"x$L.$end2;
			  }
		 }

		 ### Actual writing ####
		 if($first_time==1){
			$first_time=0;
				#    Name leng strt-end   |---------------------------------------------------------
			  printf DOF "\n\>%-11s %-4d %-4d\-%4d %-${line_size}s", $target_seq, $target_leng, $start1, $end1, $line;
			  printf  ("\n\>%-11s %-4d %-4d\-%4d %-${line_size}s",
			   $target_seq, $target_leng, $start1, $end1, $line)  if ($char_opt=~/v/);
		 }elsif($first_time !=1){
			if($put_blank_line==1){
			  print DOF "\n";
			  printf DOF "\n %-11s %-4d %-4d\-%4d %-${line_size}s", $target_seq, $target_leng, $start1, $end1,$line;
			  print  "\n"  if ($char_opt=~/v/);
			  printf  ("\n %-11s %-4d %-4d\-%4d %-${line_size}s",
				 $target_seq, $target_leng, $start1, $end1,$line)  if ($char_opt=~/v/);
		   }else{
			  printf DOF "\n %-11s %-4d %-4d\-%4d %-${line_size}s", $target_seq, $target_leng, $start1, $end1,$line;
			  printf ( "\n %-11s %-4d %-4d\-%4d %-${line_size}s",
				$target_seq, $target_leng, $start1, $end1,$line)  if ($char_opt=~/v/);
		   }
		 }

	  }
	  print DOF "\n\n";
	  print "\n"  if ($char_opt=~/v/);
	  print "\n# ~~~~~ lines mean match regions with below threshold ($thresh)" if $thresh > $score;
	  print "\n# $out_dof_file   is created \n";
	 }

}



#______________________________________________________________
# Title     : make_filtered_list
# Usage     :
# Function  : this is the core of check_genome_cluster.pl
#             finds good linkage seqlets in msp files
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub make_filtered_list{ #####################################33
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 $|=1;
	 my ($result, @msp,%temp_msp, @num_of_all_links,$link_counter,$diff1, $diff2);
	 my $num_seq=0;
	 my @sizes_of_seqlets;
	 open(MSP, "$file[0]");
	 FIRST_FOR:for($c=0; $c< @file; $c++){
	 my %temp_msp;
	 #print "\nFirst\(${c}\)th INPUT file processing\n";
	 my @msp1=<MSP>;
	 my (@msp, $MSP);
	 for($i=0; $i< @msp1; $i++){
	   if($msp1[$i]=~/^ *(\d+) +\d+\.?[e\-\d]*[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\w+)[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\w+)/i){
		  if($1 > $msp_value){
		    $temp_msp{$4} .= $msp1[$i];
		  }
	   }
	 }
	 close(MSP);
	 #print "\n", %temp_msp, "\n";

	 @msp=values %temp_msp;
	 $num_seq=@msp;
	 print "\nTOP Number of seq is: $num_seq \n";

	 #""""""  Real algorithm starts HERE##''''''''''''''''''''''''''''''''''''''''

	 # optimization: I found no need to go through all msp chunk. One is enough by experience
	 #MSP1: for($i=0; $i< @msp; $i++){ # @msp has (mspchunk1, mspchunk2...)
	 if($fast==1){ $msp_chunk_num=1
	 }else{ $msp_chunk_num=@msp }

	 MSP1: for($i=0; $i< $msp_chunk_num; $i++){ # @msp has (mspchunk1, mspchunk2...)
 	    my $pos=$i+1;
	    my @seqlets1=split(/\n+/,  $msp[$i]);
	    print "   MSP1 ${i}th  MSP chunk is handled #######","\n";
	    my $temp=@seqlets1-1;
	    SEQLET1: for($j=1; $j < @seqlets1; $j++){
		   my @OUTPUT=&follow_seqlet_link($seqlets1[$j], @msp);
		   my $depth_of_linking=${$OUTPUT[0]};
		   my $size_of_common_seqlet=${$OUTPUT[1]};
		   if(($depth_of_linking==@msp)&&($size_of_common_seqlet > $threshold)){
		      $result=1;
		      if($fast == 1){
				 last FIRST_FOR;
		         #goto EXIT_1;
		      }
		   }
		   #push(@num_of_all_links, $depth_of_linking);
		   #push(@sizes_of_seqlets, $size_of_common_seqlet);
	    }
	 }
	 }
	 #print "\n All searched links: \n", "@num_of_all_links", "\n";
	 #print "\n Sizes of common seqlet\n", "@sizes_of_seqlets\n";
	 EXIT_1:
	 return(\$result, \$num_seq);
}


#______________________________________________________________
# Title     : follow_seqlet_link
# Usage     :
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub follow_seqlet_link{
	 my $seqlet_line=shift;
	 my @msp=@_;
	 my ($i, $j, $link_counter, @common_range,$seqlet_very_ori, @ranges_very_ori,
	  @new_ranges, $seqlet_ori, $matched_ori, @ranges1, @ranges2);
	 if($seqlet_line=~/^ *\d+[ \t]+\d+\.?[e\-\d]*[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\w+)[ \t]+(\d+)[ \t]+(\d+)[ \t]+(\w+)/i){
	  $seqlet_ori =$3;
	  $seqlet_very_ori=$3;
	  $matched_ori=$6;
	  @ranges1=($1, $2, $4, $5);
	  @ranges_very_ori=@ranges1;
	 }
	 my $visited .= "$seqlet_ori ";
	 #print "\n\=======$seqlet_ori \@ranges1 is @ranges1 =================================\n";

	 MSP: for($i=0; $i<@msp; $i++){
	  my @seqlets1=split(/\n+/,  $msp[$i]);
	  #my @common_range;
	  SEQLET1: for($j=1; $j < @seqlets1; $j++){
	     my $seqlet_num=$j;
	     my @temp= &get_msp_range($seqlets1[$j]) if $seqlets1[$j]=~/\S/;
	     my @ranges2=@{$temp[0]}; # has (1 2 3 4);
	     my $seq2        = $temp[1];
	     my $matched_seq2= $temp[2];
	     #splice(@seqlets1, $j, 1);
	     #$j--;
		 if($seq2 eq $seqlet_ori){
		    next MSP; # to the next MSP chunk
		 }elsif(($seq2 eq $matched_ori)&&($visited !~ /$matched_seq2/)){
			if($verbose=~/v$/i){
	          print "\n    SEQLET1_________________________________________\(${pos}th MSP chunk\)\n";
	          print "    SEQLET1 $temp number of seqlets  for the ${pos}th msp file\n";
			  print "    SEQLET1_________________________________________\(${seqlet_num}th seqlet )\n";
			  print "    QUERY seqlet is $temp[1]: ", "@ranges2[0..1]  MATCH seqlet is $temp[2]: ", "@ranges2[2..3]","\n";
			  print "    Target seqlet is $seq2 : @ranges2\n";
			}
			if(($ranges1[2] >= $ranges2[0])&&    ##     =======
			   ($ranges1[3] <= $ranges2[1]) ){   ##  ==============
			   $link_counter++;
			   #push(@common_range, @ranges1, @ranges2);
			   #print "\n      MSP2      \@ranges1 is ", "@ranges1" if($verbose=~/v/i);
			   #print "\n                \@ranges2 is ", "@ranges2" if($verbose=~/v/i);
			   $diff1=$ranges1[2] - $ranges2[0];
			   $diff2=$ranges2[1] - $ranges1[3];
			   @new_ranges =($ranges1[2], $ranges1[3], ($ranges2[2]+$diff1), ($ranges2[3]-$diff2) );
			   @ranges1=(@new_ranges); #, $ranges2[2], $ranges2[3]);
			   $seqlet_ori=$seq2;
			   $matched_ori=$matched_seq2;
			   $visited .= "$seqlet_ori ";
			   if($verbose=~/v/i){
			      print "\n      FIRST elsif    Finalout @new_ranges \$link_counter=$link_counter", "\n";
			      print "                                         \$num_seq = $num_seq\n";
			   }
			   if( ($link_counter+2) >= @msp){
				  #print  "\n   All link found \$link_counter = $link_counter, \$num_seq=$num_seq\n";
				  $result=1;
				  #$link_counter=0;
				  #print "\n     Common range: ", "@common_range", "\n";
				  $visited .= "$matched_seq2 ";
				  #print "     Sequence visited: $visited \n";
				  @common_range=();
				  $not_visited_msp_chunk=0;
				  goto EXIT;
			   }
			   $i=0;
			   next MSP;
			}elsif(($ranges1[2] <= $ranges2[0])&& ##    --------------
				   ($ranges1[3] >= $ranges2[1]) ){ ##       --------
			   $link_counter++;
			   #push(@common_range, @ranges1, @ranges2);
			   #print "\n         \@ranges1 is ", "@ranges1";
			   #print "\n         \@ranges2 is ", "@ranges2";
			   @new_ranges =($ranges2[0], $ranges2[1], $ranges2[2], $ranges2[3],);
			   #print "\n      Second elsif  Finalout ", @new_ranges, " \$link_counter=$link_counter\n";
			   #print "                                         \$num_seq = $num_seq\n";
			   @ranges1=(@new_ranges);
			   $seqlet_ori=$seq2;
			   $matched_ori=$matched_seq2;
			   $visited .= "$seqlet_ori ";

			   if( ($link_counter+2) >= @msp){
				  #print  "\n     All link found \$link_counter = $link_counter, \$num_seq=$num_seq\n";
				  $result=1;
				  #$link_counter=0;
				  #print "\n     ", "@common_range", "\n";
				  $visited .= "$matched_seq2 ";
				  #print "     Sequence visited: $visited \n";
				  @common_range=();
				  $not_visited_msp_chunk=0;
				  goto EXIT;
			   }
			   $i=0;
			   next MSP;
			}elsif(($ranges1[2] <= $ranges2[0])&&   #    ======
				   ($ranges1[3] <= $ranges2[1]) &&   ##      =======
				   ($ranges1[3] >= $ranges2[0]) ){
			   $link_counter++;
			   #push(@common_range, @ranges1, @ranges2);
			   #print "\n         \@ranges1 is ", "@ranges1";
			   #print "\n         \@ranges2 is ", "@ranges2";
			   #print "                                         \$num_seq = $num_seq\n";
			   $diff2=$ranges2[1] - $ranges1[3];
			   @new_ranges=($ranges2[0], $ranges1[3],  $ranges2[2], ($ranges2[3]-$diff2));
			   @ranges1=(@new_ranges);
			   $seqlet_ori=$seq2;
			   $matched_ori=$matched_seq2;
			   $visited .= "$seqlet_ori ";
			   #print "\n      Third elsif  Finalout ", @new_ranges, " \$link_counter=$link_counter\n";
			   if( ($link_counter+2) >= @msp){
				  #print  "\n     All link found \$link_counter = $link_counter, \$num_seq=$num_seq\n";
				  $result=1;
				  #$link_counter=0;
				  #print "\n     ", "@common_range", "\n";
				  $visited .= "$matched_seq2 ";
				  #print "     Sequence visited: $visited \n";
				  @common_range=();
				  $not_visited_msp_chunk=0;
				  goto EXIT;
			   }
			   $i=0;
			   next MSP;
			}elsif(($ranges1[2] >= $ranges2[0])&&  #        ======
				  ($ranges1[3] >= $ranges2[1])&&   ##  =======
				  ($ranges1[2] <= $ranges2[1]) ){
			   $link_counter++;
			   #push(@common_range, @ranges1, @ranges2);
			   #print "\n         \@ranges1 is ", "@ranges1";
			   #print "\n         \@ranges2 is ", "@ranges2";
			   $diff1=$ranges1[2] - $ranges2[0];
			   @new_ranges=($ranges1[2], $ranges2[1], ($ranges2[0]+$diff1), $ranges2[1]);
			   @ranges1=(@new_ranges);
			   $seqlet_ori=$seq2;
			   $matched_ori=$matched_seq2;
			   $visited .= "$seqlet_ori ";
			   #print "\n      Fourth elsif  Finalout ", @new_ranges, " \$link_counter=$link_counter\n";
			   #print "                                         \$num_seq = $num_seq\n";
			   if( ($link_counter+2) >= @msp){
				  #print  "\n     All link found \$link_counter = $link_counter, \$num_seq=$num_seq\n";
				  $result=1;
				  #$link_counter=0;
				  #print "\n     ", "@common_range", "\n";
				  $visited .= "$matched_seq2 ";
				  #print "     Sequence visited: $visited \n";
				  @common_range=();
				  $not_visited_msp_chunk=0;
				  goto EXIT;
			   }
			   $i=0;
			   next MSP;
			}else{
			   if($verbose=~/v/i){
			      print "\nX X X X X   Link broken ", @new_ranges, " \$link_counter=$link_counter\n";
			   }
			   next SEQLET1;
			}
		 }
	  }
	 }
	 EXIT:
	 $final_num_of_seq_linked=$link_counter+2;
	 if($final_num_of_seq_linked==@msp){
	  $seqlet_leng=$new_ranges[$#common_range]-$new_ranges[$#common_range-1]+1;
	  #print "\n Common Seqlet size:  $seqlet_leng \n";
	 }else{
	  $seqlet_leng=0;
	 }
	 #print "\nLINKING seq num for seqlet $seqlet_very_ori \(","@ranges_very_ori","\) is $final_num_of_seq_linked \n";
	 return(\$final_num_of_seq_linked, \$seqlet_leng);
}


#________________________________________________________________________________
# Title     : convert_clu_to_msp
# Usage     : @written_msp_files=@{&convert_clu_to_msp(\$single_linkage_file)};
# Function  : reads in a big single linkage cluster file(or normal cluster file)
#              and creates a big msp file which contains all the entries in the
#              cluster file (usually with the extension of sclu or clu)
#             This normally reads in xxxx.mso, xxxx.sso like files, but if the
#              corresponding  xxx.msp file already exists, it concatenates them to
#              make a bigger one.
# Example   :
# Keywords  : clu_2_sso_2_msp, cluster_to_msp, cluster_to_sso_to_msp
#              clu_to_sso_to_msp
# Options   :
# Category  :
# Version   : 2.3
#--------------------------------------------------------------------------------
sub convert_clu_to_msp{
     my($i, $j, $k, $s, $u, $v, $p, $m, $n, $y, @possible_extensions, $single_file_name,
        @seq_names, @final_files, @U_L_case, $file, @file, @name_types,
        @poss_sub_dir_heads, @written_msp_files, $Lean_output, $subdir_char_size,
        $search_file_base, $found_real_subdir_name, $found_search_prog_exention_used);

     $subdir_char_size=2; # default
     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # Opening cluster file (xx.clu)
     # %clus looks like this:  2-507     YGR041W YLR353W
     #                         3-308     YDR222W YDR346C YLR225C
     #                         2-184     YCL066W YCR040W
     #______________________________________________________________
     my $clu=${$_[0]} || $_[0];
     $Lean_output=${$_[1]} || $_[1];

     if($verbose){
           print "\n# convert_clu_to_msp : \"$clu\" is given
                                and I am processing it with clu_to_sso_to_msp\n" if defined $clu;
     }
     my %clus=%{&open_clu_files(\$clu)};
     my @clusters= keys %clus;
     my $num_of_cluster=@clusters=@{&sort_by_cluster_size(\@clusters)};

     print "# (i) $0: convert_clu_to_msp: No. of cluster=$num_of_cluster after open_clu_files \n" if $verbose;

     &show_array(\@clusters) if $verbose;
     &show_hash(\%clus) if $verbose;
     @possible_extensions=('msp', 'msp.gz', 'msso', 'msso.gz','fsso', 'pbla', 'pbla.gz',
                                  'ssso', 'fso', 'out', 'prot.sso', 'prot.ts');
     @U_L_case=('\U', '\L');

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
     # Making each SINGLE linkage clu to MSP file format to be ready for divclus
     #______________________________________________________________________________
     for($i=0; $i< @clusters; $i++){
         my (@seq_names, @final_files, $clus_name, $big_out_msp, @msp_hashes);
         $clus_name=$clusters[$i];
         unless($single_file_name=~/\S/){
             $big_out_msp="$clus_name\_cluster\.msp"; #<<<----- final output name
         }else{
             $big_out_msp=$single_file_name;
         }
         push(@written_msp_files, $big_out_msp); ## This is the output of this sub

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         #  If $clus_name.msp is already there, skip
         #_____________________________________________
         if( (-s $big_out_msp) > 100  and !$over_write ){
             print "\n# (i) convert_clu_to_msp : $big_out_msp MSP file already exists, skipping\n";
             print "#    Use  \$over_write option \'o\' to start all over again or \n";
             print "#    delete clustering files like XX-XX_cluster.clu to go on\n";
             next ;
         }
         $num_of_seq_member=@seq_names=split(/ +/, $clus{$clusters[$i]}); # @seq_names has (HIU001, HI002, HI333, MJ111, etc)
         print "# $0: convert_clu_to_msp: No. of seq member=$num_of_seq_member after split \n" if $verbose;

         FOR0: for($j=0; $j < @seq_names; $j++){
               my($sub_dir_head, $file_name_low, $file_name_up, $file_name_prot_low,
                  $file_name_prot_up, $file_name_low_gz, $file_name_up_gz,
                  $file_name_prot_low_gz, $file_name_prot_up_gz);
                  $each_seq_name=$seq_names[$j];
               my @poss_sub_dir_heads=('.'); ## <<<<------- This is critically important, when 'D' opt is not used!

               if($each_seq_name=~/(\S+)_\d+\-\d+$/){
                   $each_seq_name_range=$each_seq_name;
                   $each_seq_name=$1;
                   @name_types=($each_seq_name, $each_seq_name_range);
               }else{
                   @name_types=($each_seq_name);
               }
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               #  Here I take chars from the sequ names, as dirs have fragments of chars
               #_______________________________________________________________________________
               for($s=1; $s <= $subdir_char_size ; $s++){  ## here, number 2 indicates, I check single or 2 char sub dir names
                   $sub_dir_head= substr($seq_names[$j], 0, $s);
                   push(@poss_sub_dir_heads, "\L$sub_dir_head") if (-d "\L$sub_dir_head" );
                   push(@poss_sub_dir_heads, "\U$sub_dir_head") if (-d "\U$sub_dir_head" );
               }
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               #  Checking all the possible subdirectories to crop all the sso files
               #_______________________________________________________________________________
               FOR1: for($p=0; $p <= @poss_sub_dir_heads; $p++){ ## Default has '.' will make things like '././fam_8_8.pbla.gz'
                    $subd=$poss_sub_dir_heads[$p];               ## Also, the '<=' not '<' cures the same problem.
                    FOR2 : for($e=0; $e <  @possible_extensions; $e++){
                         $ext=$possible_extensions[$e];
                         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                         #  This makes all the possible lower upper case names
                         #______________________________________________________
                         for( $u=0; $u < @U_L_case; $u++){
                            for($v=0; $v <@name_types; $v++){
                               $each_seq_name=$name_types[$v];
                               if($U_L_case[$u]=~/U/){  $each_seq_name="\U$each_seq_name";
                               }else{                   $each_seq_name="\L$each_seq_name"; }

                               if(-s "$each_seq_name\.$ext"){
                                    push(@final_files, "$each_seq_name\.$ext" ) ;
                                    $found_search_prog_exention_used=$ext;
                                    $found_real_subdir_name=$subd; ## This is to report the name of the actual subd found
                                    $found_search_prog_exention_used=$ext;
                                    next FOR0
                               }elsif(-s "$each_seq_name\.$ext\.gz"){
                                    push(@final_files, "$each_seq_name\.$ext\.gz" ) ;
                                    $found_search_prog_exention_used=$ext;
                                    $found_real_subdir_name=$subd; ## This is to report the name of the actual subd found
                                    $found_search_prog_exention_used=$ext;
                                    next FOR0
                               }else{
                                    $file_wanted="\.\/$subd\/$each_seq_name\.$ext";
                                    if(-s $file_wanted){
                                        push( @final_files, $file_wanted);
                                        $found_real_subdir_name=$subd; ## This is to report the name of the actual subd found
                                        $found_search_prog_exention_used=$ext;
                                        next FOR0
                                    }elsif(-s "$file_wanted\.gz"){
                                        push( @final_files, "$file_wanted\.gz");
                                        $found_search_prog_exention_used=$ext;
                                        $found_real_subdir_name=$subd; ## This is to report the name of the actual subd found
                                        next FOR0;
                                    }
                               }
                            }
                         }
                    } # FOR2
               } # FOR1
               print @final_files, "\n";
         } # FOR0

         #print "\n# @final_files \n=============> $big_out_msp  \n\n";

         if(@final_files < 1){
              print "\n# convert_clu_to_msp :LINE no.: ", __LINE__, " ERROR: \@final_files is empty. Serious error\n";
              print "\n If you have sub dir which have more than 2 chars as names, you may increase the default 2 to 3 in the above\n";
              next;
         }
         $write_each_msp_to_disk='';

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         #  Check if small msp files have already made in previous steps
         #________________________________________________________________
         if($final_files[0]=~/(\S+)\.msp/){ ##  concatenate msp into big_msp
             $search_file_base=$1;
             print "\n# $search_file_base $found_real_subdir_name $found_search_prog_exention_used\n" if $verbose;

             if($final_files[0]=~/\S\.gz$/){
                 print "\n# $final_files[0] is gzipped \n";
                 system("gzip -d  $final_files[0]");
                 $final_files[0]=~s/\.gz//;
             }
             open(BIG_MSP_FILE, ">$big_out_msp");
             for($y=0; $y< @final_files; $y++){
                 open(SINGLE_MSP, "$final_files[$y]");
                 while(<SINGLE_MSP>){
                     print BIG_MSP_FILE $_;
                 }
             }
             close(BIG_MSP_FILE);
             close(SINGLE_MSP);
             push(@written_msp_files, $big_out_msp);

             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
             # Doing something for L option ($Lean_output)
             #___________________________________________________
             if($Lean_output and -d $found_real_subdir_name and $found_search_prog_exention_used){
                 for($y=0; $y< @seq_names; $y++){
                     unlink("$found_real_subdir_name\/$seq_names[$y]\.$found_search_prog_exention_used");
                     unlink("$found_real_subdir_name\/$seq_names[$y]\.$found_search_prog_exention_used\.gz");
                 }
             }elsif($Lean_output){
                 for($y=0; $y< @seq_names; $y++){
                     unlink("$seq_names[$y]\.$found_search_prog_exention_used");
                     unlink("$seq_names[$y]\.$found_search_prog_exention_used\.gz");
                 }
             }
         }else{
             if($write_each_msp_to_disk){
                  print "\# $0 : going to run open_sso_files with $write_each_msp_to_disk opt\n";
                  $big_out_msp=${&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_msp_to_disk,
                                                                          "u=$upper_expect_limit", $new_format, $add_range, $add_range2, $big_out_msp, $over_write)};
                  if(-s $big_out_msp > 200){  print "\n# $0: SUCCESS to create $big_out_msp :) :) :-) :-) ?\n"; }
             }else{
                  print "\n# convert_clu_to_msp: I am running open_sso_files. \n";
                  @msp_hashes=@{&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_msp_to_disk,
                                                 "u=$upper_expect_limit", $new_format, $add_range,
                                                 $add_range2, $big_out_msp, $over_write)};
                  &write_msp_files(@msp_hashes, $big_out_msp); ## concatenates all the hash ref to one
             }
         }
     }## end of  for($i=0; $i< @clusters; $i++){
     return(\@written_msp_files);
}# end of



#________________________________________________________________________________
# Title     : clu_to_sso_to_msp  (use convert_clu_to_msp)
# Usage     : &clu_to_sso_to_msp(\$clu);
# Function  : reads in a big single linkage cluster file(or normal cluster file)
#              and creates a big msp file which contains all the entries in the
#              cluster file (usually with the extension of sclu or clu)
#             This normally reads in xxxx.mso, xxxx.sso like files, but if the
#              corresponding  xxx.msp file already exists, it concatenates them to
#              make a bigger one.
# Example   :
# Keywords  : clu_2_sso_2_msp, cluster_to_msp, cluster_to_sso_to_msp
#              convert_clu_to_sso_to_msp
# Options   : USE, convert_clu_to_sso_to_msp, this is obsolute now
# Category  :
# Version   : 1.7
#--------------------------------------------------------------------------------
sub clu_to_sso_to_msp{
    my($i, $j, $k, $s, $u, $p, $m, $n, $y, @possible_extensions, @list,
	@final_files, @U_L_case, $file, @file, @written_msp_files);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Opening cluster file (xx.clu)
    # %clus looks like this:  2-507     YGR041W YLR353W
    #                         3-308     YDR222W YDR346C YLR225C
    #                         2-184     YCL066W YCR040W
    #______________________________________________________________
    my $clu=${$_[0]} || $_[0];
    if($verbose){
        print "\n# clu_to_sso_to_msp : \"$clu\" is given
                                                and I am processing it with clu_to_sso_to_msp\n" if defined $clu;
    }
    my %clus=%{&open_clu_files(\$clu)};
    my @keys= keys %clus;
    my $num_of_cluster=@keys=@{&sort_by_cluster_size(\@keys)};

    print "# $0: clu_to_sso_to_msp: No. of cluster=$num_of_cluster after open_clu_files \n" if $verbose;

    &show_array(\@keys) if $verbose;
    &show_hash(\%clus) if $verbose;
    @possible_extensions=('msp', 'sso', 'msso', 'msso.gz','fsso', 'ssso', 'fso', 'out', 'prot.sso', 'prot.ts');
    @U_L_case=('\U', '\L');

    for($i=0; $i< @keys; $i++){
          my (@list, @final_files, $clus_name, $big_out_msp, @msp_hashes);
          $clus_name=$keys[$i];
          unless($single_file_name=~/\S/){
                $big_out_msp="$clus_name\_cluster\.msp"; #<<<----- final output name
          }else{
                $big_out_msp=$single_file_name;
          }
          push(@written_msp_files, $big_out_msp); ## This is the output of this sub

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          #  If $clus_name.msp is already there, skip
          #_____________________________________________
          if( (-s $big_out_msp) > 100  and !$over_write ){
                print "\n# clu_to_sso_to_msp : $big_out_msp MSP file already exists, skipping\n";
                print "#    Use  \$over_write option \'o\' to start all over again or \n";
                print "#    delete clustering files like XX-XX_cluster.clu to go on\n";
                next ;
          }
          $num_of_seq_member=@list=split(/ +/, $clus{$keys[$i]}); # @list has (HIU001, HI002, HI333, MJ111, etc)
          print "# $0: clu_to_sso_to_msp: No. of seq member=$num_of_seq_member after split \n" if $verbose;

          FOR0: for($j=0; $j < @list; $j++){
               my($sub_dir_head, $file_name_low, $file_name_up, $file_name_prot_low, @sub_dir_heads,
                     $file_name_prot_up, $file_name_low_gz, $file_name_up_gz,
                     $file_name_prot_low_gz, $file_name_prot_up_gz);

               $each_seq_name=$list[$j];
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               #  Here I take chars from the sequ names, as dirs have fragments of chars
               #_______________________________________________________________________________
               for($s=1; $s <=2 ; $s++){  ## here, number 2 indicates, I check single or 2 char sub dir names
                     $sub_dir_head= substr($list[$j], 0, $s);
                     push(@sub_dir_heads, "\L$sub_dir_head") if (-d "\L$sub_dir_head" );
                     push(@sub_dir_heads, "\U$sub_dir_head") if (-d "\U$sub_dir_head" );
               }
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               #  Checking all the possible subdirectories to crop all the sso files
               #_______________________________________________________________________________
               FOR1: for($p=0; $p < @sub_dir_heads; $p++){
                     $subd=$sub_dir_heads[$p];
                     FOR2 : for($e=0; $e < @possible_extensions; $e++){
                           $ext=$possible_extensions[$e];
                           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                           #  This makes all the possible lower upper case names
                           #______________________________________________________
                           for( $u=0; $u < @U_L_case; $u++){
                                  if($U_L_case[$u]=~/U/){  $each_seq_name="\U$each_seq_name";
                                  }else{                   $each_seq_name="\L$each_seq_name"; }

                                  if(-s "$each_seq_name\.$ext"){   push(@final_files, "$each_seq_name\.$ext" ) ; next FOR0 }
                                  elsif(-s "$each_seq_name\.$ext\.gz"){ push(@final_files, "$each_seq_name\.$ext\.gz" ) ; next FOR0 }
                                  else{
                                       $file_wanted="\.\/$subd\/$each_seq_name\.$ext";
                                       if(-s $file_wanted){
                                            push( @final_files, $file_wanted); next FOR0 }
                                       elsif(-s "$file_wanted\.gz"){
                                            push( @final_files, "$file_wanted\.gz");
                                            next FOR0
                                       }
                                  }
                           }
                     } # FOR2
               } # FOR1

          } # FOR0

          print "\n# @final_files \n=============> $big_out_msp  \n\n" if $verbose;

          if(@final_files < 1){
               print "\n# clu_to_sso_to_msp :LINE no.: ", __LINE__, " ERROR: \@final_files is empty. Serious error\n";
               print "\n If you have sub dir which have more than 2 chars as names, you may increase the default 2 to 3 in the above\n";
               next;
          }
          # $write_each_msp_to_disk='w';

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           #  Check if small msp files have already made in previous steps
           #________________________________________________________________
           if($final_files[0]=~/\.msp *$/){ ##  concatenate msp into big_msp
                 open(BIG_MSP_FILE, ">$big_out_msp");
                 for($y=0; $y< @final_files; $y++){
                      open(SINGLE_MSP, "$final_files[$y]");
                      while(<SINGLE_MSP>){
                           print BIG_MSP_FILE $_;
                      }
                 }
                 close(BIG_MSP_FILE);
                 close(SINGLE_MSP);
                 push(@written_msp_files, $big_out_msp);

           }else{
                if($write_each_msp_to_disk){
                    print "\# $0 : going to run open_sso_files with $write_each_msp_to_disk opt\n";
                    $big_out_msp=${&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_msp_to_disk,
                                                                            "u=$upper_expect_limit", $new_format, $add_range, $add_range2, $big_out_msp, $over_write)};
                    if(-s $big_out_msp > 200){  print "\n# $0: SUCCESS to create $big_out_msp :) :) :-) :-) ?\n"; }
                }else{
                    print "\n# clu_to_sso_to_msp: I am running open_sso_files. \n";
                    @msp_hashes=@{&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_msp_to_disk,
                                                                            "u=$upper_expect_limit", $new_format, $add_range, $add_range2, $big_out_msp, $over_write)};

                    &write_msp_files(@msp_hashes, $big_out_msp); ## concatenates all the hash ref to one
                }
          }
    }
    return(\@written_msp_files);
}# end of


#________________________________________________________________________________
# Title     : convert_clu_to_sso_to_msp
# Usage     : &clu_to_sso_to_msp(\$clu);
# Function  : reads in a big single linkage cluster file(or normal cluster file)
#              and creates a big msp file which contains all the entries in the
#              cluster file (usually with the extension of sclu or clu)
#             This normally reads in xxxx.mso, xxxx.sso like files, but if the
#              corresponding  xxx.msp file already exists, it concatenates them to
#              make a bigger one.
# Example   :
# Keywords  : clu_2_sso_2_msp, cluster_to_msp, cluster_to_sso_to_msp
#              clu_to_sso_to_msp
# Options   :
# Category  :
# Version   : 1.8
#--------------------------------------------------------------------------------
sub convert_clu_to_sso_to_msp{
     my($i, $j, $k, $s, $u, $p, $m, $n, $y, @possible_extensions, @list,
          @final_files, @U_L_case, $file, @file, @written_msp_files);

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # Opening cluster file (xx.clu)
     # %clus looks like this:  2-507     YGR041W YLR353W
     #                         3-308     YDR222W YDR346C YLR225C
     #                         2-184     YCL066W YCR040W
     #______________________________________________________________
     my $clu=${$_[0]} || $_[0];
     if($verbose){
           print "\n# clu_to_sso_to_msp : \"$clu\" is given
                                                      and I am processing it with clu_to_sso_to_msp\n" if defined $clu;
     }
     my %clus=%{&open_clu_files(\$clu)};
     my @keys= keys %clus;
     my $num_of_cluster=@keys=@{&sort_by_cluster_size(\@keys)};

     print "# $0: clu_to_sso_to_msp: No. of cluster=$num_of_cluster after open_clu_files \n" if $verbose;

     &show_array(\@keys) if $verbose;
     &show_hash(\%clus) if $verbose;
     @possible_extensions=('msp', 'sso', 'msso', 'msso.gz','fsso', 'ssso', 'fso', 'out', 'prot.sso', 'prot.ts');
     @U_L_case=('\U', '\L');

     for($i=0; $i< @keys; $i++){
         my (@list, @final_files, $clus_name, $big_out_msp, @msp_hashes);
         $clus_name=$keys[$i];
         unless($single_file_name=~/\S/){
              $big_out_msp="$clus_name\_cluster\.msp"; #<<<----- final output name
         }else{
              $big_out_msp=$single_file_name;
         }
         push(@written_msp_files, $big_out_msp); ## This is the output of this sub

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         #  If $clus_name.msp is already there, skip
         #_____________________________________________
         if( (-s $big_out_msp) > 100  and !$over_write ){
             print "\n# clu_to_sso_to_msp : $big_out_msp MSP file already exists, skipping\n";
             print "#    Use  \$over_write option \'o\' to start all over again or \n";
             print "#    delete clustering files like XX-XX_cluster.clu to go on\n";
             next ;
         }
         $num_of_seq_member=@list=split(/ +/, $clus{$keys[$i]}); # @list has (HIU001, HI002, HI333, MJ111, etc)
         print "# $0: clu_to_sso_to_msp: No. of seq member=$num_of_seq_member after split \n" if $verbose;

         FOR0: for($j=0; $j < @list; $j++){
                  my($sub_dir_head, $file_name_low, $file_name_up, $file_name_prot_low, @sub_dir_heads,
                     $file_name_prot_up, $file_name_low_gz, $file_name_up_gz,
                     $file_name_prot_low_gz, $file_name_prot_up_gz);

                  $each_seq_name=$list[$j];
                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  #  Here I take chars from the sequ names, as dirs have fragments of chars
                  #_______________________________________________________________________________
                  for($s=1; $s <=2 ; $s++){  ## here, number 2 indicates, I check single or 2 char sub dir names
                       $sub_dir_head= substr($list[$j], 0, $s);
                       push(@sub_dir_heads, "\L$sub_dir_head") if (-d "\L$sub_dir_head" );
                       push(@sub_dir_heads, "\U$sub_dir_head") if (-d "\U$sub_dir_head" );
                  }
                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  #  Checking all the possible subdirectories to crop all the sso files
                  #_______________________________________________________________________________
                  FOR1: for($p=0; $p < @sub_dir_heads; $p++){
                       $subd=$sub_dir_heads[$p];
                       FOR2 : for($e=0; $e < @possible_extensions; $e++){
                            $ext=$possible_extensions[$e];
                            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                            #  This makes all the possible lower upper case names
                            #______________________________________________________
                            for( $u=0; $u < @U_L_case; $u++){
                                  if($U_L_case[$u]=~/U/){  $each_seq_name="\U$each_seq_name";
                                  }else{                   $each_seq_name="\L$each_seq_name"; }

                                  if(-s "$each_seq_name\.$ext"){   push(@final_files, "$each_seq_name\.$ext" ) ; next FOR0 }
                                  elsif(-s "$each_seq_name\.$ext\.gz"){ push(@final_files, "$each_seq_name\.$ext\.gz" ) ; next FOR0 }
                                  else{
                                       $file_wanted="\.\/$subd\/$each_seq_name\.$ext";
                                       if(-s $file_wanted){
                                                       push( @final_files, $file_wanted); next FOR0 }
                                       elsif(-s "$file_wanted\.gz"){
                                                       push( @final_files, "$file_wanted\.gz");
                                                       next FOR0
                                       }
                                  }
                            }
                       } # FOR2
                  } # FOR1

         } # FOR0

         print "\n# @final_files \n=============> $big_out_msp  \n\n" if $verbose;

         if(@final_files < 1){
              print "\n# clu_to_sso_to_msp :LINE no.: ", __LINE__, " ERROR: \@final_files is empty. Serious error\n";
              print "\n If you have sub dir which have more than 2 chars as names, you may increase the default 2 to 3 in the above\n";
              next;
         }
         # $write_each_msp_to_disk='w';

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         #  Check if small msp files have already made in previous steps
         #________________________________________________________________
         if($final_files[0]=~/\.msp *$/){ ##  concatenate msp into big_msp
             open(BIG_MSP_FILE, ">$big_out_msp");
             for($y=0; $y< @final_files; $y++){
                 open(SINGLE_MSP, "$final_files[$y]");
                 while(<SINGLE_MSP>){
                                 print BIG_MSP_FILE $_;
                 }
             }
             close(BIG_MSP_FILE);
             close(SINGLE_MSP);
             push(@written_msp_files, $big_out_msp);

         }else{
             if($write_each_msp_to_disk){
                  print "\# $0 : going to run open_sso_files with $write_each_msp_to_disk opt\n";
                  $big_out_msp=${&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_msp_to_disk,
                                                                          "u=$upper_expect_limit", $new_format, $add_range, $add_range2, $big_out_msp, $over_write)};
                  if(-s $big_out_msp > 200){  print "\n# $0: SUCCESS to create $big_out_msp :) :) :-) :-) ?\n"; }
             }else{
                  print "\n# clu_to_sso_to_msp: I am running open_sso_files. \n";
                  @msp_hashes=@{&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_msp_to_disk,
                                                                          "u=$upper_expect_limit", $new_format, $add_range, $add_range2, $big_out_msp, $over_write)};

                  &write_msp_files(@msp_hashes, $big_out_msp); ## concatenates all the hash ref to one
             }
         }
     }
     return(\@written_msp_files);
}# end of


#______________________________________________________________________________
# Title     : sso_to_msp
# Usage     : &sso_to_msp(@ARGV, $single_out_opt);
# Function  : This takes sso file(s) and produces MSP file. It
#             concatenate sso file contents when more than one
#             sso file is given.
# Example   : &sso_to_msp(@ARGV, 'OUT.msp', $single_out_opt);
# Warning   : This capitalize all the input file names when
#              producing xxxxx.msp. xxxxx.sso -> XXXX.sso
# Keywords  : sso_file_to_msp_file, convert_sso_to_msp,
# Options   : _  for debugging.
#             #  for debugging.
#             v  for showing the MSP result to screen
#             s  for making single MSP file for each sso file
#                    as well as big MSP file which has all sso
#             u= for upper expectation value limit
#             l= for lower expect val limit
#             s= for single file name input eg. "s=xxxxx.msp"
#             n  for new format (msp2 format)
#             r  for adding range
#             r2 for adding ranges in all sequence names
#
# Returns   : the file names created (xxxx.msp, yyyy.msp,,,,)
# Argument  :
# Category  :
# Version   : 2.6
#-----------------------------------------------------------------------------
sub sso_to_msp{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 my ($upper_expect_limit, $lower_expect_limit)=(50, 0);
	 my (%sso, @sso, @SSO, $big_out_msp1,  @final_out, $big_out_msp2,
	   $create_sso, $single_out_opt, $add_range, $add_range2, $big_out_msp,
	   $Evalue_thresh, $new_format, $Score_thresh, $margin, $single_file_name);
	if($vars{'u'}=~/([\.\d]+)/){ $upper_expect_limit = $vars{'u'} };
	if($vars{'l'}=~/([\.\d]+)/){ $lower_expect_limit = $vars{'l'} };
	if($vars{'t'}=~/(\d+)/){ $Score_thresh  = $vars{'t'} };
	if($vars{'m'}=~/(\d+)/){ $margin  = $vars{'m'} };
	if($vars{'s'}=~/\S/){ $single_file_name  = $vars{'s'} };
	if($char_opt=~/r2/){  $add_range='r'; $add_range2='r2' }
	if($char_opt=~/r/){   $add_range = 'r' }
	if($char_opt=~/c/){   $create_sso = 'c' }
	if($char_opt=~/s/){   $single_out_opt='s' }
	if($char_opt=~/n/){   $new_format='n' }
	 print "\n# File given to sso_to_msp is \"@file\", Normally xxx.sso file names\n";

	 if($single_file_name=~/\S/){
	   $big_out_msp=$single_file_name;
	 }else{
	   for($i=0; $i < @file; $i++){
		   if($file[$i]=~/\.msp$/){ ## when output file name is given
			   $big_out_msp=$file[$i];
			   splice(@file, $i, 1);
			   $i--;
		   }elsif($file[$i]=~/^(\d+\-\d+)([_\d]*)\.[mfs]?sso/){  ## creates xxxx.msp file name from xxxx.sso
			   $big_out_msp1="\U$1"."$2"."\.msp";
			   $big_out_msp2="\U$1".".msp";
		   }elsif($file[$i]=~/^(\S+)\.[mfs]?sso$/){
			   $big_out_msp1="\U$1"."\.msp";
			   $big_out_msp2="\U$1"."_all".".msp";
			   print "\n# sso_to_msp: File matched  xxxx.sso  format \n";
		   }elsif($file[$i]=~/^(\S+)\.out$/){
			   $big_out_msp1="\U$1"."\.msp";
			   $big_out_msp2="\U$1"."_all".".msp";
			   print "\n# sso_to_msp: File matched  xxxx.out  format \n";
		   }elsif($file[$i]=~/^(\S+)\.p[rot\,]*\.ts\.gz/){
			   $big_out_msp1="\U$1".".msp";
			   $big_out_msp2="\U$1"."_all".".msp";
		   }elsif($file[$i]=~/^(\S+)\.ts\.gz/){
			   $big_out_msp1="\U$1".".msp";
			   $big_out_msp2="\U$1"."_all".".msp";
		   }elsif($file[$i]=~/^(\S+)\.out\.gz/ or $file[$i]=~/^(\S+)\.[mfs]?sso\.gz/){
			   $big_out_msp1="\U$1".".msp";
			   $big_out_msp2="\U$1"."_all".".msp";
		   }
	   }
	 }
	 if(defined($big_out_msp)){
	   $big_out_msp1=$big_out_msp2=$big_out_msp;
	   print "\n# \$big_out_msp is defined as \'$big_out_msp\'\n";
	 }else{
	   print "\n# sso_to_msp: You did not define the big MSP file out format, so $big_out_msp1 \n";
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  (1) When File was given to this sub routine
	 #__________________________________________
	 if(@file == 1){   ## ONE single file input??
	  print "# one file @file is given, OUT will be: $big_out_msp1 \n";
	  @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	          "u=$upper_expect_limit",
			  "l=$lower_expect_limit",
			  "m=$margin",
			  $new_format,
			  "s=$big_out_msp")};
	  push(@final_out, &write_msp_files(@sso, $big_out_msp1,
	        $single_out_opt, $add_range) );

	 }elsif(@file > 1){ ## MOre than 1 file input??
	  @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	        "l=$lower_expect_limit",
	        "u=$upper_expect_limit",
	        "m=$margin",
	        $new_format)};
	  push(@final_out, @{&write_msp_files(@sso, $big_out_msp2,
			$single_out_opt, $add_range)} ); ## concatenates all the hash ref to one
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  (2) When NO File but ARRAY is given
	 #      Here, you can have SSO files created
	 #__________________________________________
	 elsif(@array >=1){
	  print "\n# In sso_to_msp, \@array is given rather than \@file";
	  @sso=@{&open_sso_files(@array, "u=$upper_expect_limit", $add_range2,
			  "l=$lower_expect_limit", $add_range, $create_sso,
			  "m=$margin", $new_format)};
	  push(@final_out, @{&write_msp_files(@sso, $big_out_msp,
						  $single_out_opt, $add_range)} );
	 }
	 return(\@final_out);
}



#______________________________________________________________________________
# Title     : convert_sso_to_msp
# Usage     : &convert_sso_to_msp(@ARGV, $single_out_opt);
# Function  : This takes sso file(s) and produces MSP file. It
#             concatenate sso file contents when more than one
#             sso file is given.
# Example   : &convert_sso_to_msp(@ARGV, 'OUT.msp', $single_out_opt);
# Warning   : This capitalize all the input file names when
#              producing xxxxx.msp. xxxxx.sso -> XXXX.sso
# Keywords  : sso_file_to_msp_file, convert_sso_to_msp,
# Options   : _  for debugging.
#             #  for debugging.
#             v  for showing the MSP result to screen
#             s  for making single MSP file for each sso file
#                    as well as big MSP file which has all sso
#             u= for upper expectation value limit
#             l= for lower expect val limit
#             s= for single file name input eg. "s=xxxxx.msp"
#             n  for new format (msp2 format)
#             r  for adding range
#             r2 for adding ranges in all sequence names
#
# Returns   : the file names created (xxxx.msp, yyyy.msp,,,,)
# Argument  :
# Category  :
# Version   : 2.6
#-----------------------------------------------------------------------------
sub convert_sso_to_msp{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 my ($upper_expect_limit, $lower_expect_limit)=(50, 0);
	 my (%sso, @sso, @SSO, $big_out_msp1,  @final_out, $big_out_msp2,
	   $create_sso, $single_out_opt, $add_range, $add_range2, $big_out_msp,
	   $Evalue_thresh, $new_format, $Score_thresh, $margin, $single_file_name);
	if($vars{'u'}=~/([\.\d]+)/){ $upper_expect_limit = $vars{'u'} };
	if($vars{'l'}=~/([\.\d]+)/){ $lower_expect_limit = $vars{'l'} };
	if($vars{'t'}=~/(\d+)/){ $Score_thresh  = $vars{'t'} };
	if($vars{'m'}=~/(\d+)/){ $margin  = $vars{'m'} };
	if($vars{'s'}=~/\S/){ $single_file_name  = $vars{'s'} };
	if($char_opt=~/r2/){  $add_range='r'; $add_range2='r2' }
	if($char_opt=~/r/){   $add_range = 'r' }
	if($char_opt=~/c/){   $create_sso = 'c' }
	if($char_opt=~/s/){   $single_out_opt='s' }
	if($char_opt=~/n/){   $new_format='n' }
	 print "\n# File given to convert_sso_to_msp is \"@file\", Normally xxx.sso file names\n";

	 if($single_file_name=~/\S/){
	   $big_out_msp=$single_file_name;
	 }else{
	   for($i=0; $i < @file; $i++){
		   if($file[$i]=~/\.msp$/){ ## when output file name is given
			   $big_out_msp=$file[$i];
			   splice(@file, $i, 1);
			   $i--;
		   }elsif($file[$i]=~/^(\d+\-\d+)([_\d]*)\.[mfs]?sso/){  ## creates xxxx.msp file name from xxxx.sso
			   $big_out_msp1="\U$1"."$2"."\.msp";
			   $big_out_msp2="\U$1".".msp";
		   }elsif($file[$i]=~/^(\S+)\.[mfs]?sso$/){
			   $big_out_msp1="\U$1"."\.msp";
			   $big_out_msp2="\U$1"."_all".".msp";
			   print "\n# convert_sso_to_msp: File matched  xxxx.sso  format \n";
		   }elsif($file[$i]=~/^(\S+)\.out$/){
			   $big_out_msp1="\U$1"."\.msp";
			   $big_out_msp2="\U$1"."_all".".msp";
			   print "\n# convert_sso_to_msp: File matched  xxxx.out  format \n";
		   }elsif($file[$i]=~/^(\S+)\.p[rot\,]*\.ts\.gz/){
			   $big_out_msp1="\U$1".".msp";
			   $big_out_msp2="\U$1"."_all".".msp";
		   }elsif($file[$i]=~/^(\S+)\.ts\.gz/){
			   $big_out_msp1="\U$1".".msp";
			   $big_out_msp2="\U$1"."_all".".msp";
		   }elsif($file[$i]=~/^(\S+)\.out\.gz/ or $file[$i]=~/^(\S+)\.[mfs]?sso\.gz/){
			   $big_out_msp1="\U$1".".msp";
			   $big_out_msp2="\U$1"."_all".".msp";
		   }
	   }
	 }
	 if(defined($big_out_msp)){
	   $big_out_msp1=$big_out_msp2=$big_out_msp;
	   print "\n# \$big_out_msp is defined as \'$big_out_msp\'\n";
	 }else{
	   print "\n# convert_sso_to_msp: You did not define the big MSP file out format, so $big_out_msp1 \n";
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  (1) When File was given to this sub routine
	 #__________________________________________
	 if(@file == 1){   ## ONE single file input??
	  print "# one file @file is given, OUT will be: $big_out_msp1 \n";
	  @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	          "u=$upper_expect_limit",
			  "l=$lower_expect_limit",
			  "m=$margin",
			  $new_format,
			  "s=$big_out_msp")};
	  push(@final_out, &write_msp_files(@sso, $big_out_msp1,
	        $single_out_opt, $add_range) );

	 }elsif(@file > 1){ ## MOre than 1 file input??
	  @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	        "l=$lower_expect_limit",
	        "u=$upper_expect_limit",
	        "m=$margin",
	        $new_format)};
	  push(@final_out, @{&write_msp_files(@sso, $big_out_msp2,
			$single_out_opt, $add_range)} ); ## concatenates all the hash ref to one
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  (2) When NO File but ARRAY is given
	 #      Here, you can have SSO files created
	 #__________________________________________
	 elsif(@array >=1){
	  print "\n# In convert_sso_to_msp, \@array is given rather than \@file";
	  @sso=@{&open_sso_files(@array, "u=$upper_expect_limit", $add_range2,
			  "l=$lower_expect_limit", $add_range, $create_sso,
			  "m=$margin", $new_format)};
	  push(@final_out, @{&write_msp_files(@sso, $big_out_msp,
						  $single_out_opt, $add_range)} );
	 }
	 return(\@final_out);
}



#________________________________________________________________________________
# Title     : bla_to_msf  (this is not used. Use convert_bla_to_msf)
# Usage     : @msf_file_made=@{&bla_to_msf(\@bla_file)};
# Function  : matched each query seq name and if the E value is lower than
#             my arbitrary threshold, I put the subject and target pair
#             alignment into a hash.
#             In later iterations, the latest is replaced
# Example   :
# Keywords  : convert_bla_to_msf
# Options   :
# Author    :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------------------
sub bla_to_msf{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my ($e_val_threshold)=0.0005;
		my(@template_query_seq, @keys, %alignment_hash, %alignment_hash_query,
			 %alignment_hash_subject);
		$choose_iteration=1;

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Opening file
		#______________________________________________
		for($i=0; $i< @file; $i++){
				$file_base_name=${&get_base_names($file[$i])};
				open(BLAST_OUTPUT, $file[$i]);
				while(<BLAST_OUTPUT>){
						if(/^Query=(\S+)/){
								$query_seq=$1;   last;
						}
				}
				close(BLAST_OUTPUT);

				open(BLAST_OUTPUT, $file[$i]);
				while(<BLAST_OUTPUT>){

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						#  Finds the query sequence, resets $start_point and next line
						#____________________________________________
						if(/^Searching\.\.\.\.\.\.\.\.\.\.\./){
                                                     $present_iteration++;
                                                     if($present_iteration > $choose_iteration){
                                                          last
                                                     }else{
                                                          %alignment_hash_subject=%alignment_hash_query=();
                                                     }
						}elsif(/^\> *(\S+)/){
                                                     $subject_seq=$1;
                                                     $start_point='';
                                                     if($alignment_hash_subject{$subject_seq}){
                                                         $seq_already_in=1;
                                                         $subject_seq='';
                                                         next;
                                                     }
						}
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# If $subject_seq defined, match the line to get expectation value
						#________________________________________________________________
						elsif($subject_seq
							and /^[\t ]*Score[\t ]*\=[\t ]*(\S+)[\t ]*bits.+\,[\t ]*Expect[\t ]*=[\t ]*(\S+)/i){
								$expect_value=$2;
								unless($alignment_hash_subject{$subject_seq} or $expect_value > $e_val_threshold){
										$alignment_hash_subject{"$subject_seq"}="$expect_value ";
										$alignment_hash_query{"$subject_seq"}="$expect_value ";
								}
						}
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# If $subject_seq defined, and expectation val is less than thresh, match Query seq line,
						#   0.0005 = $e_val_threshold
						#_____________________________________________________________________________________
						elsif($subject_seq and $expect_value < $e_val_threshold and /Query +(\d+) +(\S+) +\d+/){
								if($start_point){
										$alignment_hash_query{"$subject_seq"}.=$2;
								}else{ # If this is the first match of 'query', put dashes according to the start point
										$start_point=$1;
										$alignment_hash_query{"$subject_seq"}.="_"x($start_point-1).$2;
										$alignment_hash_subject{"$subject_seq"}.="_"x($start_point-1);
								}

						}
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# If $subject_seq defined, and expectation val is less than thresh, match Sbjct seq line
						#_____________________________________________________________________________________
						elsif($subject_seq and $expect_value < $e_val_threshold and /Sbjct +\d+ +(\S+) +\d+/){
								$alignment_hash_subject{"$subject_seq"}.=$1;
						}
				}
				close(BLAST_OUTPUT);

				# now in %alignment_hash, I have many pairs like:
				# --------VAVCQNMGIGK--DGNLPWPPLRNEYKYFQR
				# --------WARKNKLGWGFELKGSMPSAPLITEQTYFKD
				# -----------------------KTWFSIPEKNRPLK
				# -----------------------KTWEEIPALDKELK

				$output_msf="$file_base_name\.msf";

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~````
				# sorting the keys and then the first column of hash value(which is evals) and then
				# keys by keys names when the E vals are equal
				# This way, I get:
				# 0 : d8dfr__ query 7e-92 VRSLNSIVAVCQNMGIGKDGNLPWPPLRNEYKYFQRMTSTSHVEGKQNAVIM
				# 1 : d8dfr__ sbjct 7e-92 VRSLNSIVAVCQNMGIGKDGNLPWPPLRNEYKYFQRMTSTSHVEGKQNAVIM
				# 2 : nr_DYR_CHICK query 7e-92 VRSLNSIVAVCQNMGIGKDGNLPWPPLRNEYKYFQRMTSTSHVEGKQNAVIM
				# 3 : nr_DYR_CHICK sbjct 7e-92 VRSLNSIVAVCQNMGIGKDGNLPWPPLRNEYKYFQRMTSTSHVEGKQNAVIM
				#    ...
				#___________________________________________________________________
				@keys= map{ $_->[1] }
							 sort { $a->[0] <=> $b->[0] }
							 map{ $alignment_hash_subject{$_}=~/^(\S+)/ or $_=~/^(\S+)/ ; [$1, $_] }
							 sort keys %alignment_hash_subject;

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
				# Making the final alignments by adjusting gaps in every pairwise step comparison
				# keys   are 'd8dfr__ query',          'd8dfr__ sbjct',,,
				# values are '7e-92 VRSLNSIVAVCQ....', '7e-92 VRSLNSIVAVCQN....'
				#________________________________________________
				$template_query=$alignment_hash_query{$keys[0]};

				if($alignment_hash_query{$keys[0]}=~/^\S+ +(\S+)/){   @template_query_seq=split(//, $1);       }

				print "\n", @template_query_seq, "\n" if $verbose;


				for($j=0; $j < @keys; $j++){
						my($k, $evalue, @gapped_position, $query_seq, $g);

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						# matching query entry and inserting gaps
						#__________________________________________________
						$query_name=$keys[$j];
						if($alignment_hash_query{$query_name}=~/^(\S+) +(\S+)$/){
								$evalue=$1;
								$query_seq=$2;       }
						if($query_seq !~/\-/){  next   }

						my @splited_query_seq=split(//, $query_seq);

						$longest_query_seq=@splited_query_seq if @splited_query_seq > $longest_query_seq;

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						# skip gaps at the beginning
						#_____________________________________________________
						if($splited_query_seq[0] eq '_'){
								for($k=0; $k < @splited_query_seq; $k++){
										if($splited_query_seq[$k] ne '_' and $splited_query_seq[$k] ne '-'){
												last;
										}
								}
						}

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# Finding all the gapped position and saving them
						#___________________________________________________
						for( $s=$k-1; $s < @splited_query_seq; $s++){
								if($splited_query_seq[$s] eq '-'){
										push(@gapped_position, $s);
								}
						}

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						# (1) TEMPLATE: matching sbjct entry and inserting gaps
						#__________________________________________________
						if(@gapped_position < 1){ next }
						for($g=0; $g< @gapped_position; $g++){
								$char_posi=$template_query_seq[$gapped_position[$g]] ;
								if($char_posi ne '-'){
										splice(@template_query_seq, $gapped_position[$g], 0, '-');
								}
						}
						print "\n# gaps are @gapped_position \n" if $verbose;
						@gapped_position=();
						next;
				}

				#print "\n      ", @template_query_seq, "\n The raw subject lines are:\n";

				if($verbose){
						for($k=0; $k< @keys; $k++){
								print $alignment_hash_subject{$keys[$k]}, "\n";
						}
						print "\n The raw QUERY lines\n";
						for($k=0; $k< @keys; $k++){
								print $alignment_hash_query{$keys[$k]}, "\n";
						}
				}

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
				# Fixing subject sequences according to template and query seqs
				#____________________________________________________________________
				for($g=0; $g < @keys; $g++){
						 $subject_name=$keys[$g];

						 my($evalue, @splited_subject_seq, @splited_query_seq);

						 $alignment_hash_query{  $subject_name}=~/^(\S+) +(\S+)/;
						 @splited_query_seq  =split(//, $2);
						 $alignment_hash_subject{$subject_name}=~/^(\S+) +(\S+)/;
						 @splited_subject_seq=split(//, $2);
						 $evalue=$1;

						 for($t=0; $t< @template_query_seq; $t++){
								 if($template_query_seq[$t] ne '-' ){
										 next
								 }elsif($template_query_seq[$t] eq '-'){

										 $char_of_the_position=$splited_query_seq[$t];
										 if($char_of_the_position ne '-' and $char_of_the_position ne '_'){

												 #print "\n# \$t is $t";
												 #print "\n# \$evalue is $evalue\n ==>";
												 #print @splited_query_seq, "\n ==>";
												 #print @splited_subject_seq, "\n ==>";
												 splice(@splited_subject_seq, $t, 0, '-');
												 splice(@splited_query_seq, $t, 0, '-');
												 #print @splited_query_seq, "\n ==>";
												 #print @splited_subject_seq, "\n";
												 next;
										 }elsif($char_of_the_position eq '_'){
												 splice(@splited_subject_seq, 0, 0, '_');
												 splice(@splited_query_seq, 0, 0, '_');

										 }elsif($char_of_the_position eq '-'){
												 next;
										 }
								 }
						 }
						 $new_subject_seq=join('', @splited_subject_seq);
						 $new_query_seq  =join('', @splited_query_seq);
						 #$alignment_hash{$keys[$g]}="$evalue $new_subject_seq";
						 #$alignment_hash{$keys[$g-1]}="$evalue $new_query_seq";
						 $alignment_hash_subject{$subject_name}="$evalue $new_subject_seq";
						 $alignment_hash_query{$subject_name}  ="$evalue $new_query_seq";
				}


				print "\n";print @template_query_seq, "\n" if $verbose;

				for($h=0; $h< @keys; $h++){
						 $subject_name=$keys[$h];
						 $alignment_hash_subject{$subject_name}=~/^(\S+) +(\S+)/;
						 #print "\n $alignment_hash_query{$subject_name}";
						 print "\n $alignment_hash_subject{$subject_name}";
						 $final_seq_out{$subject_name}=$2;
				}
				&write_msf(\%final_seq_out, \$output_msf);
				push(@final_out, $output_msf);
		}
		return(\@final_out);
}

#________________________________________________________________________________
# Title     : convert_bla_to_msf
# Usage     : @msf_file_made=@{&convert_bla_to_msf(\@bla_file)};
# Function  : matched each query seq name and if the E value is lower than
#             my arbitrary threshold, I put the subject and target pair
#             alignment into a hash.
#             In later iterations, the latest is replaced
# Example   :
# Keywords  : convert_bla_to_msf
# Options   :
# Author    :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------------------
sub convert_bla_to_msf{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my ($e_val_threshold)=0.0005;
		my(@template_query_seq, @keys, %alignment_hash, %alignment_hash_query,
			 %alignment_hash_subject);
		$choose_iteration=1;

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Opening file
		#______________________________________________
		for($i=0; $i< @file; $i++){
				$file_base_name=${&get_base_names($file[$i])};
				open(BLAST_OUTPUT, $file[$i]);
				while(<BLAST_OUTPUT>){
						if(/^Query=(\S+)/){
								$query_seq=$1;   last;
						}
				}
				close(BLAST_OUTPUT);

				open(BLAST_OUTPUT, $file[$i]);
				while(<BLAST_OUTPUT>){

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						#  Finds the query sequence, resets $start_point and next line
						#____________________________________________
						if(/^Searching\.\.\.\.\.\.\.\.\.\.\./){
								$present_iteration++;
								if($present_iteration > $choose_iteration){
										last
								}else{
										%alignment_hash_subject=%alignment_hash_query=();
								}
						}elsif(/^\> *(\S+)/){
								$subject_seq=$1;
								$start_point='';
								if($alignment_hash_subject{$subject_seq}){
										$seq_already_in=1;
										$subject_seq='';
										next;
								}
						}
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# If $subject_seq defined, match the line to get expectation value
						#________________________________________________________________
						elsif($subject_seq
							and /^[\t ]*Score[\t ]*\=[\t ]*(\S+)[\t ]*bits.+\,[\t ]*Expect[\t ]*=[\t ]*(\S+)/i){
								$expect_value=$2;
								unless($alignment_hash_subject{$subject_seq} or $expect_value > $e_val_threshold){
										$alignment_hash_subject{"$subject_seq"}="$expect_value ";
										$alignment_hash_query{"$subject_seq"}="$expect_value ";
								}
						}
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# If $subject_seq defined, and expectation val is less than thresh, match Query seq line,
						#   0.0005 = $e_val_threshold
						#_____________________________________________________________________________________
						elsif($subject_seq and $expect_value < $e_val_threshold and /Query +(\d+) +(\S+) +\d+/){
								if($start_point){
										$alignment_hash_query{"$subject_seq"}.=$2;
								}else{ # If this is the first match of 'query', put dashes according to the start point
										$start_point=$1;
										$alignment_hash_query{"$subject_seq"}.="_"x($start_point-1).$2;
										$alignment_hash_subject{"$subject_seq"}.="_"x($start_point-1);
								}

						}
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# If $subject_seq defined, and expectation val is less than thresh, match Sbjct seq line
						#_____________________________________________________________________________________
						elsif($subject_seq and $expect_value < $e_val_threshold and /Sbjct +\d+ +(\S+) +\d+/){
								$alignment_hash_subject{"$subject_seq"}.=$1;
						}
				}
				close(BLAST_OUTPUT);

				# now in %alignment_hash, I have many pairs like:
				# --------VAVCQNMGIGK--DGNLPWPPLRNEYKYFQR
				# --------WARKNKLGWGFELKGSMPSAPLITEQTYFKD
				# -----------------------KTWFSIPEKNRPLK
				# -----------------------KTWEEIPALDKELK

				$output_msf="$file_base_name\.msf";

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~````
				# sorting the keys and then the first column of hash value(which is evals) and then
				# keys by keys names when the E vals are equal
				# This way, I get:
				# 0 : d8dfr__ query 7e-92 VRSLNSIVAVCQNMGIGKDGNLPWPPLRNEYKYFQRMTSTSHVEGKQNAVIM
				# 1 : d8dfr__ sbjct 7e-92 VRSLNSIVAVCQNMGIGKDGNLPWPPLRNEYKYFQRMTSTSHVEGKQNAVIM
				# 2 : nr_DYR_CHICK query 7e-92 VRSLNSIVAVCQNMGIGKDGNLPWPPLRNEYKYFQRMTSTSHVEGKQNAVIM
				# 3 : nr_DYR_CHICK sbjct 7e-92 VRSLNSIVAVCQNMGIGKDGNLPWPPLRNEYKYFQRMTSTSHVEGKQNAVIM
				#    ...
				#___________________________________________________________________
				@keys= map{ $_->[1] }
							 sort { $a->[0] <=> $b->[0] }
							 map{ $alignment_hash_subject{$_}=~/^(\S+)/ or $_=~/^(\S+)/ ; [$1, $_] }
							 sort keys %alignment_hash_subject;

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
				# Making the final alignments by adjusting gaps in every pairwise step comparison
				# keys   are 'd8dfr__ query',          'd8dfr__ sbjct',,,
				# values are '7e-92 VRSLNSIVAVCQ....', '7e-92 VRSLNSIVAVCQN....'
				#________________________________________________
				$template_query=$alignment_hash_query{$keys[0]};

				if($alignment_hash_query{$keys[0]}=~/^\S+ +(\S+)/){   @template_query_seq=split(//, $1);       }

				print "\n", @template_query_seq, "\n" if $verbose;


				for($j=0; $j < @keys; $j++){
						my($k, $evalue, @gapped_position, $query_seq, $g);

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						# matching query entry and inserting gaps
						#__________________________________________________
						$query_name=$keys[$j];
						if($alignment_hash_query{$query_name}=~/^(\S+) +(\S+)$/){
								$evalue=$1;
								$query_seq=$2;       }
						if($query_seq !~/\-/){  next   }

						my @splited_query_seq=split(//, $query_seq);

						$longest_query_seq=@splited_query_seq if @splited_query_seq > $longest_query_seq;

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						# skip gaps at the beginning
						#_____________________________________________________
						if($splited_query_seq[0] eq '_'){
								for($k=0; $k < @splited_query_seq; $k++){
										if($splited_query_seq[$k] ne '_' and $splited_query_seq[$k] ne '-'){
												last;
										}
								}
						}

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# Finding all the gapped position and saving them
						#___________________________________________________
						for( $s=$k-1; $s < @splited_query_seq; $s++){
								if($splited_query_seq[$s] eq '-'){
										push(@gapped_position, $s);
								}
						}

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
						# (1) TEMPLATE: matching sbjct entry and inserting gaps
						#__________________________________________________
						if(@gapped_position < 1){ next }
						for($g=0; $g< @gapped_position; $g++){
								$char_posi=$template_query_seq[$gapped_position[$g]] ;
								if($char_posi ne '-'){
										splice(@template_query_seq, $gapped_position[$g], 0, '-');
								}
						}
						print "\n# gaps are @gapped_position \n" if $verbose;
						@gapped_position=();
						next;
				}

				#print "\n      ", @template_query_seq, "\n The raw subject lines are:\n";

				if($verbose){
						for($k=0; $k< @keys; $k++){
								print $alignment_hash_subject{$keys[$k]}, "\n";
						}
						print "\n The raw QUERY lines\n";
						for($k=0; $k< @keys; $k++){
								print $alignment_hash_query{$keys[$k]}, "\n";
						}
				}

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
				# Fixing subject sequences according to template and query seqs
				#____________________________________________________________________
				for($g=0; $g < @keys; $g++){
						 $subject_name=$keys[$g];

						 my($evalue, @splited_subject_seq, @splited_query_seq);

						 $alignment_hash_query{  $subject_name}=~/^(\S+) +(\S+)/;
						 @splited_query_seq  =split(//, $2);
						 $alignment_hash_subject{$subject_name}=~/^(\S+) +(\S+)/;
						 @splited_subject_seq=split(//, $2);
						 $evalue=$1;

						 for($t=0; $t< @template_query_seq; $t++){
								 if($template_query_seq[$t] ne '-' ){
										 next
								 }elsif($template_query_seq[$t] eq '-'){

										 $char_of_the_position=$splited_query_seq[$t];
										 if($char_of_the_position ne '-' and $char_of_the_position ne '_'){

												 #print "\n# \$t is $t";
												 #print "\n# \$evalue is $evalue\n ==>";
												 #print @splited_query_seq, "\n ==>";
												 #print @splited_subject_seq, "\n ==>";
												 splice(@splited_subject_seq, $t, 0, '-');
												 splice(@splited_query_seq, $t, 0, '-');
												 #print @splited_query_seq, "\n ==>";
												 #print @splited_subject_seq, "\n";
												 next;
										 }elsif($char_of_the_position eq '_'){
												 splice(@splited_subject_seq, 0, 0, '_');
												 splice(@splited_query_seq, 0, 0, '_');

										 }elsif($char_of_the_position eq '-'){
												 next;
										 }
								 }
						 }
						 $new_subject_seq=join('', @splited_subject_seq);
						 $new_query_seq  =join('', @splited_query_seq);
						 #$alignment_hash{$keys[$g]}="$evalue $new_subject_seq";
						 #$alignment_hash{$keys[$g-1]}="$evalue $new_query_seq";
						 $alignment_hash_subject{$subject_name}="$evalue $new_subject_seq";
						 $alignment_hash_query{$subject_name}  ="$evalue $new_query_seq";
				}


				print "\n";print @template_query_seq, "\n" if $verbose;

				for($h=0; $h< @keys; $h++){
						 $subject_name=$keys[$h];
						 $alignment_hash_subject{$subject_name}=~/^(\S+) +(\S+)/;
						 #print "\n $alignment_hash_query{$subject_name}";
						 print "\n $alignment_hash_subject{$subject_name}";
						 $final_seq_out{$subject_name}=$2;
				}
				&write_msf(\%final_seq_out, \$output_msf);
				push(@final_out, $output_msf);
		}
		return(\@final_out);
}



#________________________________________________________________________________
# Title     : convert_bla_to_msp
# Usage     : %hash_out_final=%{&convert_bla_to_msp(\$file, [$Lean_output])};
# Function  : reads in PSI blast output and produces MSP file format.
#             Takes all the good hits below certain threshold in multiple iteration
#             Reports the best evalue with a given sequence name
# Example   : %hash_out=%{&convert_bla_to_msp(\$file)};
# Keywords  : pbla_to_msp, blast_to_msp, bla_2_msp, blastp_to_msp_format,
#             blast_to_msp_format, convert_bla_to_msp, convert_bla_to_msp_files
#             bla_to_msp
# Options   :
#   $pdbd_seq_only  d   for getting dxxxx_ like seq names only(pdb40d names for examp)
#   $all_seq  a         for forcing all seq conversion
#   $which_iteration= by i=    # choose which iteration result you want to take
#   $which_iteration   as just a digit
#   $report_only_the_best=b by b -b
#   $take_only_the_last_iteration=l by l
#   $accumulative_hits_eval_thresh= by e=
#   $genome_seq_only=g      by g
#   $nrdb_seq_only=n        by n
#   $evalue_thresh=         by E=
#   $Accumulate_matches=A   by A -A
#   $Lean_output=L          by L -L  # to remove search output to unclutter
#
# Author    : Sarah Teichmann and Jong Park, jong@salt2.med.harvard.edu
# Version   : 4.0
#--------------------------------------------------------------------------------
sub convert_bla_to_msp{
   my($i, $j, $k, @lines, $match_string_count,  $line_count, $query_string_count,
      $match_length, $Lean_output,
      $lines, $duplicated_match_count, $new_sorted_name, $sorted_name, $verbose,
      $pdbd_seq_only, $entry_found, $which_iteration, $report_only_the_best,
      $genome_seq_only, $all_seq, $header_found, $accumulative_hits_eval_thresh,
      $take_only_the_last_iteration, $original_query, $nrdb_seq_only,
      $get_the_final_iteration, $read_entry_lines, $verbose, $Accumulate_matches);
   my $match_leng_thresh=10;
   ### This localization is critial NOT my, as I use a sub which relies on this
   local(%hash_out, %accumulative_hits, $file, $score, $score_ori, $evalue,
         $evalue_ori, $seq_id, $query_range_start, $query_range_stop,
         $query, $match_string_start, $match_string_stop, $matched,
         $read_point_found);
   $duplicated_match_count=0;

   my $evalue_thresh=$accumulative_hits_eval_thresh=1; ## default eval threshes
   $query='query_seq'; ## default query seq name, to avoid blank name

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # Processing the input arguments to get file and options etc
   #_____________________________________________________________
   for (@_){
       if(ref $_ eq 'ARRAY'){ @lines =@{$_};
       }elsif( ref $_ eq 'SCALAR' and -s ${$_} ){ $file=${$_};
       }elsif( -s $_ ){            $file=$_;
       }elsif(/^ *d *$/){          $pdbd_seq_only='d'; $all_seq=''; $genome_seq_only='';
           print "\n $0: convert_bla_to_msp,  You set \$pdbd_seq_only option, I will skip others.\n";
       }elsif(/^ *[i=]*(\d+) *$/){ $which_iteration=$1;
       }elsif(/^ *b *$/){          $report_only_the_best='b';
       }elsif(/^ *a *$/){          $all_seq='a'; $genome_seq_only=''; $pdbd_seq_only=''; $nrdb_seq_only='';
       }elsif(/^ *g *$/){          $genome_seq_only='g'; $all_seq=''; $pdbd_seq_only='';$nrdb_seq_only='';
       }elsif(/^ *n *$/){          $nrdb_seq_only='n'; $all_seq=''; $pdbd_seq_only=''; $genome_seq_only='';
       }elsif(/^ *l *$/){          $take_only_the_last_iteration='l';
       }elsif(/^ *v *$/){          $verbose='v';
       }elsif(/^ *L *$/){          $Lean_output='L';
       }elsif(/e=(\S+)/){          $accumulative_hits_eval_thresh=$1;
       }elsif(/E=(\S+)/){          $evalue_thresh=$1;
       }elsif(/A$/){               $Accumulate_matches='A';  }
   }

   unless($which_iteration){  $get_the_final_iteration=1 }
   print "\n\n# (W) convert_bla_to_msp: NO \$Accumulate_matches opt \'A\' set. Sure??\n\n", chr(7) if !$Accumulate_matches;

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
   # If the input file is gzipped, uncompress it to text file and then open
   #__________________________________________________________________
   if($file=~/\.gz *$/){
       print "\n# (INFO) >>>>> Running gunzip to open $file";
       open(BLA_FILE, "gunzip -c $file|") || die "\n# $0: Failed to open $file\n";
       print "\n# (INFO) $0: bla_to_msp, input file was $file\n";
       if($file=~/^([de]*\d\d*\w\w\w\w\w)\./){         $query=$1;
       }
   }else{
       open(BLA_FILE, "$file") || die "\n# $0: Failed to open $file\n";
       print "\n# (i) $0: convert_bla_to_msp, input file was $file\n";
       if($file=~/^([de]*\d\d*\w\w\w\w\w)\./){         $query=$1;
       }
   }
   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # UP to NOW is frivalous option handling stuff
   #_______________________________________________________


   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # (1) Main reading in .pbla file (or any extension)
   #____________________________________________________________________________
   while(<BLA_FILE>){
      $line_count++;      $lines=$_;  ## putting $_ to $lines var
      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # (1) Matching "BLASTP 2.0.4 [Feb-24-1998]"
      #____________________________________________________________
      if(!$header_found and $lines=~/^ *BLASTP +\d/){  # blastp header as   BLASTP 2.0.2 [Sep-3-1997] or BLASTP 2.0.4 [Feb-24-1998]
           $header_found=1;
           print "\n# (1) \$header_found becomes $header_found\n";
      }elsif($header_found and $lines=~/^ *BLASTP +\d/){  print "\n# (E) \$header_found is already SET and I found \"BLASTP\", ERROR!!!\n"; exit }

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # (1.1) If I reach the end of the opened file, I use &put_msp_lines_to_hash_from_bla sub to write the final msp line and finish
      #________________________________________________________________________________________________________________
      if( eof ){
           if( $read_point_found <= $which_iteration  or  $get_the_final_iteration){
               print "     # (i) <<<< The end of file reached, writing $sorted_name \n\n";
               @out_from_put_msp_lines=@{&put_msp_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                        $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                        $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                        $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
               %hash_out=         %{$out_from_put_msp_lines[0]};
               %accumulative_hits=%{$out_from_put_msp_lines[1]};
               $read_point_found= $out_from_put_msp_lines[2];
               last;
           }
      }

      if($lines=~/^ *$/ or $lines=~/^  +Length +\= +\d+ *$/){      next     }  ## skipping some junk lines

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # (2) Extracting query seq name(this is the only place to get it)
      #____________________________________________________________
      if($lines=~/^ *Query= +(\S+)/){ $query=$original_query=$1;    next    }

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~
      # (3) 'Searching......done'  line indicates new search step(iteration)
      #_________________________________________________________________________
      if( $lines=~/^ *Searching\.\.+[done]/i ){
          $read_point_found++;
          if($verbose){
             print "\n==========================================================";
             print "\n# (3.1) Searching........ line found, so \$read_point_found is $read_point_found\n";
          }
          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # (3.2) This is just error checking
          #__________________________________________________________
          if(!$header_found){ ##  header is  'BLASTP 2.0.4 [Feb-24-1998]',  the very first line in bla output
              print chr(7); print chr(7); print chr(7);print chr(7);print chr(7);
              print "\n# (ERROR)\'Sequences producing\' line is matched while BLASTP header is not found\n";
              print "\n# (ERROR) convert_bla_to_msp:  It means the file is truncated !!!!\n"; exit;
          }

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
          #  (3.3) Following is the KEY part for controlling iteration
          #__________________________________________________________
          if( $which_iteration and $read_point_found < $which_iteration){
               print "\n# (INFO) skipped, \$which_iteration: ($which_iteration), \$read_point_found: ($read_point_found)" if $verbose;
               $match_string_count=$query_string_count=$score=$evalue=$seq_id=$score_ori=$evalue_ori='';
               $query_range_stop=$query_range_start=$match_string_stop=$msp_line=$new_sorted_name='';
               $entry_found=$duplicated_match_count=0;

               if( !$Accumulate_matches){
                   %hash_out=(); ## this is to remove any discarded pairs in the iteration
               }
               print "\n# (INFO) ===> New iteration ====\$read_point_found: $read_point_found, \$which_iteration:$which_iteration\n";
               next;
          }elsif( $which_iteration and $read_point_found == $which_iteration){
               $read_entry_lines=1;  next;
          }elsif( $which_iteration and $read_point_found >  $which_iteration){
               @out_from_put_msp_lines=@{&put_msp_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                        $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                        $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                        $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
               %hash_out=         %{$out_from_put_msp_lines[0]};
               %accumulative_hits=%{$out_from_put_msp_lines[1]};
               $read_point_found= $out_from_put_msp_lines[2];
               last;
          }elsif(!$which_iteration){
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
               # (3.4) Default situation
               #____________________________________________________________
               print "\n# (WARN) You did not set \$which_iteration option \n\n" if $verbose;
               if($read_point_found > 1){
                       print "\n (3.3) Writing the last entry $sorted_name BEFORE next Searching........ line\n";
                       @out_from_put_msp_lines=@{&put_msp_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                                $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                                $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                                $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
                       %hash_out=         %{$out_from_put_msp_lines[0]};
                       %accumulative_hits=%{$out_from_put_msp_lines[1]};
                       $read_point_found= $out_from_put_msp_lines[2];
               }
               $match_string_count=$query_string_count=$score=$evalue=$seq_id=$score_ori=$evalue_ori='';
               $query_range_stop=$query_range_start=$match_string_stop=$msp_line=$new_sorted_name='';
               $entry_found=$duplicated_match_count=0;
               if( !$Accumulate_matches){  %hash_out=(); $entry_found=0; $duplicated_match_count=0;     }
               $read_entry_lines=1; ## this is set by 'Searching......' line
               next;
          }
      }
      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # (4) '> xxxx '  New sequence entry, '>' starts
      #__________________________________________________________
      elsif($read_entry_lines and $lines=~/^\> *(\S+)/){
          $temp_match=$1;

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # (4.0) To get the pdbd seq names only 'dxxx__' sort of thing
          #________________________________________________________
          if($pdbd_seq_only and ($temp_match !~/^pdb_\S+/ and $temp_match !~/^[cde]\d\S+/)  ){
              $entry_found=0; print "\n# NOT pdb seq\n";    next;
          }elsif($genome_seq_only and $temp_match !~/^gn_\S+/){
              $entry_found=0; print "\n# NOT genome seq\n"; next
          }elsif($nrdb_seq_only and $temp_match !~/^nr_\S+/){
              $entry_found=0; print "\n# NOT nrdb\n";      next
          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
          # (4.1) This is the DEFAULT
          #_____________________________________________________________
          }else{ ## This is default and equivalent to have the $all_seq option on.
              $entry_found=1;
              if($match_string_count){ ## $match_string_count is incremented only by 'Sbjct' line
                    print "              (4.1) Writing $sorted_name ent to \%hash_out\n" if $verbose;
                    @out_from_put_msp_lines=@{&put_msp_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                             $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                             $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                             $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
                    %hash_out=         %{$out_from_put_msp_lines[0]};
                    %accumulative_hits=%{$out_from_put_msp_lines[1]};
                    $read_point_found= $out_from_put_msp_lines[2];
                    $match_string_count=0;
                    $duplicated_match_count=0;
                    print "              (4.1) reset \$match_string_count\n\n" if $verbose;
              }else{ print "              (4.1) \$match_string_count or \$duplicated_match_count is not right\n" if $verbose; }

              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
              #  Only with new seq entry, I count the pair occurrances
              #__________________________________________________________________
              $query=$original_query; $query_string_count='';
              $matched=$temp_match; ## this should be here, after if
              $sorted_name=join(' ', sort($query, $matched) );
              print "\n# (4) >>> NEW \$sorted_name is  $sorted_name <<------ \$entry_found ($entry_found)\n" if $verbose;
          }
          $match_string_count=0;
      }
      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # (5) Matching  Score =  325 bits (824), Expect = 6e-89           << 2 >>
      #_________________________________________________________________
      elsif( ($entry_found and $lines=~/^[\t ]*Score[\t ]*\=[\t ]*(\S+)[\t ]*bits +\(\S+\)\,[\t ]*Expect[\t ]*=[\t ]*(\S+)/i)
          or ($entry_found and $lines=~/^[\t ]*Score[\t ]*\=[\t ]*(\S+)[\t ]*bits.+\,[\t ]*Expect[\t ]*=[\t ]*(\S+)/i)){

          $score_ori=$1;
          $evalue_ori=$2;
          if($evalue_ori=~/^e\-\d\d\d/){ $evalue_ori="1".$evalue_ori; } ## bug fix for short eval in blast distribution

          if($match_string_count){ # $match_string_count is increased when Sbjct word is found
              if($evalue > $evalue_thresh){ $evalue=$evalue_ori; $score=$score_ori; next }
              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
              # When Only the first match(best evalue) is required, write msp line and reset $entry_found var
              #_________________________________________________________________________________________________
              if($report_only_the_best){
                  print "      (5)  \$report_only_the_best is set\n" if $verbose;
                  @out_from_put_msp_lines=@{&put_msp_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                           $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                           $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                           $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
                  %hash_out=         %{$out_from_put_msp_lines[0]};
                  %accumulative_hits=%{$out_from_put_msp_lines[1]};
                  $read_point_found= $out_from_put_msp_lines[2];
                  $entry_found=0; next;
              }else{
                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~`
                  # duplicated match count means, query matched more than one region of a match seq
                  #__________________________________________________________________________________
                  $duplicated_match_count++;
                  $sorted_name="$sorted_name $duplicated_match_count";
                  print " ====(5) Multiple region for \"$new_sorted_name\" is found =========== e= $evalue\n" if $verbose;
                  @out_from_put_msp_lines=@{&put_msp_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                           $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                           $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                           $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
                  %hash_out=         %{$out_from_put_msp_lines[0]};
                  %accumulative_hits=%{$out_from_put_msp_lines[1]};
                  $read_point_found= $out_from_put_msp_lines[2];
              }
              $score=$score_ori; $evalue=$evalue_ori;
          }else{
              print "     (5) \$match_string_count is not set NO write \$evalue_ori $evalue_ori\n" if $verbose;
              $evalue=$evalue_ori; $score=$score_ori;
          } ## to next line

          sub reset_all_the_vars{
             print "            !!!!  Reseting all the vars !!!!\n" if $verbose;
             $query_string_count=$score=$evalue=$seq_id=$query_range_stop=$query_range_start='';
             $match_string_stop=$msp_line=$new_sorted_name=$match_string_count='';
          }
      }

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
      # (6) Matching   Identities = 158/158 (100%), Positives = 158/158 (100%)    ,
      #____________________________________________________________________________________
      elsif( $entry_found and $lines=~/^ *Identities += +\S+\/(\S+) +\( *(\S+) *\%\)/i){
          $query_string_count=$match_string_count=0;
          $seq_id=$2/100;
          $match_length=$1;
          if($match_length < $match_leng_thresh){
              print "     (6) \$match_leng_thresh $match_leng_thresh > \$match_length $match_length" if $verbose;
              $entry_found=0;
              $match_string_count=1;
              next;
          }else{
              print "     (6) $sorted_name : ABOVE leng thresh. \$seq_id= $seq_id, \$match_length= $match_length\n" if $verbose;
          }
      }
      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # (7) Matching 'Query: 2 GIRAATSQEINELT..' line    ,
      #_________________________________________________________________
      elsif($entry_found and $lines=~/^ *Query\:?[\t ]+(\d+) +\D+ +(\d+)/){
          $query_string_count++;
          $query_line_found=1;
          if($query_string_count==1){      $query_range_start=$1;   $query_range_stop =$2;
          }elsif($query_string_count > 1){ $query_range_stop=$2;     }
          print "        (7) Query: line found: $query\_$query_range_start\-$query_range_stop\n" if $verbose;
      }
      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # (8) Matching 'Sbjct: 2 GIRAATSQEINELT..' line
      #_________________________________________________________________
      elsif($entry_found and $query_line_found and $lines=~/^ *Sbjct\:? +(\d+) +[\w\-]+ +(\d+)/i){
          $match_string_count++;
          $subject_line_found=1;
          if($match_string_count==1){      $match_string_start=$1;
                                           $match_string_stop =$2;
          }elsif($match_string_count > 1){ $match_string_stop=$2;      }
          print "        (8) Sbjct: line found: $temp_match\_$match_string_start\-$match_string_stop\n" if $verbose;
      }
      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # (9) Matching '   Database: ' line    ,                << END >>
      #_________________________________________________________________
      elsif( ($entry_found and  $lines=~/^ +Database: +\S+/) or eof){ # the very last write
          if($evalue > $evalue_thresh){ last
          }else{
              print "        <<<< The end of file reached, writing $sorted_name\n" if $verbose;
              @out_from_put_msp_lines=@{&put_msp_lines_to_hash_from_bla(\%hash_out, \%accumulative_hits, $query,$matched,$evalue, $score, $seq_id,
                                       $sorted_name, $query_range_start, $query_range_stop,$match_string_start,
                                       $match_string_stop, $read_point_found, $accumulative_hits_eval_thresh,
                                       $take_only_the_last_iteration, $accumulative_hits_eval_thresh, $evalue_thresh)};
              %hash_out=         %{$out_from_put_msp_lines[0]};
              %accumulative_hits=%{$out_from_put_msp_lines[1]};
              $read_point_found= $out_from_put_msp_lines[2];
              last;
          }
      }
   }
   close(BLA_FILE);
   unless( $take_only_the_last_iteration){
       print "\n# >> ACCUMULATIVE HITS are reported as you did not set \$take_only_the_last_iteration opt!!\n";
       %hash_out=(%hash_out, %accumulative_hits);
   }
   print "\n Congratulations!\n";
   print ">>>>>>>---- Finished reading in xxxx.pbla file Now sorting \%hash_out ----<<<<<<<<\n\n";
   &show_hash(\%hash_out) if $verbose;
   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
   # CLeaning up the BLA file if $Lean_output is set
   #_____________________________________________________
   $gzipped_search_file="$file\.gz";
   if($Lean_output ){ ## If Lean_out opt is set and $file exists and %hash_out is not empty, remove $file
       if(-s $file){
           unlink($file);  ## removes fam_8_8.pbla etc,
       }elsif(-s $gzipped_search_file){
           unlink($gzipped_search_file); ## removes fam_8_8.pbla.gz etc,
       }else{
           print "\n# (E) convert_bla_to_msp: tried to remove search out file for \$Lean_output opt,
                              but failed. Something is wrong. Think! or report to jong\@salt2.med.harvard.edu,
                              jong\@mrc-lmb.cam.ac.uk, sat\@mrc-lmb.cam.ac.uk, jong_p\@hotmail.com\n";
               exit;
       }

   }
   return(\%hash_out);
}



#______________________________________________________________________________
# Title     : put_msp_lines_to_hash_from_bla
# Usage     : @out_from_put_msp_lines=@{&put_msp_lines_to_hash_from_bla(\%hash_out,
#                                        $query,$matched,$evalue, $score, $seq_id,
#                                        $sorted_name, $query_range_start,
#                                        $query_range_stop,$match_string_start,
#                                        $match_string_stop, $read_point_found,
#                                        $accumulative_hits_eval_thresh,
#                                        $take_only_the_last_iteration)};
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.2
#------------------------------------------------------------------------------
sub put_msp_lines_to_hash_from_bla{
     my (@finale_out, $sorted_name, $msp_line, $evalue, $score, $matched, $seq_id, $query_range_start,$accumulative_hits_eval_thresh,
         $query_range_stop, $query, $match_string_start, $match_string_stop, $read_point_found, %hash_out, %accumulative_hits, $evalue_thresh);
     %hash_out=%{$_[0]};         %accumulative_hits=%{$_[1]};
     $query=$_[2];               $matched=$_[3];
     $evalue=$_[4];              $score=$_[5];
     $seq_id=$_[6];              $sorted_name=$_[7];
     $query_range_start=$_[8];   $query_range_stop =$_[9];
     $match_string_start=$_[10]; $match_string_stop=$_[11];
     $read_point_found=$_[12];   $accumulative_hits_eval_thresh=$_[13];
     $take_only_the_last_iteration=$_[14];
     $accumulative_hits_eval_thresh=$_[15];
     $evalue_thresh=$_[16];

     $query  ="$query\_$query_range_start\-$query_range_stop";

     if($matched !~/^\S+\_\d+\-\d+ *$/){         $matched="$matched\_$match_string_start\-$match_string_stop";
     }elsif($matched =~/^(\S+)\_\d+\-\d+ *$/){   $matched="$1\_$match_string_start\-$match_string_stop";     }

     if($score=~/\S/ and $evalue=~/\S/ and $match_string_start=~/\S/ and $evalue_thresh > $evalue){
         $msp_line=sprintf("%-6s %-9s %-5s %-5s %-5s %-32s %-5s %-5s %-38s %-3s\n",
                        $score, $evalue, $seq_id, $query_range_start, $query_range_stop,
                        $query, $match_string_start, $match_string_stop, $matched, $read_point_found);

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # This is where I really put the matches !!!
         #_____________________________________________________
         if($hash_out{$sorted_name}=~/^\S+ +(\S+) +/){
             if($1 >= $evalue){
                 print "                    (1) put_msp_lines_to_hash_from_bla: $1 >= $evalue WRITING to hash. 1\n" if $verbose;
                 $hash_out{$sorted_name}=$msp_line;
             }else{
                 print "                    put_msp_lines_to_hash_from_bla: $1 < $evalue_ori NO write to hash\n" if $verbose;  }
         }else{
             print "                    (2) put_msp_lines_to_hash_from_bla: NO eval >= $evalue WRITING to hash. 2\n" if $verbose;
             $hash_out{$sorted_name}=$msp_line;
         }

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # This part is to rescue the hits dropped by matrix migration
         #_________________________________________________________________
         if(!$take_only_the_last_iteration and $evalue <= $accumulative_hits_eval_thresh ){
            if($accumulative_hits{$sorted_name}){
               if($accumulative_hits{$sorted_name}=~/^[\t ]*\S+[\t ]+(\S+)[\t ]/){
                   if($evalue < $1){
                       $accumulative_hits{$sorted_name}=$msp_line;   }   }
            }else{ $accumulative_hits{$sorted_name}=$msp_line;     }
         }
     }else{     }
     @finale_out=(\%hash_out, \%accumulative_hits, $read_point_found, $query, $matched, $evalue, $score, $seq_id, $sorted_name,
                  $query_range_start, $query_range_stop, $match_string_start, $match_string_stop  );
     return(\@finale_out);
}


#________________________________________________________________________________
# Title     : convert_bla_multaln_to_msf
# Usage     : @msf_file_made=@{&convert_bla_multaln_to_msf(\@bla_file, [i=2])};
# Function  : matched each query seq name and if the E value is lower than
#             my arbitrary threshold, I put the subject and target pair
#             alignment into a hash.
#             In later iterations, the latest is replaced,
#              when you use m6 option for PSI blast
#             this adds '00x' extensions to the repeatedly occurring seq names
#
# Example   : @msf_file_made=@{&convert_bla_multaln_to_msf(\@bla_file,
#                                              $verbose, "i=$iteration")};
# Keywords  : psi_blast_to_msf, psi_blast_multaln_to_msf
# Options   :
#   i=$iteration
#   v  for verbose
# Author    :
# Category  :
# Version   : 1.6
#--------------------------------------------------------------------------------
sub convert_bla_multaln_to_msf{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my ($e_val_threshold)=0.0005;
    my(@template_query_seq, @keys, $present_iteration, $blank_line_counter,
       %alignment_hash_subject, $seq_order, $choose_iteration, %final_output_hash,
       $seq_name, %seq_names_in_block, $put_alphabet_to_number_only_name);
    $choose_iteration=1;
    if($vars{'i'}=~/(\d+)/){
       $choose_iteration=$1;
    }
    if($char_opt=~/v/){ $verbose='v' }
    if($char_opt=~/a/){ $put_alphabet_to_number_only_name='a' }

    print "\n# $0: bla_multaln_to_msf, \$choose_iteration is $choose_iteration\n";

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Opening file
    #______________________________________________
    for($i=0; $i< @file; $i++){
        $file_base_name=${&get_base_names($file[$i])};
        print "\n# bla_multaln_to_msf: processing $file[$i]\n";
        my($present_iteration, %seq_names_in_block, $seq_name_ori, $sequence);
        open(BLAST_OUTPUT, $file[$i]);
        while(<BLAST_OUTPUT>){
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
            #  Finds the query sequence, resets $start_point and next line
            #____________________________________________
            if(/^Query= *(\S+)/){
                $query_seq=$1;
                print "\n# The query sequence is: $query_seq\n";
            }elsif(/^Searching\.\.\.\.\.\.\.\./ or eof){ ### to make sure it gets the last one
                $present_iteration++;
                if($present_iteration > $choose_iteration){
                    %final_output_hash=%alignment_hash_subject;
                    last;
                }else{
                    %final_output_hash=%alignment_hash_subject;
                    %alignment_hash_subject=();
                    %seq_names_in_block=();
                    $seq_order='';
                }
            }elsif(/^(QUERY) +\d* *(\S\S+) *\d*$/){
                %seq_names_in_block=();
                $seq_name=$1;
                $seq_order=$seq_name;
                $seq_names_in_block{$seq_name}++;
                $alignment_hash_subject{$seq_name} .= $2; ## bug fix '.'
            }elsif(/^(\S+) +\d+ +(\S+) +\d+ *$/){
                $seq_name=$seq_name_ori=$1;
                $sequence=$2;

                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                # This is to replace number only seq names to alphabetical ones
                #_____________________________________________________________
                if($put_alphabet_to_number_only_name and $seq_name=~/^\d+$/){
                    $seq_name='T'.$seq_name;
                    $seq_name_ori='T'.$seq_name_ori;
                }

                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # This is to handle multiple identical seq name entry
                #_____________________________________________________
                if($seq_names_in_block{$seq_name} > 0){
                   $seq_name=$seq_name."00$seq_names_in_block{$seq_name}";
                }
                $seq_order.=" $seq_name";
                $alignment_hash_subject{$seq_name}.=$sequence;
                $seq_names_in_block{$seq_name_ori}++; ## NOTE that it is $seq_name_ori not  $seq_name
            }elsif(/^(\S+) +(\-+) *$/){
                $seq_name=$seq_name_ori=$1;
                $sequence=$2;

                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
                # This is to replace number only seq names to alphabetical ones
                #_____________________________________________________________
                if($put_alphabet_to_number_only_name and $seq_name=~/^\d+$/){
                    $seq_name='T'.$seq_name;
                    $seq_name_ori='T'.$seq_name_ori;
                }

                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # This is to handle multiple identical seq name entry
                #_____________________________________________________
                if($seq_names_in_block{$seq_name} > 0){
                   $seq_name=$seq_name."00$seq_names_in_block{$seq_name}";
                }
                $seq_order.=" $seq_name";
                $alignment_hash_subject{$seq_name}.=$sequence;
                $seq_names_in_block{$seq_name_ori}++;  ## NOTE that it is $seq_name_ori not  $seq_name
            }

        }
        close(BLAST_OUTPUT);
        print "\n# finished reading in BLAST output(@file) \n";

        # now in %alignment_hash, I have many pairs like:
        # --------VAVCQNMGIGK--DGNLPWPPLRNEYKYFQR
        # --------WARKNKLGWGFELKGSMPSAPLITEQTYFKD
        # -----------------------KTWFSIPEKNRPLK
        # -----------------------KTWEEIPALDKELK
        print "\n# the seq order is: $seq_order\n" if $verbose;

        $output_msf="$file_base_name\.msf";
        print "\n# $0: running write_msf subroutine\n";
        &write_msf(\%final_output_hash, \$output_msf, "o=$seq_order");
        print "\n# $0: $output_msf is created\n";
        push(@final_out, $output_msf);
    }
    return(\@final_out);
}








#______________________________________________________________
# Title     : get_sub_hash
# Usage     : %sub_hash=%{&get_sub_hash(\%FASTA, \@list)};
# Function  : fetches hash keys and values by giving keys to
#             a hash
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : subhash, sub_hash, get_hash_elements, fetch_sub_hash
#             take_sub_hash, get_hash_by_keys, get_sub_hash_by_keys
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_sub_hash{
		#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (%out_hash, @out_hash_all_ref);
	for($i=0; $i < @hash; $i++){
	   my %hash=%{$hash[$i]};
	   my @keys = keys %hash;
	   for($j=0; $j < @raw_string; $j++){
		  for($l=0; $l < @keys; $l++){
			if($keys[$l] eq $raw_string[$j]){
				$out_hash{$keys[$l]}=$hash{$keys[$l]};
			}
		  }
	   }
	   push(@out_hash_all_ref, \%out_hash);
	}
	if(@out_hash_all_ref > 1){
	   return(@out_hash_all_ref);
	}else{
	   return($out_hash_all_ref[0]);
	}
}



#______________________________________________________________
# Title     : get_smallest_file
# Usage     : $smallest_file_name=${&get_largest_file(@ARGV)};
# Function  : checks the size of files and returns the smallest
#             one's name. If a file is not present in pwd or
#             specified absolute path, it ignores it.
# Example   :
# Keywords  : choose_smallest_file, smallest_file, find_smallest_file
#             get_the_smallest_file, choose_the_smallest_file,
#             fetch_smallest_file, take_smallest_file, get_smaller_file,
# Options   : _  for debugging.
#             #  for debugging.
#             e  for extract the smallest from the input array
#                       leaving it one element less, in this case
#                       there will be two returning refs.
# Category  :
# Version   : 1.3
#--------------------------------------------------------------
sub get_smallest_file{
    my @in;
    if(ref $_[0] eq 'ARRAY'){
	 @in = @{$_[0]};
    }else{
	 @in = @_;
    }
    my $smallest=10000000000;
    my ($smallest_file, $i, $extract_opt);

    for($i=0; $i< @in; $i++){
	if(($in[$i]=~/^\-?e$/i)&&(!(-f $in[$i])) ){
	   $extract_opt=1;
	   splice(@in, $i, 1);
	   $i--;
	}
    }
    for($i=0; $i< @in; $i++){
	my $size=(-s $in[$i]);
	if($size < $smallest){
	   $smallest=$size;
	   if($extract_opt ==1){
		  print "\$extract_opt is $extract_opt \n";
		  push(@in, $smallest_file) if defined($smallest_file);
		  $smallest_file = splice(@in, $i, 1);
		  print "\n $smallest_file \n";
		  $i--;
	   }else{
		  $smallest_file=$in[$i];
	   }
	}
    }
    if($extract_opt==1){
         return(\$smallest_file, \@in);
    }else{ return(\$smallest_file); }
}

#______________________________________________________________
# Title     : get_largest_file
# Usage     : $largest_file_name=${&get_largest_file(@ARGV)};
# Function  : checks the size of files and returns the largest
#             one's name. If a file is not present in pwd or
#             specified absolute path, it ignores it.
# Example   :
# Keywords  : choose_largest_file, largest_file, find_largest_file
#             get_the_largest_file, choose_the_largest_file, get_biggest_file
#             fetch_largest_file, take_largest_file, get_bigger_file, get_larger_file
# Options   : _  for debugging.
#             #  for debugging.
#             e  for extract the largest from the input array
#                       leaving it one element less, in this case
#                       there will be two returning refs.
# Category  :
# Version   : 1.4
#--------------------------------------------------------------
sub get_largest_file{
    my @in;
    if(ref $_[0] eq 'ARRAY'){
	 @in = @{$_[0]};
    }else{
	 @in = @_;
    }

    my ($largest_file, $largest, $i, $extract_opt);
    for($i=0; $i< @in; $i++){
	if(($in[$i]=~/^\-?e$/i)&&(!(-f $in[$i])) ){
	   $extract_opt=1;
	   splice(@in, $i, 1);
	   $i--;
	}
    }
    for($i=0; $i< @in; $i++){
	my $size=(-s $in[$i]);
	if($size > $largest){
             $largest=$size;
             if($extract_opt ==1){
                    print "\$extract_opt is $extract_opt \n";
                    push(@in, $largest_file) if defined($largest_file);
                    $largest_file = splice(@in, $i, 1);
                    print "\n $largest_file \n";
                    $i--;
             }else{
                    $largest_file=$in[$i];
             }
        }
    }
    if($extract_opt==1){
	 return(\$largest_file, \@in);
    }else{ return(\$largest_file); }
}



#______________________________________________________________
# Title     : get_sequence_complexity
# Usage     : print "\n", ${&get_sequence_complexity(\$seq)};
# Function  : caculates the single sequence's sequence complexity
#             If the seq given is larger than 20, it divides it into
#             frags of 20 aa and gets the average of it.
# Example   :  ${&get_sequence_complexity(\$seq)};
#             while $seq='TTTTTACDEFGHIKLMNPQRSTVWYAAAAACCCADFADFA'
# Warning   :
# Keywords  : sequence_complexity, calc_sequence_complexity,
#             calc_seq_complexity, get_seq_complexity, seg
# Options   : _  for debugging.
#             #  for debugging.
#             'w=' for window size as the first arg
# Returns   : Ref. for a scalar digit.
# Argument  : ref. of string.
# Category  :
# Version   : 1.3
#--------------------------------------------------------------
sub get_sequence_complexity{
	 my ($complexity, @seq,$i, $j, @frag);
	 my $win=20;
	 if(ref($_[0]) eq 'ARRAY'){
	  @seq=@{$_[0]};
	 }else{
	  $seq=${$_[0]} || $_[0];
	  @seq=split(//, $seq);
	 }
	 if(defined($_[1])){  $win=${$_[1]} || $_[1]; }

	 if(@seq <= $win){
	 my (%seq, @keys);
	 for($i=0; $i< @seq; $i++){
		$seq{$seq[$i]}++;
	 }
	 @keys= keys %seq;
	 $complexity=@keys/@seq;
	 }else{
	 my @frag=@{&divide_array(\@seq, "s=$win")};
	 my @complexity=();
	 for($i=0; $i < @frag; $i++){
		my (%seq, @keys);
		my @arr=@{$frag[$i]};
		for($j=0; $j< @arr; $j++){
		   $seq{$arr[$j]}++;
		}
		@keys=keys %seq;
		push(@complexity, @keys/$win);
	 }
	 $complexity=${&array_average(\@complexity)};
	 }
	 return(\$complexity);
}

#______________________________________________________________________________
# Title     : calc_factorial
# Usage     :
# Function  :
# Example   :
# Keywords  : calculate_factorial, get_factorial
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub calc_factorial{
    my ($number, $factorial, $i);
    $factorial=1;
    if(ref($_[0]) eq 'SCALAR'){
        $number=${$_[0]};
    }else{
        $number=$_[0];
    }
    for($i=1; $i <=$number; $i++){
        $factorial=$factorial*$i;
    }
    return(\$factorial);
}


#______________________________________________________________
# Title     : make_swiss_index
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub make_swiss_index{
	 my ($swiss, %index);
	 if(-e "$ENV{'SWDIR'}seq.dat" ){
	  $swiss="$ENV{'SWDIR'}seq.dat";
	 }elsif( -f "$ENV{'SWISS'}seq.dat" ){
	  $swiss="$ENV{'SWISS'}seq.dat";
	 }elsif( -e 'seq.dat'){
	  $swiss="seq.dat";
	 }elsif( -f "$ENV{'swiss'}seq.dat"){
	  $swiss="$ENV{'swiss'}seq.dat";
	 }else{
	 ASK: print "\n Where is your swissprot seq.dat file?\n";
	  $swiss=<>;
	  chomp($swiss);
	  if(-e "$swiss"){
		 goto OPEN;
	  }else{
		 goto ASK;
	  }
	 }
	 OPEN: open(DB, "$swiss");
	 while(<DB>){

	 if(/^ID[\t ]+(\w+) +/){
		$index{$1}=tell(DB);
		print "\n$1 $index{$1}";
	 }
    }
}

#_____________________________________________________________________________
# Title     : fetch_sequence_from_db
# Usage     : %sequence=%{&fetch_sequence_from_db($input_file, \@string)};
# Function  : accept seq names (with or without ranges like _10-111 )
#              and produces hash ref.
#             As an option, you can write(xxxx.fa) the sequences in pwd
#              with the file names with sequence names.
#             The default database used is FASTA format OWL database.
#              You can change this by S (for Swissprot either fasta
#              or full format), P for PDB fasta format data.
#             If you give the path name of DB, it will look for the
#              DB given.
#
#             This automatically checks sequence family number as
#               in >d1bpi___7.6.1
#               and attaches the number in final %sequence output
#
# Example   : %seq=%{&fetch_sequence_from_db(\@input, seq.fa, seq.fa.idx)};
#              while @input=qw( 11S3_HELAN_11-31 A1AB_CANFA A1AT_PIG )
# Keywords  : fetch_seq_from_db, fetch_sequence_from_database
# Options   : _  or #  for debugging.
#     w       for write fasta file
#     s=      for putting source DB file name manually
#     d=p100  for PDB100 fasta database from ENV
#     d=p40   for PDB40  fasta database from ENV
#     d=p     for PDB database (usually p100) from ENV
#     d=s     for Swissprot database from ENV
#     d=o     for OWL database from ENV
#     i=      for index filename. If not specified, this looks for it in the same dir as fast     ˜
#     t=      for msp_threshold
#  msp_threshold=0.0005  # when MSP file is given as input for getting seq names
#
# Returns   : ref of hash
# Argument  : gets names of sequences
#             eg) \@array, \%hash, \$seq, while @array=(seq1, seq2), $seq='seq1 seq1'
#                                               %hash=(seq1, xxxx, seq2, yyyy);
#
# Version   : 3.6
#------------------------------------------------------------------------------
sub fetch_sequence_from_db{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

        my(@DATABASE, @INDEX_FILE, %sequence, %seq_with_index, @input_seq_names,
	   %long_index, @Keys, $R_start, $NAME, $R_leng, $found_seq_count,
	   $eval_blastpgp_thresh, $seq_found1, $sequence, @keys, $index_file,
	   $source_DB_file, $matching_seq, $match_start, $match_stop);

        $eval_blastpgp_thresh=0.001; # default

        if($vars{'msp_threshold'}=~/(\S+)/ or $vars{'mt'}=~/(\S+)/ or $vars{'t'}=~/(\S+)/ ){
              $eval_blastpgp_thresh=$1;
              print "\n# (INFO) YOU have set the \$eval_blastpgp_thresh $eval_blastpgp_thresh\n\n";
        }
        if($vars{'s'}=~/(\S+)/ or $vars{'DB'}=~/(\S+)/ ){
              $source_DB_file=$1; push(@DATABASE, $source_DB_file);
        }

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# getting input seq names from all sources
	#________________________________________________________
	for(0..@hash){ # from the given hash (keys names)
	     push(@input_seq_names, keys %{$hash[$_]} );
	}
	for(0..@raw_string){  ## from given sequence names
	     push(@input_seq_names, split(/ +/, $raw_string[$_]) );
	}
        for($i=0; $i<@file; $i++){  ## From MSP file input (get only MATCHED sequences)
             if($file[$i]=~/\.msp/){
                  print "\n# (INFO) MSP file input is detected !\n";
                  my ($seq_with_range);
                  open(MSP, $file[$i]);
                  while(<MSP>){
                       if(/^ *\S+ +(\S+) +\S+ +\S+ +\S+ +\S+ +\S+ +\S+ +(nr_\S+_DROME_\S+) +/){
                            push(@input_seq_names, $2) if $1 < $eval_blastpgp_thresh;
                       }elsif(/^[\t ]*\S+[\t ]+(\S+)[\t ]+\S*[\t ]*(\d+)[\t ]+(\d+)[\t ]+\S+[\t ]+(\d+)[\t ]+(\d+)[\t ]+(\S+) */){
                            $matching_seq=$6; $match_start=$4; $match_stop=$5;
                            $evalue=$1;
                            if($matching_seq=~/^(\S+)_\d+\-\d+/){
                                 $seq_with_range=$matching_seq;
                            }else{
                                 $seq_with_range="$matching_seq\_$match_start\-$match_stop";
                            }
                            push(@input_seq_names, $seq_with_range) if $evalue < $eval_blastpgp_thresh;
                            print "\n# (INFO) pushing $seq_with_range" if $verbose;
                       }
                  }
                  close(MSP);
                  splice(@file, $i, 1);
                  $i--;
             }
        }

	print "\n# (1) fetch_sequence_from_db: \@raw_string has: ", scalar(@raw_string), " elements";
	print "\n# (2) fetch_sequence_from_db: No. of seq to fetch is:",scalar(@input_seq_names);
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Choose the DBs and INDEX for fetching sequences. All files input must be DATABAE or INDEXfile
	#___________________________________________
        if(@file > 0){   print "\n# (i) GOOD! Input \@file has \"@file\"";
             for($i=0; $i< @file; $i++){
                if(-T $file[$i] and $file[$i]=~/\.fa[sta]?$/){      push(@DATABASE, $file[$i]);
                    if(-T "$file[$i]\.idx"){  push(@INDEX_FILE, "$file[$i]\.idx"); next; }
                }elsif((-T $file[$i]) and ($file[$i]=~/\.seq$/)){    push(@DATABASE, $file[$i]);
                }elsif((-T $file[$i]) and ($file[$i]=~/\.dat$/)){    push(@DATABASE, $file[$i]);
                }elsif(-T $file[$i] and $file[$i]=~/(\S+)\.idx$/){   push(@INDEX_FILE, $file[$i]);
                    push(@DATABASE, $1) if -s $1; next;
                }else{
                    print "\n#  WARN:  fetch_sequence_from_db:
                                        You put a file-name-like which is not a fasta DB. Error. I am removing $file[$i]";
                    splice(@file, $i, 1);
                    $i--;
                }
             }
	}else{
	   print "\n# (E)fetch_sequence_from_db: \@file is <= 0, ERROR\n"; exit;
	}

	if($vars{'d'}=~/^p[100]*$/){
	   if( -T  $ENV{'PDB_FASTA'} ){             push(@DATABASE,   $ENV{'PDB_FASTA'} );     }
	   elsif(  -T $ENV{'PDB_SEQ_FASTA'} ){      push(@DATABASE,   $ENV{'PDB_SEQ_FASTA'}  ); }
	   elsif(  -T $ENV{'PDB100_FASTA'} ){       push(@DATABASE,   $ENV{'PDB100_FASTA'} ); }
	   if(  -T $ENV{'PDB_FASTA_INDEX'} ){       push(@INDEX_FILE, $ENV{'PDB_FASTA_INDEX'} ); }
	}elsif( $vars{'d'}=~/^p\d+d$/ ){
	   if(  -T $ENV{'PDB100D_FASTA'} ){         push(@DATABASE,   $ENV{'PDB100D_FASTA'});     }
	   elsif(  -T $ENV{'PDBD100_FASTA'}  ){     push(@DATABASE,   $ENV{'PDBD100_FASTA'}); }
	   elsif(  -T $ENV{'PDB100D_SEQ_FASTA'}  ){ push(@DATABASE,   $ENV{'PDB100D_SEQ_FASTA'}); }
	   elsif(  -T $ENV{'PDBD100_SEQ_FASTA'}  ){ push(@DATABASE,   $ENV{'PDBD100_SEQ_FASTA'}); }
	   if(  -T $ENV{'PDB100D_SEQ_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'PDB100D_SEQ_FASTA_INDEX'}); }
	   elsif(  -T $ENV{'PDBD100_SEQ_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'PDBD100_SEQ_FASTA_INDEX'}); }
	}elsif( $vars{'d'}=~/^p40/ ){
	   if(  -T $ENV{'PDB40_FASTA'} ){          push(@DATABASE,   $ENV{'PDB40_FASTA'});     }
	   elsif(  -T $ENV{'PDB40_SEQ_FASTA'}  ){  push(@DATABASE,   $ENV{'PDB40_SEQ_FASTA'}); }
	   if(  -T $ENV{'PDB40_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'PDB40_FASTA_INDEX'}); }
	}elsif( $vars{'d'}=~/^p90/ ){
	   if(  -T $ENV{'PDB90_FASTA'}  ){         push(@DATABASE,   $ENV{'PDB90_FASTA'}    ); }
	   elsif(  -T $ENV{'PDB90_SEQ_FASTA'} ){   push(@DATABASE,   $ENV{'PDB90_SEQ_FASTA'}); }
	   if(  -T $ENV{'PDB90_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'PDB90_FASTA_INDEX'}); }
	}
	if( $vars{'d'}=~/^s *$/){
	   if(  -T $ENV{'SWISS_FASTA'} ){          push(@DATABASE,   $ENV{'SWISS_FASTA'});     }
	   elsif(  -T $ENV{'SWISS_SEQ_FASTA'} ){   push(@DATABASE,   $ENV{'SWISS_SEQ_FASTA'}); }
	   elsif(  -T $ENV{"SWISS_DIR\/seq.fa"} ){ push(@DATABASE,   $ENV{"SWISS_DIR\/seq.fa"}); }
	   if(  -T $ENV{'SWISS_FASTA_INDEX'} ){    push(@INDEX_FILE, $ENV{'SWISS_FASTA_INDEX'}); }
	   elsif(  -T $ENV{'SWINDEX'} ){           push(@INDEX_FILE, $ENV{'SWINDEX'}); }
	}elsif( $vars{'d'}=~/^o *$/){
		if(  -T $ENV{'OWL_FASTA'} ){            push(@DATABASE,   $ENV{'OWL_FASTA'});     }
		elsif(  -T $ENV{'OWL_SEQ_FASTA'} ){     push(@DATABASE,   $ENV{'OWL_SEQ_FASTA'}); }
		elsif(  -T $ENV{"OWL_DIR\/owl.fa"} ){   push(@DATABASE,   $ENV{"OWL_DIR\/owl.fa"}); }
		if(  -T $ENV{'OWL_FASTA_INDEX'} ){      push(@INDEX_FILE, $ENV{'OWL_FASTA_INDEX'}); }
		print "\n# Fetching sequences from OWL\n";
	}elsif( $vars{'d'}=~/^n *$/){
	   if(  -T $ENV{'NRDB_FASTA'} ){            push(@DATABASE,   $ENV{'NRDB_FASTA'});     }
 	   elsif(  -T $ENV{'NRDB_SEQ_FASTA'} ){     push(@DATABASE,   $ENV{'NRDB_SEQ_FASTA'}); }
			 if(  -T $ENV{'NRDB_FASTA_INDEX'} ){      push(@INDEX_FILE, $ENV{'NRDB_FASTA_INDEX'}); }
			 elsif(  -T $ENV{'NRDB_FASTA_IDX'} ){     push(@INDEX_FILE, $ENV{'NRDB_FASTA_IDX'}); }
	   print "\n# Fetching sequences from OWL\n";
	}elsif( $vars{'d'}=~/^\S+\.\S+$/ and -T $vars{'d'} ){ push(@DATABASE, $vars{'d'} );     }
	if( $vars{'i'}=~/\S+\.\S+$/ and -T $vars{'i'} ){ push(@INDEX_FILE, $vars{'i'} );   }

        if(@INDEX_FILE == 0 and @DATABASE > 0){
             if( ${&if_file_older_than_x_days("$DATABASE[0]\.idx", 5)} > 0 ){
                  print "\n# (i) fetch_sequence_from_db: $DATABASE[0]\.idx is old, rerunning make_seq_index_file\n";
                  $index_file=${&make_seq_index_file(\@DATABASE)};
                  push(@INDEX_FILE, $index_file);
             }elsif((-s "$DATABASE[0]\.idx") > 20){
                  print "\n# (i) fetch_sequence_from_db: $DATABASE[0]\.idx is larger than 20, USING IT";
                  push(@INDEX_FILE, "$DATABASE[0]\.idx");
             }else{
                  print "\n# (ERROR) fetch_sequence_from_db: Some weird error in pushing \$index_file to \@INDEX_FILE\n"; exit;
             }
	}elsif(@INDEX_FILE > 0 and @DATABASE > 0){
	}else{
	     print "\n\n# (W) \@INDEX_FILE and \@DATABASE are not big enough(0)\n";
             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             #  Final check for ALL the inputs
             #___________________________________________________
             if( @DATABASE  < 1){ print "\n# fetch_sequence_from_db: DATABASE file no found. Error\n"; exit     }
             if( @INDEX_FILE < 1){
                  print "\n# (E) fetch_sequence_from_db: \@INDEX_FILE has less than 1 elem. Error\n";
                  push(@INDEX_FILE, ${&make_seq_index_file(@DATABASE)});
                  print "     fetch_sequence_from_db called make_seq_index_file to make @INDEX_FILE\n";
             }
        }
        if($debug==1){
             print "\n# DATABASE used     : @DATABASE";
             print "\n# INDEX_FILE used   : @INDEX_FILE";
             print "\n# input_seq_names   : @input_seq_names";
        }

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
        # To skip a seq name bug (from Sarah)
        #________________________________________________________
        if($input_seq_names[0]=~/\S\-\S+\-\S/){
             print  "\n# (W) NO good having more than 2 dashes in seq. name: $input_seq_names[0], dying \n";
             sleep(2);
        }

        ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        ##  Now I have @DATABASE, @INDEX_FILE, @input_seq_names
        ##_______________________________________________________________

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
	#  Reading in index file to get 'seq' 'seek pos' to make %seq_with_index
	#__________________________________________________________________________
	print "\n#  fetch_sequence_from_db: \@INDEX_FILE @INDEX_FILE, \@DATABASE :@DATABASE\n";
	for($i=0; $i< @INDEX_FILE; $i++){
	   open(INDEX, "$INDEX_FILE[$i]");
	   while(<INDEX>){ if(/(\S+) +(\S+)/){  $long_index{$1}=$2;  }  }

	   for($j =0; $j < @input_seq_names; $j++){

                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~``
                 #  If DATABASE has sequence names with ranges already index the seq with ranges
                 #____________________________________________________________________________________
                 if($input_seq_names[$j]=~/(\S+\_\d+\-\d+)$/ and $long_index{$1}){
                     $seq_with_index{$1}=$long_index{$1};
                 }elsif( ($input_seq_names[$j]=~/pdb_(\S+\_\d+\-\d+)/ or $input_seq_names[$j]=~/nr_(\S+\_\d+\-\d+)/ )
                   and $long_index{$1}){
                     $seq_with_index{$1}=$long_index{$1};
                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~``
                 #  If DATABASE has sequence names without ranges index the seq without ranges
                 #____________________________________________________________________________________
                 }elsif($input_seq_names[$j]=~/(\S+)\_\d+\-\d+$/ and $long_index{$1}){
                     $seq_with_index{$input_seq_names[$j]}=$long_index{$1}; # !!!! <--- This line is critical
                 }elsif($input_seq_names[$j]=~/(\S+)\_\d+\-\d+/ and $long_index{"$1\_"}){ # to handle Tim's new pdb100.fa files
                     $seq_with_index{$input_seq_names[$j]}=$long_index{"$1\_"};
                     print "\n# Warning: $1 (from $input_seq_names[$j]) matched with $1\_ in $INDEX_FILE[$i],
                                                                      I hope this is correct!!\n";
                 }elsif($input_seq_names[$j]=~/nr_(\S+)\_\d+\-\d+/ and $long_index{"$1"}){ # to handle Tim's new pdb100.fa files
                     print "           (W) $input_seq_names[$j] matched nr_XXXX_ddd-ddd format removing nr_\n";
                     $seq_with_index{$1}=$long_index{$1};
                 }elsif($input_seq_names[$j]=~/pdb_(\S+)\_\d+\-\d+/ and $long_index{$1}){
                     print "           (W) $input_seq_names[$j] matched pdb_XXXX_ddd-ddd format removing pdb_\n";
                     $seq_with_index{$1}=$long_index{$1}; # !!!! <--- This line is critical
                 }
                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`~``
                 #  If input_seq_name has SCOP superfamily numbers
                 #____________________________________________________________________________________
                 elsif($input_seq_names[$j]=~/^(\S+)\_(\d+\.\d+\.\d+)[\.\d+\.\d+]*/ and $long_index{$1}){
                     $seq_with_index{"$1\_$2"}=$long_index{$1}; # !!!! <--- This line is critical
                 }elsif($input_seq_names[$j]=~/\S/ and $long_index{$input_seq_names[$j]}){
                     $seq_with_index{$input_seq_names[$j]}=$long_index{$input_seq_names[$j]}
                 }else{
                     print chr(7);
                     print "\n# (E) $input_seq_names[$j](with, sans range) has NO corrspndng indx in $INDEX_FILE[$i]";
                 }

	   }
	   close INDEX;

	   if ( scalar(keys %seq_with_index) < 1){
                  print "\n# fetch_sequence_from_db: \%seq_with_index is too small, ERROR?\n";
	   }
	}

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
	#  Fetching sequences from DATABASE
	#_______________________________________________________________
	print "\n# fetch_sequence_from_db: Fetching seqs from @DATABASE with  @INDEX_FILE ";
	@Keys= sort {$seq_with_index{$a} <=> $seq_with_index{$b} } keys %seq_with_index;        ## <<< NOTE it is @Keys, not @keys
	print "\n# (3) fetch_sequence_from_db: No. of seq indexed is:", scalar(@Keys);

	for($f=0; $f< @DATABASE; $f++){

           open(DB_FASTA, $DATABASE[$f]);

	   F0: for($e=0; $e< @Keys; $e++){
		  my ($seq_found1, $super_fam_class, $NAME, $R_leng, $R_start, $sequence);
		  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		  # When seq name has range attachment, it handles
		  #________________________________________________
		  if($Keys[$e]=~/(\S+)_(\d+)\-(\d+)$/){
		      $NAME=$1;
		      $R_start=$2-1;      ## to fit in substr function
		      $R_leng =$3-$2+1; ## to fit in substr
		      print "\n# (4) fetch_sequence_from_db: Sequences have ranges ($R_start-$R_leng) only (not superfamily numb.) \n";
		  }
		  elsif($Keys[$e]=~/(\S+)_(\d+)\-(\d+)\_(\d+\.\d+\.\d+)[\.\d+\.\d+]*/){
		      $NAME=$1;
		      $R_start=$2-1;      ## to fit in substr function
		      $R_leng =$3-$2+1; ## to fit in substr
		      $super_fam_class=$4;
		      print "\n# (4) fetch_sequence_from_db: Sequences have ranges and superfamily numb.\n";
		  }
		  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		  # When superfamily (scop) number is attached
		  #___________________________________________________
		  elsif($Keys[$e]=~/(\S+)\_(\d+\.\d+\.\d+)[\.\d+\.\d+]*/){
                      $NAME=$1;
		      $super_fam_class=$2;
		      print "\n# (4) fetch_sequence_from_db: Sequences have SCOP superfamily numbers only \n";
		  }elsif($Keys[$e]=~/^ *(\S+)[\,]*$/){
		      print "\n# (4) fetch_sequence_from_db: Sequences DON't have ranges or SCOP superfam numb.\n";
							$NAME=$1;
		  }

		  print "\n# BEFORE reading in DB file. \$NAME is  $NAME";

		  if($seq_with_index{$NAME}=~/(\d+)/        # It is importnt having $seq_with_index{$Keys[$e]}
			   or $seq_with_index{$Keys[$e]}=~/(\d+)/
			   or $seq_with_index{"$NAME\,"}=~/(\d+)/    # this is for overcoming '>xxxx,'  entry(the comma)
                           or $seq_with_index{"$NAME\_"}=~/(\d+)/    # to handle Tim's  >c1eru_ 3.30.1.1.4
                           or $seq_with_index{"pdb\_$NAME"}=~/(\d+)/ # to handle Sarah's pdb_xxxxx
                           or $seq_with_index{"nr\_$NAME"}=~/(\d+)/ # to handle Sarah's nr_xxxxx
			   ){
			   my $finding_position= $1-300;
			   if( $finding_position >= 0 ){   seek(DB_FASTA, $1-300, 0);  # -300 is necessary
			   }elsif($finding_position < 0){  seek(DB_FASTA, 0, 0); }      ## This is essential !!!

			   while(<DB_FASTA>){
				  if(!$seq_found1){
					if(/\> *$NAME[\,_]? *\d*/){
						  $seq_found1=1;
						  print "\n# $NAME  is found in DB, Good ";
					  }
				  }else{
					  if(/^ *(\w+) *$/ ){
					      $sequence .=$1;  ## you should use $1 to avoid including NEW line
						  unless(eof DB_FASTA){ next   ## This is critically important to prevent error.
						  }else{ goto PUT_SEQ }     ## If the last seq has only one single line seq string, it could be a problem
					  }elsif( (/^ *\> *\S+/)  or (eof DB_FASTA) ){
                                               #======= When range is defined ==================
                                               PUT_SEQ:
                                               if($R_start =~/\d+/){
                                                                                                                     $sequence{$Keys[$e]}=substr($sequence, $R_start, $R_leng); next; #
                                                                                                                     print "\n# $sequence{$Keys[$e]}  is put to \%sequence";
                                               }
                                               #======= To handle superfamily information given ==========
                                               if($super_fam_class){
                                                       $sequence{$Keys[$e]}=$sequence;
                                                       $acquired_seq_count++;
                                               }
                                               #======= When range is NOT defined ==================
                                               else{
                                                       $sequence{$Keys[$e]}=$sequence;
                                                                                                                     print "\n# $sequence  is put to \%sequence";
                                               }
                                               $R_start='';
                                               $sequence='';
                                               $seq_found1='';  ## reset $R_start, $seq_found1,,
                                               next F0;
					  }
				  }
			   }

		  }else{
			   print "\n# Error, the sequence pos for $NAME (from $Keys[$e]) in DB doesnt exist in xxxx.idx file?\n";
		  }
	   }
	   close DB_FASTA;
	}
	#print "\n# (6) fetch_sequence_from_db: counted fetched seqs: $found_seq_count, $acquired_seq_count";
	#print "\n# (7) fetch_sequence_from_db: Fetching seq has finished \n";

	return(\%sequence);
}



#______________________________________________________________
# Title     : fetch_seq
# Usage     : &fetch_seq(@ARGV);
# Function  : fetches swissprot entry or fasta format seq with
#             given seq name(like  SAA_HORSE, SA*HORSE, SAA,..)
#             you can give multi files(SAA*, SAU*) at the same
#             time. This uses ENV setting of 'SWDIR'
# Example   : &fetch_swiss_seq(@ARGV);
# Keywords  : fetch_swissprot_sequence, fetch_sequence,
#             find_swiss_sequence, find_sequence
# Options   : _  for debugging.
#             #  for debugging.
#             -f for fasta format file output
#             -a is for ALL matched seq. (same as using glob=> *YEAST)
#             -c is for Creating seq.idx file
#             -h is for HELP!
#             -g is for GDF file format output
#             -l is for list of match entries(in 1 column)
#             -s is for species option (input name mst be species (YEAST, RAT, HUMAN..)
#             n= is for Number of seq you want to get from swissprot
#             s= is for Size limit. Min seq size in swiss, s=10  -> minimum 11 aa seq.
#             S= is for Size limit. Max seq size in swiss, s=1000 -> get less than 1000
#
# Argument  : swissprot seqname
# Category  :
# Version   : 1.7
#--------------------------------------------------------------
sub fetch_seq{
	 my @in=@_;
	 my ($FASTA_index, $FASTA, $where_index, %index, $question, $i,
	    $s,$t,$fasta,$index_file, $all, $species,$target, $matched, $seq, $gdf, $list, $count, $create);
	 my $SEQ_size_max=100000000;

	 if(@_ < 1){	  &HELP_fetch_seq;
	 }else{
	 F: for($t=0; $t<@in; $t++){ #'''''''''''' PROMPT ARGV processing ''''''''''''''''''
		if($in[$t]=~/^\-c$/i){
		   $create=1; splice(@in, $t, 1); $t--;
		   print "\n You should provide database\(e.g, seq.dat\) file with this opt, I guess you did\n";
		   print "\n If you wanted to make an index with any fasta db, you also have to\n";
		   print "  give the file name. e.g:\n     $0 -c /DB/swiss/seq.dat\n";
		   print "  or $0 -c my_db.fa\n\n";
		   next; }
		if($in[$t]=~/^\-af$/){ $fasta=$all=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-afs$/){ $species=$fasta=$all=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-ag$/){ $gdf=$all=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-g$/){    $gdf=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-f$/i){   $fasta=1; splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-a$/i){   $all=1;   splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-l$/i){   $list=$all=1;   splice(@in, $t, 1); $t--; next; }
		if($in[$t]=~/^\-s$/i){  $species=$all=1; splice(@in, $t, 1); $t--; next; }
		if( ($in[$t]=~/seq\.dat/)&&(-f $in[$t])){ ## if the path for swiss prot is given
		   $DB=$in[$t];  splice(@in, $t, 1); $t--; next;        }
		if( ($in[$t]=~/seq\.idx/)&&(-e $in[$t])){ ## if the path for swiss index is given
		   $index_file=$in[$t];	splice(@in, $t, 1); $t--; next;	}

		#''''''' SWiss prompt input file check ''''''''''''''''''
		if( -f $in[$t]){
		   open(TEMP, "$in[$t]");
		   while(<TEMP>){
			 if(/^ID[\t ]+\w+/){$DB=$in[$t]; splice(@in, $t, 1);$t--;next F;}}
		   close TEMP;
		}

		#'''''''' FASTA prmpt input file check '''''''''''''''''''
		if( (-f $in[$t]) && !(defined($FASTA))){
		   open(TEMP, "$in[$t]");
		   while(<TEMP>){
			 if(/^\> {0,4}\S+/){$FASTA=$in[$t]; $fasta=1;
			 if(-s "$FASTA\.idx"){ $FASTA_index="$FASTA\.idx"; }
		     splice(@in, $t, 1);$t--;next F;}}
		   close TEMP;
		}

		#'''''''' INDEX file automatic check ''''''''''''''''''
		if( -f $in[$t]){
		   open(TEMP2, "$in[$t]");
		   my ($first_pos, $Count, @splited);
		   while(<TEMP2>){
			 $Count++;
			 if( $Count>3 ){
				if(/^ {0,2}\S+ +(\d+)/){
				   if(defined($first_pos) && ($1-$first_pos ) > 1000 ){
					  $index_file=$in[$t];
					  splice(@in, $t, 1);$t--;next F;
				   }elsif( defined($first_pos) && ($1-$first_pos)<1000 ){
					  $FASTA_index=$in[$t]; $fasta=1;
					  if($FASTA_index=~/^(\S+)\.\w+$/){
					     if(-s $1){ $FASTA= $1; }
					  }
					  splice(@in, $t, 1);$t--;next F;
				   }
				   $first_pos=$1;
				}
			 }
		   }
		   close TEMP2;
		}
		if($in[$t]=~/^\-h$/i){ &HELP_fetch_seq; exit;}
		if($in[$t]=~/^n=(\d+)$/i){ $SEQ_num_to_fetch=$1;
		   splice(@in, $t, 1);$t--;next F;}
		if($in[$t]=~/^s=(\d+)$/){ $SEQ_size_min=$1; $fasta=1;
		   splice(@in, $t, 1);$t--;next F;}
		if($in[$t]=~/^S=(\d+)$/){ $SEQ_size_max=$1; $fasta=1;
		   splice(@in, $t, 1);$t--;next F;}
	 }

	 if(($create==1)&&(defined($DB)) ){ goto CREATE; }
	 elsif(($create==1) && (defined($FASTA)) ){ goto CREATE; }
	 elsif($create==1){
	    print "\n You must give db filename (e.g. seq.dat) with path to make an index";
	    print "\n  I can handle fasta db file to make an index\n";
	    exit;
	 }
	 }

	 if($SEQ_size_max < $SEQ_size_min){ print "\n Seq size Max is smaller than min\n"; exit; }

	 ##""""""""""""""""""""""" DB file if not defined """"""""""""""""""""""""""""""""""""""""""""
	 if (!defined($DB)){
	  if((!defined($FASTA))&&($fasta==1)&&(-T "$ENV{'FASTADB'}")){
		 $FASTA=$ENV{'FASTADB'};
	  }elsif(defined($FASTA) && ($fasta==1) &&($create !=1) ){
		 goto SW_INDEX;
	  }elsif(!defined($FASTA) && (defined($FASTA_index))&& !(-e "$ENV{'FASTADB'}") ){
		 print "\n NO fasta db is defined\n";
		 goto ASK;
	  }elsif(-e "$ENV{'SWDIR'}seq.dat" ){
		 $DB="$ENV{'SWDIR'}seq.dat";
	  }elsif(-e "$ENV{'FETCHSWISS'}seq.dat" ){
		 $DB="$ENV{'FETCHSWISS'}seq.dat";
	  }elsif(-e "$ENV{'FETCHSWISS'}" ){
		 $DB="$ENV{'FETCHSWISS'}";
	  }elsif(-e "$ENV{'SWDIR'}\/seq.dat" ){
		 $DB="$ENV{'SWDIR'}\/seq.dat";
	  }elsif( -f "$ENV{'SWISS'}seq.dat" ){
		 $DB="$ENV{'SWISS'}seq.dat";
	  }elsif( -f "$ENV{'SWISS'}\/seq.dat" ){
		 $DB="$ENV{'SWISS'}\/seq.dat";
	  }elsif( -e 'seq.dat'){
		 $DB="seq.dat";
	  }elsif( -f "$ENV{'swiss'}seq.dat"){
		 $DB="$ENV{'swiss'}seq.dat";
	  }elsif(-f "ENV{'HOME'}seq.dat"){
		 $DB="ENV{'HOME'}seq.dat";
	  }elsif(-f "ENV{'SWDIR'}\/seq.dat"){
		 $DB="ENV{'SWDIR'}\/seq.dat";
	  }else{
		ASK: print "\n Where is your swissprot seq.dat(or fasta db) file?\n";
			 print "  I recommand you to set the path for them in ENV vars\n";
			 print "  e.g. export SWDIR=/DB/Swiss/  to where you put seq.dat\n";
			 print "  e.g. export FASTADB=/DB/Swiss/my_swiss.fa  for fasta database\n";
		 $swiss=<STDIN>;
		 chomp($swiss);
		 if( -f $swiss){
			open(TEMP, "$swiss");
			while(<TEMP>){
			   if(/^ID[\t ]+\w+/){ $DB=$swiss; goto SW_INDEX; }
			   elsif(/^\> {0,3}\S+/){ $FASTA=$swiss; goto SW_INDEX;}
			}
			close TEMP;
		 }else{
			goto ASK;
		 }
	  }
	 }
	 ##""""""""""""""""""""""""""""" INDEX file """"""""""""""""""""""""""""""""""""""""
	 if( !defined($index_file)){
	  SW_INDEX:
	  if((!defined($FASTA_index))&&($fasta==1)&&(-T "$ENV{'FASTAINDEX'}")){
		 $FASTA_index=$ENV{'FASTAINDEX'};
	  }elsif(!defined($FASTA_index)&&(-T $FASTA)){
		 goto W;
	  }elsif(defined($FASTA_index)&&(-T $FASTA)){
		 goto MAIN_SEARCH;
	  }elsif(-e "$ENV{'FETCHSWISSINDEX'}seq.idx" ){
		 $index_file="$ENV{'FETCHSWISSINDEX'}seq.idx";
	  }elsif(-e "$ENV{'FETCHSWISSINDEX'}\/seq.idx" ){
		 $index_file="$ENV{'FETCHSWISSINDEX'}\/seq.idx";
	  }elsif(-e "$ENV{'SWDIR'}seq.idx" ){
		 $index_file="$ENV{'SWDIR'}seq.idx";
	  }elsif( -f "$ENV{'SWISS'}seq.idx" ){
		 $index_file="$ENV{'SWISS'}seq.idx";
	  }elsif( -f "$ENV{'SWISS'}\/seq.idx" ){
		 $index_file="$ENV{'SWISS'}\/seq.idx";
	  }elsif( -f "$ENV{'SWINDEX'}" ){
		 $index_file="$ENV{'SWINDEX'}";
	  }elsif( -e 'seq.idx'){
		 $index_file="seq.idx";
	  }elsif( -f "$ENV{'swiss'}seq.idx"){
		 $index_file= "$ENV{'swiss'}seq.idx";
	  }elsif( -f "$ENV{'SWINDEX'}seq.idx"){
		 $index_file= "$ENV{'SWINDEX'}seq.idx";
	  }elsif( -f "$ENV{'HOME'}seq.idx"){
		 $index_file= "$ENV{'HOME'}seq.idx";
	  }elsif( -f "$ENV{'SWINDEX'}seq.idx"){
		 $index_file="$ENV{'SWINDEX'}\/seq.idx";
	  }elsif( -f "$ENV{'swindex'}seq.idx"){
		 $index_file="$ENV{'swindex'}seq.idx";
	  }elsif(defined($DB)|| defined($FASTA) ){
		 print "\n Your swissprot is in $DB, but no seq.idx file for it.\n";
		 W: print "\n Where is seq.idx(or fasta idx file eg. $FASTA\.idx), type path and filename?\n";
		    print "  I recommand you to set the path for them in ENV vars later\n";
			print "  e.g. export SWINDEX=/DB/Swiss/  to where you put seq.dat index\n";
			print "  e.g. export FASTAINDEX=/DB/Swiss/my.fa.idx  for fasta db index\n";
			print "  Asking where 3 times. After, will ask creation of seq.idx or $FASTA.idx\n";
		 $question++;
		 $where_index=<STDIN>;
		 chomp($where_index);
		 if(-f $where_index){
			open(TMP, "$where_index");
		    while(<TMP>){
				if($_=~/^ {0,2}\S+ +\d+/){
				   $index_file=$where_index;
				   print "\n Your index file seems to be right \($index_file\) \n";
				   goto MAIN_SEARCH;
				}elsif($count > 4){ # read at least 4 lines and see if they are index
				   print "\n $where_index doesn't seem to be index file\n";
				   print "\n Terminate(t) or go on (g) trying\n";
				   $try=getc;
				   if($try=~/t/i){  exit; }
				   else{ goto W; }
				}else{
				   $count++;
				}
			}
			close TMP;
		 }else{
			if($question > 2){
			   print "\n I can create the index in pwd for you run $0 and \n";
			   print "\n you can copy seq.idx(or $FASTA\.idx) into your swissprot dir later\n";
			   goto CREATE;
			}
			goto W;
		 }

		 #""""""""""""""" CREATION of INDEX file """""""""""""""""""""""""""""""""""""""""""""
		 CREATE:
		 if(defined($DB)){ print "\n Can I create seq.idx in pwd? (y+return or return)\n" }
		 if(defined($FASTA)){ print "\n Can I create $FASTA\.idx in pwd? (y+return or return)\n" }
		 $yes_no=getc;
		 if($yes_no=~/y/i){
			if(defined($DB)){
			   print "\n seq.idx being created...\(1 min in my Linux\)\n";
			   open(DB, "$DB");
			   open(IDX, ">seq.idx");
			   print IDX "# swiss_index\n";
			   while(<DB>){
				 if(/^ID[\t ]+(\w+) +/){
					$index{$1}=tell(DB);
					print IDX "\n$1 $index{$1}";
				 }
			   }
			   close(DB);
			   close(IDX);
			   if(-s "seq.idx"){
				   print "\nGood. seq\.idx is created.";
				   print "\n Copy seq.idx to SWISSPROT dir or you can set\n";
				   print "absolute path ENV var \'SWINDEX\' to your seq.idx path\n";
				   print "e.g. #bash\> export SWINDEX=\/DB\/Swiss\/seq.idx\n\n";
				   if($create==1){ exit;  }
			   }else{
				   print "\n Creation of seq.dat seems to have gone wrong";
			   }

			}elsif(defined($FASTA)){
			   $F_idx="$FASTA\.idx";
			   print "\n $F_idx being created...\n";
			   open(FASTADB, "$FASTA");
			   open(FASTAIDX, ">$F_idx");
			   print FASTAIDX "# fasta_index\n";
			   while(<FASTADB>){
				 if(/^\> {0,4}(\S+) */){
					$index{$1}=tell(FASTADB);
					print FASTAIDX "\n$1 $index{$1}";
				 }
			   }
			   close(FASTADB);
			   close(FASTAIDX);
			   if(-s $F_idx){
				   print "\nGood! Copy $F_idx to your DB dir and set two ENV vars\n";
				   print "absolute path ENV var \'FASTADB\' to your fastadb path\n";
				   print "absolute path ENV var \'FASTAINDEX\' to your $F_idx path\n";
				   print "e.g. #bash\> export FASTADB   =\/DB\/mySwiss\/$FASTA\n";
				   print "e.g. #bash\> export FASTAINDEX=\/DB\/mySwiss\/$F_idx\n";
				   print "e.g. #tcsh\> setenv FASTADB    \/DB\/mySwiss\/$FASTA\n";
				   print "e.g. #tcsh\> setenv FASTAINDEX \/DB\/mySwiss\/$F_idx\n";
				   print "Unless, you can specify the database each time at prompt\n\n";
				   if($create==1){ exit;  }
			   }else{
				   print "\n Creation of seq.dat or $F_idx seems to have gone wrong";
			   }
			}
		 }else{
			exit;
		 }
	  }
	 }

	 #""""""""""""""""""""""""""" MAIN SERACH """""""""""""""""""""""""""""""""""""""""""""""
	 MAIN_SEARCH:
	 for($i=0; $i<@in; $i++){
	  my (@possible, @pos, %possible); my $target=$in[$i];
	  if($target=~/\*/){
		 $target=~s/\*/\\\w\{0,6\}/; # to handle glob input
		 $all=1;
	  }
	  if(defined($index_file)){
		 open(INDEX, "$index_file");
		 if($species==1){
		    while(<INDEX>){
		      if( /(\w*\_$target) +(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }else{
		    while(<INDEX>){
		      if( /(\w*$target\w*) +(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }
		 close INDEX;
		 goto SWISS;
	  }elsif(($fasta==1) && (defined($FASTA_index)) ){
		 open(INDEX, "$FASTA_index");
		 if($species==1){
		    while(<INDEX>){
		      if( /(\w*\_$target) +(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }else{
		    while(<INDEX>){
		      if( /(\w*$target\w*) +(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }
		 close INDEX;
		 goto FASTA;
	  }

	  SWISS:
	  @poss = sort keys %possible;

	  if( (@poss >1)&&($all !=1)){
		 print "\n @poss","\n";
		 print chr(7);
		 print "\n There are more than a few seqs for $in[$i]";
		 print "\n be more specific! OR use -a option for all matched\n\n";
		 exit;
	  }elsif($all !=1){
		 print "\n";
		 open (DB, "$DB");
		 if(defined($SEQ_num_to_fetch)){
			print "\n# You defined the number of sequence to fetch: $SEQ_num_to_fetch\n";
			$num_sequence=$SEQ_num_to_fetch;
		 }else{ $num_sequence=@poss; }

		 A:for($p=0; $p < $num_sequence; $p++){
		   if($poss[$p]=~/\w*$target\w*/){
			 $matched=$possible{$poss[$p]};   # %possible has the name and index num
			 seek(DB, ($matched-52), 0);
			 while(<DB>){
			   if($gdf==1){
			      if(/ID[\t ]+$poss[$p] +\S+ +\S+ +(\d+)/){
			         printf ("%-24s %-3d %-7d %-14s %4s\n", "$poss[$p]\/1\-$1", 1, $1, $poss[$p], '0.0');
					 next A;
			      }
			   }
			   elsif(/^ {0,2}\/\// and  $fasta !=1){  # !!! DO NOT put $ in /^ {0,2}\/\// as there is something
				  print "\/\/\n";
				  next A;
			   }elsif(/^ {0,2}\/\//  and  $fasta==1){ # !!! DO NOT put $ in /^ {0,2}\/\// as there is something
				  $seq=~s/ //g;
				  if( ($SEQ_size_min < length($seq))&&(length($seq) < $SEQ_size_max) ){
					 print "\>$poss[$p]\n$seq\n"; $seq=''; next A;
				  }else{  $seq=''; $num_sequence++;  next A; }
			   }elsif( $fasta==1 and /^[\t ]+\w+/){
				  $seq.=$_;
				  next ;
			   }elsif($list==1){
			      if(/ID[\t ]+$poss[$p] +\S+ +\S+ +(\d+)/){
			         print "$poss[$p]\n";
					 next A;
			      }
			   }elsif($fasta !=1){
				  print ;
			   }
			 }
		   }
		 }
		 close(DB);
	  }elsif($all==1){
		 print "\n";
		 open (DB, "$DB");
		 if(defined($SEQ_num_to_fetch)){ $num_sequence=$SEQ_num_to_fetch;
		 }else{ $num_sequence=@poss; }
		 A:for($p=0; $p < $num_sequence; $p++){
		   if($poss[$p]=~/\w*$target\w*/){
			 $matched=$possible{$poss[$p]};
			 seek(DB, ($matched-51), 0);
			 while(<DB>){
			   if($gdf==1){
			      if(/ID[\t ]+$poss[$p] +\S+ +\S+ +(\d+)/){
			         printf ("%-24s %-3d %-7d %-14s %4s\n", "$poss[$p]\/1\-$1", 1, $1, $poss[$p], '0.0');
					 next A;
			      }
			   }elsif(/^ {0,2}\/\// and $fasta==1){ # !!! DO NOT put $ in /^ {0,2}\/\// as there is something
				  $seq=~s/ //g;
				  if( ($SEQ_size_min < length($seq))&&(length($seq) < $SEQ_size_max) ){
					 print "\>$poss[$p]\n$seq\n"; $seq='';  next A;
				  }else{  $seq=''; $num_sequence++; next A; }
			   }elsif(/^ {0,2}\/\// and $fasta !=1){  # !!! DO NOT put $ in /^ {0,2}\/\// as there is something
				  print "\/\/\n";
				  next A;
			   }elsif(($fasta==1)&&(/^[\t ]+\w+/)){
				  $seq.=$_;
				  next ;
			   }elsif($list==1){
			      if(/ID[\t ]+$poss[$p] +\S+ +\S+ +(\d+)/){
			         printf "$poss[$p]\n";
					 next A;
			      }
			   }elsif($fasta !=1){
				  print ;
			   }
			 }
		   }
		 }
		 close(DB);
	  }

	  FASTA:
	  @poss = sort keys %possible;
	  if( (@poss >1)&&($all !=1)){
		 print "\n @poss","\n";
		 print chr(7);
		 print "\n There are more than a few seqs for $in[$i]";
		 print "\n be more specific! OR use -a option for all matched\n\n";
		 exit;
	  }elsif($all !=1){
		 print "\n";
		 open (FAS, "$FASTA");
		 B:for($p=0; $p < @poss; $p++){
		 if($poss[$p]=~/\w*$target\w*/){
			 $matched=$possible{$poss[$p]};
			 seek(FAS, ($matched-350), 0);
			 my $seq_found;
			 while(<FAS>){
			if((/^> {0,4}(\S+)/)&&($seq_found==1)){
				   next B;
	 			}elsif(/^> {0,4}($poss[$p])/){
				   print;
				   $seq_found=1;
				}elsif($seq_found==1){
				   print;
				}
			 }
		   }
		 }
		 close(FAS);
	  }elsif($all==1){
		 print "\n";
		 open (FAS, "$FASTA");
		 B2:for($p=0; $p < @poss; $p++){
		   if($poss[$p]=~/\w*$target\w*/){
			 $matched=$possible{$poss[$p]};
			 seek(FAS, ($matched-350), 0);
			 my $seq_found;
			 while(<FAS>){
				if((/^> {0,4}(\S+)/)&&($seq_found==1)){
				   next B2;
				}elsif(/^> *($poss[$p])/){
				   print;
				   $seq_found=1;
				}elsif($seq_found==1){
				   print;
				}
			 }
		   }
		 }
		 close(FAS);
	  }
	 }
}



#______________________________________________________________
# Title     : fetch_swiss_seq
# Usage     :
# Function  : fetches swissprot entry or fasta format seq with
#             given seq name(like  SAA_HORSE, SA*HORSE, SAA,..)
#             you can give multi files(SAA*, SAU*) at the same
#             time. This uses ENV setting of 'SWDIR'
# Example   : &fetch_swiss_seq(@ARGV);
# Warning   :
# Keywords  : fetch_swissprot_sequence, fetch_sequence,
#             find_swiss_sequence, find_sequence, fetch
# Options   : _  for debugging.
#             #  for debugging.
#             -f for fasta format file output
# Returns   :
# Argument  : swissprot seqname
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub fetch_swiss_seq{
	 my @in=@_;
	 my ($i, $index_file, $target, $matched, $seq);
	 if(@_ < 1){
	 print "\n Usage: $0 [-f] <any swissprot name entry>\n";
	 print "   -f is for fasta output format only\n";
	 print "\n You have to set ENV var, SWDIR to seq.dat path\n";
	 print chr(7);
	 }
	 for($i=0; $i<@in; $i++){
	  if($in[$i]=~/\-f$/i){
		 $fasta=1;
		 splice(@in, $i, 1);
		 next;
	  }
	 }

	 if(-e "$ENV{'SWDIR'}seq.dat" ){
	  open(DB, "$ENV{'SWDIR'}seq.dat");
	 }elsif( -f "$ENV{'SWISS'}seq.dat" ){
	  open(DB, "$ENV{'SWISS'}seq.dat");
	 }elsif( -e 'seq.dat'){
	  open(DB, "seq.dat");
	 }elsif( -f "$ENV{'swiss'}seq.dat"){
	  open(DB, "$ENV{'swiss'}seq.dat");
	 }

	 if(-e "$ENV{'SWDIR'}seq.idx" ){
	  $index_file="$ENV{'SWDIR'}seq.idx";
	 }elsif( -f "$ENV{'SWISS'}seq.idx" ){
	  $index_file="$ENV{'SWISS'}seq.idx";
	 }elsif( -e 'seq.idx'){
	  $index_file="seq.idx";
	 }elsif( -f "$ENV{'swiss'}seq.idx"){
	  $index_file= "$ENV{'swiss'}seq.idx";
	 }
	 for($i=0; $i<@in; $i++){
	  my @possible;
	  my $target=$in[$i];
	  $target=~s/\*/\\\w\{0,4\}/; # to handle glob input
	  open(INDEX, "$index_file");
	  while(<INDEX>){
		if( /(\w*$target\w*)/ ){
		   push(@possible, $1);
		}
	  }
	  close INDEX;
	  open(INDEX,  "$index_file");
	  if(@possible >1){
		 print "\n@possible", "\n";
		 print chr(7);
		 print "\n There are more than a few seqs for $in[$i], \n be more specific!\n\n";
	  }else{
		 print "\n";
		 A:while(<INDEX>){
		   if(/(\w*$target\w*)[\t ]+(\d+)/){
			 $matched=$1;
			 seek(DB, ($2-51), 0);
			 while(<DB>){
			   if((/^\/\/$/)&&($fasta==1)){
				  $seq=~s/ //g;
				  print "\>$matched\n$seq\n";
				  $seq='';
				  next A;
			   }elsif((/^\/\/$/) && ($fasta !=1)){
				  print "\n";
				  next A;
			   }elsif(($fasta==1)&&(/^[\t ]+\w+/)){
				  $seq.=$_;
				  next ;
			   }elsif($fasta !=1){
				  print ;
			   }
			 }
		   }
		 }
		 print "========= Search for $ARGV[$i] was a success\n" if @in > 1;
	  }
	 }
}


#______________________________________________________________
# Title     : get_sequence_number
# Usage     :
# Function  : reads database and tells how many sequences are there
#             fasta format db is only accepted for now.
# Example   :
# Warning   :
# Keywords  : count_number_of_sequence, get_number_of_sequence
#             get_sequence_number_in_fasta
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#--------------------------------------------------------------
sub get_sequence_number{
	my @file=@{$_[0]};
	my %out;
	for($i=0; $i< @file; $i++){
	   my $seq_number_in_db;
	   open(DB, "$file[$i]");
	   while(<DB>){
		  if(/^\> {0,5}\w+/){
			 $seq_number_in_db++;
		  }
	   }
	   close DB;
	   $out{$file[$i]}=$seq_number_in_db;
	}
	return(\%out);
}


#______________________________________________________________
# Title     : write_msp_files
# Usage     : &write_msp_files(\%in1, \%in2, ['s'], [$filename],,)
# Function  : Writes input which is already in msp file format to
#              files either the name is given or generated
#              If more than one ref of hash is given, this will
#              concatenate all the hashes to one big one to
#              make one file.
#             When NO output xxx.msp file name is given, it creates
#              with the query sequence name.
# Example   :  &write_msp_files(@sso, 's', $out_file);
# Warning   : When NO output xxx.msp file name is given, it creates
#              with the query sequence name.
# Keywords  : write_msp,
# Options   : _  for debugging.
#             #  for debugging.
#             s  for each single file output for each hash input
#      filename  for putting output to the specified filename, should be xxx.msp
#
# Returns   : if 's' option is set, it will make say,
#               HI001.msp HI002.msp HI003.msp  rather than
#
#               HI001HI002HI003.msp
#  eg of one output(single file case)
#
#   1027     0.0     1     154   HI0004     1     154   HI0004
#   40       0.0     84    132   HI0004     63    108   HI0001
#   31       0.0     79    84    HI0004     98    103   HI0003
#
# Category  :
# Version   : 2.8
#--------------------------------------------------------------
sub write_msp_files{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
        \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
        \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

    my ($out_msp_file, $add_range, @final_out, $msp_file_out,
         @keys, $N, $temp_1, %hash, $query_seq_name, $single_out_opt);

    if($char_opt=~/r/){ $add_range      ='r' };
    if($char_opt=~/s/){ $single_out_opt ='s' };
    if(@file == 1){ $out_msp_file=$file[0]; $single_out_opt='' } # s is for single file output

    if($single_out_opt eq 's'){ #~~~~~~~~~~~` single files output option WITHOUT a given outfilename
         $msp_file_out='default_single_out.msp';
         for($i=0; $i< @hash; $i++){
             my %hash=%{$hash[$i]};
             my @keys =sort keys %hash;

             #------------------ Writing the first line ---------------------------
             for($j=0; $j< @keys; $j++){
                  if($keys[$j]=~/(\S+)_\d+\-\d+/){ $N = $1 }else{ $N = $keys[$j] }
                  if($hash{$keys[$j]}=~/ +$N[\_\d+\-\d+]* +\d+ +\d+ +$N[\_\d+\-\d+]*/){
                       open(MSP_FILE, ">$msp_file_out") ||
                           die "# write_msp_files: I can not create $msp_file_out, check permission\n";
                       chomp( $hash{$keys[$j]} ); ## precaution
                       print MSP_FILE "# (H) $0 write_msp_files: $keys[$j]\n";
                       print MSP_FILE $hash{$keys[$j]}, "\n";
                       splice(@keys, $j, 1);
                       $j--; last;
                  }
             }
             #------------- Writing the rest of the lines ____________________
             for($j=0; $j< @keys; $j++){
                  chomp( $hash{$keys[$j]} );
                  print MSP_FILE $hash{$keys[$j]}, "\n";
             }
             print MSP_FILE "\n";
         }
         if(-s $msp_file_out){
              print "\n# write_msp_files: $msp_file_out is written \n";
         }else{
              print "\n# Error, write_msp_files\n"; exit
         }
         push(@final_out, $msp_file_out);
         close(MSP_FILE);
         return(\@final_out);
    }else{
         #~~~~~~~~~~~~~ DEfault ~~~~~~~~~~~~~~~~~~
         #  When output file name was given!
         #________________________________________
         if(@file==1){
             my($temp_1);
             open(MSP_FILE, ">$out_msp_file") ||  die "# write_msp_files: I can not create $out_msp_file, check permission\n";
             print MSP_FILE "# (H) $0 write_msp_files: @file\n";
             for($i=0; $i< @hash; $i++){
                 my %hash=%{$hash[$i]};
                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 # Sorting %hash values by the second column(Evalue)
                 #_______________________________________________________
                 @keys= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map { $hash{$_}=~/^ *\S+[\t ]+(\S+)[\t ]+/ and [$_, $1] } keys %hash;

                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 # for Final output
                 #_____________________________
                 push(@final_out, $out_msp_file);

                 #--------- Writing the first line only --------------
                 for($j=0; $j< @keys; $j++){
                      if($keys[$j]=~/(\S+)_\d+\-\d+$/){ $N = $1 }else{ $N = $keys[$j] }

                      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                      # Following is to put the self match on top of the list
                      #________________________________________________________
                      if($hash{$keys[$j]}=~/ +$N[\_\d+\-\d+]* +\d+ +\d+ +$N[\_\d+\-\d+]*/){
                          $temp_1=$keys[0]; $keys[0]=$keys[$j]; $keys[$j]=$temp_1;
                      }
                 }
                 for($j=0; $j< @keys; $j++){
                      chomp($hash{$keys[$j]});
                      print MSP_FILE $hash{$keys[$j]}, "\n";
                 }
                 print MSP_FILE "\n";
             }
             print MSP_FILE "\n";
             close(MSP_FILE);
             if(-s $out_msp_file and $out_msp_file !~/^ *\.msp$/){
                 print "\n# write_msp_files: $out_msp_file is written\n" if(-s $out_msp_file);
             }else{
                 print "\n# write_msp_files: ERROR. Either $out_msp_file is empty or \".msp\" is written\n";
             }
         }else{
             for($i=0; $i< @hash; $i++){
                 my %hash=%{$hash[$i]};
                 my @keys =sort keys %hash;
                 ($query_seq_name)=$hash{$keys[0]}=~/\S+ +\d+ +\d+ +(\S+) +\d+ +\d+ +\S+/;
                 $msp_file_out="$query_seq_name\.msp";
                 open(MSP_FILE, ">$msp_file_out") or die "\n# write_msp_files: Failed to open $msp_file_out\n";
                 print MSP_FILE "# (H) $0 write_msp_files: $query_seq_name\n";

                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 # for Final output
                 #_____________________________
                 push(@final_out, $msp_file_out);
                 #~~~~~~~ Writing the first line only ~~~~~~~~~~~~~~~~~~
                 for($j=0; $j< @keys; $j++){
                     if($keys[$j]=~/(\S+)_\d+\-\d+$/){ $N = $1 }else{ $N = $keys[$j] }
                     if($hash{$keys[$j]}=~/ +$N[\_\d+\-\d+]* +\d+ +\d+ +$N[\_\d+\-\d+]*/){
                      $keys[0]=$temp_1; $keys[0]=$keys[$j]; $keys[$j]=$temp_1;
                     }
                 }
                 for($j=0; $j< @keys; $j++){
                     chomp($hash{$keys[$j]});
                     print MSP_FILE $hash{$keys[$j]}, "\n";
                 }
                 print MSP_FILE "\n";
             }
             print MSP_FILE "\n";
             if(-s $out_msp_file and $out_msp_file !~/^ *\.msp$/){
                  print "\n# write_msp_files: $out_msp_file is written\n" if(-s $out_msp_file);
             }else{
                  print "\n# write_msp_files: ERROR. Either $out_msp_file is empty or only \".msp\" is written\n";
             }
             close MSP_FILE;
         }
     }
     if(@final_out ==1){ return( \$final_out[0] ); }else{
         return(\@final_out);
     }
}


#______________________________________________________________________
# Title    : write_aln_files
# Function : writes multiple seqs. in msf format (takes one or more than one seq.!!)
# Usage    : two argments:  $seq_hash_reference  and $output_file_name
#             takes a hash which has got names keys and sequences values.
#             uses Perl5 pointers(references).
# Example  : &write_aln(\%hash, \$out_file_name);
#  CLUSTAL W (1.74) multiple sequence alignment
#
#
#  MMAF6040_1           -----MATDD--SIIVLDD----DDEDEA-AAQP-GPSNLPPN-PASTGPGPGLSQQATG
#  AF015956_1           -----MATAN--SIIVLDD----DDEDEA-AAQP-GPSHPLPN-AASPGAG---------
#  HSAB2381_80-900      KQRLLSVTSDEGSMNAFTGRGSPDTEIKINIKQESADVNVIGNKDVVTEEDLDVFKQAQE
#                             .* :  *: .: .    * * :    *  .  :   *  . .  .
#
# Options  :
#     $first_sequence_name= by f=  # to put a certain seq at the first in writing
# Keywords :
# Version  : 1.1
#----------------------------------------------------------------------
sub write_aln_files{

   $| =1;
   my($string, %input, $temp, $output_file, $first_sequence_name,
      $name, $k, %final_output);
   for($k=0; $k< @_; $k++){
       if(ref($_[$k]) eq 'HASH'){
           %input=%{$_[$k]};
       }elsif(ref($_[$k]) eq 'SCALAR'){
           $output_file=${$_[$k]};
       }else{
           if($_[$k]=~/f=(\S+)/){
                $first_sequence_name=$1;
           }else{
                $output_file=$_[$k];
           }
       }
   }
   open (ALN_FILE_OUT,">$output_file");		# $string is the seq string.

   print ALN_FILE_OUT 'CLUSTAL W (1.74) multiple sequence alignment', "\n\n";

   my(@names) = sort keys %input;
   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # Putting the wanted top first seq name at the top
   #____________________________________________________
   if($first_sequence_name){
       for($k=0; $k< @names; $k++){
           if($names[$k]=~/$first_sequence_name/){
               $temp=$names[0];
               $names[0]=$names[$k];
               $names[$k]=$temp;
           }
       }
   }
   my($larg)  = length($input{$names[0]});

   for ($k=0; $k < $larg; $k+= 60){    # 60 residues interval
	for($i=0; $i < @names; $i++){    # number of sequences
             $names = $names[$i];
             $input{$names[$i]}=~ s/\n//g; ## this is to remove MANY new lines in the input !!
             $seq = substr($input{$names[$i]}, $k, 60);
             #$seq = &put_gaps_every_x_position_in_string($seq, 10, ' ');
             printf ALN_FILE_OUT ("%-18s %-60s\n", $names, $seq);
             $final_output{$output_file}.=sprintf("%-18s %-60s\n", $names, $seq);
	}
	printf ALN_FILE_OUT "\n";
	$final_output{$output_file}.="\n";
   }
   close(ALN_FILE_OUT);
   return(\%final_output);
}

#______________________________________________________________________
# Title    : write_msf
# Function : writes multiple seqs. in msf format (takes one or more than one seq.!!)
# Usage    : two argments:  $seq_hash_reference  and $output_file_name
#            takes a hash which has got names keys and sequences values.
#            uses Perl5 pointers(references).
# Example  : &write_msf(\%hash, \$out_file_name, ["o=$seq_order"]);
#             eg) $seq_order='asdf seq2 seq3 seq5';
#    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#    PileUp
#
#       MSF: 1205  Type: P    Check:  9937   ..
#
#     Name: PYC1_YEAST      oo  Len: 1205  Check:  7954  Weight:  1.00
#     Name: PYC2_YEAST      oo  Len: 1205  Check:  5807  Weight:  1.00
#     Name: PYC_MOUSE       oo  Len: 1205  Check:  6176  Weight:  1.00
#
#    //
#
#    PYC1_YEAST      MSQ.RKFAGL RDNFNLLGEK N......... .......... .KILVANRGE
#    PYC2_YEAST      MSSSKKLAGL RDNFSLLGEK N......... .......... .KILVANRGE
#    PYC_MOUSE       ...MLKFQTV RGGLRLLGVR RSSSAPVASP NVRRLEYKPI KKVMVANRGE
#
#    PYC1_YEAST      IPIRIFRTAH ELSMQTVAIY SHEDRLSTHK QKADEAYVIG EVGQYTPVGA
#    PYC2_YEAST      IPIRIFRSAH ELSMRTIAIY SHEDRLSMHR LKADEAYVIG EEGQYTPVGA
#    PYC_MOUSE       IAIRVFRACT ELGIRTVAVY SEQDTGQMHR QKADEAYLIG R..GLAPVQA
#
#    PYC1_YEAST      YLAIDEIISI AQKHQVDFIH PGYGFLSENS EFADKVVKAG ITWIGPPAEV
#    PYC2_YEAST      YLAMDEIIEI AKKHKVDFIH PGYGFLSENS EFADKVVKAG ITWIGPPAEV
#    PYC_MOUSE       YLHIPDIIKV AKENGVDAVH PGYGFLSERA DFAQACQDAG VRFIGPSPEV
#
# Keywords : write_msf_files, save_msf_files
# Version  : 2.2
#------------------------------------------------------------
sub write_msf{
     my($seq, $string, $name, $k, $i, $longest_seq_leng, @seq_order_final,
        $seq_order, @files_created, @names, $fill_seq_to_the_end);
     $| =1;
     if(@_ < 2){
          print "\n# write_msf: I need 2 arguments(hash and filename). Look at the header box\n";
          print chr(7); exit;
     }
     my($gap_char)='-';
     my(%input)=%{$_[0]};
     my($output_file)=${$_[1]} || $_[1];

     if($_[2]=~/o=(.+)/){
         $seq_order=$1;
         @seq_order=split(/ +/, $seq_order);
     }elsif(ref($_[2]) eq 'ARRAY'){
         @seq_order=@{$_[2]};
     }
     @names = sort keys %input;

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # If seq order information is given, sort the out output
     #________________________________________________________________
     if(@seq_order > 0){ # when seq names were given in an order, you just use it
         print "\n# (INFO) \@seq_order is given \n";
         for($i=0; $i< @seq_order; $i++){
             $seq_order_item=$seq_order[$i];
             if($seq_order_item=~/_rv$/){ $seq_order_item=~s/_rv$// }
             for($j=0; $j<@names; $j++){
                $names_item=$names[$j];
                if($names_item=~/_rv$/){ $names_item=~s/_rv$// }
                if($seq_order_item eq $names_item){
                    push(@seq_order_final, $names[$j]);
                    splice(@names, $j, 1); $j--;
                }else{ next }
             }
         }
         @names=@seq_order_final;
     }
     $longest_seq_leng=length($input{$names[0]});
     for $name (@names){
         $len = length($input{$name});
         if($len< 1){ print "\n# (ERROR) The length of seq. in \%input with $name key is 0, error!\n";
             exit;  }
         $longest_seq_leng=$len if $len > $longest_seq_leng;
     }

     push(@files_created, $output_file);
     open (MSF_FILE_OUT,">$output_file");        # $string is the seq string.

     print MSF_FILE_OUT " $output_file  MSF: $longest_seq_leng",' Type: P    Check:  9937   .. '; ## This is dummy
     print MSF_FILE_OUT "\n\n";


     for $name (@names){
           $len = length($input{$name});
           printf MSF_FILE_OUT (" Name: %-15s   Len: %-5s Check:  9999  Weight:  1.00\n", $name, $longest_seq_leng);
     }
     print MSF_FILE_OUT "\n";
     print MSF_FILE_OUT "\/\/\n\n";
#""""""""""""""""""""""""""""""""""""""""""""""""""
#             MSF file form
#==================================================
format MSF_FILE_OUT =
@<<<<<<<<<<<<<<<<<<<<  @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$names,         $seq
.

     for ($k=0; $k < $longest_seq_leng; $k+=50){    # 50 residues interval
           for($i=0; $i < @names; $i++){    # number of sequences
                  $names = $names[$i];
                  $input{$names[$i]}=~ s/\n//g;
                  #$input{$names[$i]}=~ s/_/$gap_char/g; # automatically changes '_' to '-'

                  $seq = substr($input{$names[$i]}, $k, 50);
                  $seq_leng=length($seq);

                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  # This is to fill the empty space to make complete block of seq
                  #________________________________________________________________
                  if($fill_seq_to_the_end){
                        $seq .="$gap_char"x($longest_seq_leng-$seq_leng); # putting '---' at the blank line end
                        # Above option will coredump 'seaview' seq editor
                  }
                  $seq=~s/[\-_ ]/$gap_char/g; # setting the final gap_char you like

                  $seq = &put_gaps_every_x_position_in_string_special($seq, 10, ' ');
                  sub put_gaps_every_x_position_in_string_special{
                      my($string); if(ref($_[0])){ $string = ${$_[0]};
                                   }else{ $string = $_[0]; }
                      my($interval) = $_[1];     my($gap_char) = $_[2];
                      $string =~ s/(.{$interval,$interval})/$1$gap_char/g;
                      return($string);
                  }

                  select (MSF_FILE_OUT); ## to print out to a FILE
                  write MSF_FILE_OUT;
           }
           print "\n";                       # next block starts.
     }
     close(MSF_FILE_OUT);
     select STDOUT;  # <- this is necessary to normalize output for other sub
     return(\@files_created);
}




#______________________________________________________________
# Title     : get_seqblock
# Usage     :
# Function  :
# Example   : @blocks_in_hash=@{&get_seqblock(\%msf, 30)};
# Warning   :
# Keywords  : find_sequence_block, get_sequence_block,
#             make_seq_block, make_seqblock, find_seqblock
# Options   : _  for debugging.
#             #  for debugging.
#             m=  for margin length of the seqblock
#             t=  for threshold
#             l=  for min seqlet length
#
# Returns   :
# Argument  :
# Category  :
# Version   : 1.4
#--------------------------------------------------------------
sub get_seqblock{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my ($connect_gap, @seq_frag, %digitized, $verbose, %hash, $best_block_opt);
	my $margin=3;
	my $threshold=0.8;
	my $min_seqlet_size=25;
	$connect_gap=5;
	my @vars=keys %vars;
	for($i=0; $i< @vars; $i++){
	   if($vars[$i] eq 'm'){
		  $margin=$margin_ori=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 't'){
		  $threshold=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 'l'){
		  $min_seqlet_size=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 'c'){
		  $connect_gap=$vars{$vars[$i]} if( defined($vars{$vars[$i]}) );
	   }
	}
	if($char_opt=~/b/){ $best_block_opt='b' }
	if($char_opt=~/r/){ $range_in_name='r' }
	if($char_opt=~/c/){ $connect_opt ='c' }
	if($char_opt=~/v/){ $verbose='v' }
	for($o=0; $o<@hash; $o++){
	  %hash=%{$hash[$o]};
	  %digitized=%{&convert_char_to_0_or_1_hash($hash[$o])};
	}

	%added=%{&add_columns(\%digitized)}; # 11111 + 1010101 => 2121211

	&show_hash(\%added) if ($debug==1);

	%blocks=%{&get_high_score_blocks(\%added,
			   "m=$margin", "t=$threshold", "l=$min_seqlet_size", $verbose,
			   "c=$connect_gap", $connect_opt, $best_block_opt, $range_in_name)};

	my @keys=keys %blocks;
	for($e=0; $e< @keys; $e++){

	   my $range="$keys[$e]\-$blocks{$keys[$e]}";
	   my $seq_let_leng=$blocks{$keys[$e]} - $keys[$e] + 1;
	   if($seq_let_leng < $min_seqlet_size){
		  next;
	   }else{
	      push(@RANGE, $range);
	   }
	}
	@seq_frag=&get_seq_fragments(\%hash, @RANGE,
	    "l=$min_seqlet_size", "$range_in_name");
	return(\@seq_frag);
}


#______________________________________________________________
# Title     : add_columns
# Usage     :
# Function  :
# Example   :
# Warning   : if the attached name is too long(over 12 char),
#             it changes to 'Added_upX' while X is a numb.
# Keywords  : add_seq_columns, add_sequence_columns,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#--------------------------------------------------------------
sub add_columns{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my $gap_char=' ';
	for($i=0; $i<@hash; $i++){
	  my %hash=%{$hash[$i]};
	  my @names=keys %hash;
	  my %final_hash_out;
	  my @final_added;
	  my $out_seq_name='Add';
	  for($j=0; $j<@names; $j++){
	     $out_seq_name.= "_$names[$j]";
		 my $string =$hash{$names[$j]};
		 my @ar_string;
		 if($string=~/\d{1,5}[ \,]\d{1,5}[ \,]\d{1,5}/){
			@ar_string =split(/$gap_char|\,/, $string );
		 }elsif($string=~/^\d{5,}$/){
			@ar_string =split(//, $string );
		 }
		 for($s=0; $s < @ar_string; $s++){
			$final_added[$s]=$ar_string[$s]+$final_added[$s];
		 }
	  }
	  if(length($out_seq_name) > 12){ $out_seq_name="Added_up${i}"; }
	  $final_hash_out{$out_seq_name}=join("$gap_char", @final_added);
	  push(@OUT_HASH, \%final_hash_out);
	}
        wantarray ? return(@OUT_HASH) : return($OUT_HASH[0]);
}


#____________________________________________________________________
# Title     : get_high_score_blocks
# Usage     : get_high_score_blocks(<ref. of hash for number string>)
# Function  : gets hash of key and number string and filters out the
#              number string region which is below certain threshold
#              determined inside this sub and returns a selected high
#              number regions
# Example   : %block_start_end=%{&get_high_score_blocks(\%input_numb_block)};
#             %out=%{&get_high_score_blocks(\%inp_numbs, 'v', 'b')};
# Warning   : This assumes that the inputs are multiply aligned seq
# Keywords  : high_scoring_regions
#             get_high_scoring_blocks, find_blocks, get_blocks
# Options   : _  for debugging.
#             #  for debugging.
#             b  for best_block_opt, returns best block only
#             v  for showing the final range hash output
#             c  for connect close blocks
#             c= for connect close blocks with specific closing gap size
#             m=  for margin length of the seqblock
#             t=  for threshold
#             l=  for min seqlet length
#
# Returns   :
# Argument  : accepts one single ref. of hash
# Category  :
# Version   : 1.4
#--------------------------------------------------------------------
sub get_high_score_blocks{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	my $min_seqblock_leng=25;
	my $threshold=0.8;
	my (%block_start_end, @possible_block, %hash, $range_in_name,$connect_gap);
	my $margin=$margin_ori=2;  # $margin is m in .....mmm111111111111mmm.....
	my $gap_char=' ';
	my @vars=keys %vars;
	my $connection_gap=5;
	my $connect_opt=1;
	for($i=0; $i< @vars; $i++){
	   if($vars[$i] eq 'm'){
		  $margin=$margin_ori=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 't'){
		  $threshold=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 'l'){
		  $min_seqblock_leng=$vars{$vars[$i]};
	   }elsif($vars[$i] eq 'c'){
		  $connect_opt='c';
		  $connection_gap=$vars{$vars[$i]};
		  #print "\n get_high_score_blocks: \$connection_gap is $connection_gap\n";
	   }
	}
	if($char_opt=~/c/){ $connect_opt='c' }
	if($char_opt=~/r/){ $range_in_name='r' }
	if($char_opt=~/v/){
		print "\n  \$threshold         is $threshold    ";
		print "\n  \$margin            is $margin       ";
		print "\n  \$min_seqblock_leng is $min_seqblock_leng \n\n";
		print "\n  \$connection_gap   is $connection_gap \n\n";
		print "\n  \$connect_opt      is $connect_opt \n\n";
	}

	for($i=0; $i<@hash; $i++){
	   my @range;
	   my %hash_ori=%{$hash[$i]};
	   my @names=keys %hash_ori;
	   if(@names>1){ # If the hash has multi entry, make one added up hash
	      %hash=%{&add_columns(\%hash_ori)};
	      @names=keys %hash;
	   }else{  %hash=%hash_ori;    }
	   for($j=0; $j< @names; $j++){
		  my $string=$hash{$names[$j]};
		  if($string=~/\d{1,5}[ \,]\d{1,5}[ \,]\d{1,5}/){
			 @ar_string =split(/$gap_char|\,/, $string );
		  }elsif($string=~/^\d{4,}$/){ ## the string should be minimum 4 length
			 @ar_string =split(//, $string );
		  }
		  my $largest = ${&get_largest_element(\@ar_string)};
		  my $cut_line=$largest*$threshold;
		  #print "\n \$cutline in get_high_score_blocks is $cut_line \n" if $debug==1;
		  #~~~~~~~~~~~~  Cutting the tops The core algorythm #######
		  for($s=0; $s< @ar_string; $s++){
			 if($ar_string[$s] > $cut_line){ # possible_block is the increasing seqlet
				if(@possible_block == $min_seqblock_leng){
				   while( $ar_string[$s] > $cut_line){
					  $ar_string[$s]=1;
					  while($s+1+$margin > @ar_string){ $margin-- }
					  push(@possible_block, ($s+1+$margin));
					  $margin=$margin_ori;
					  $s++;
				   }
				   $ar_string[$s]=0;  #<--- Should be 0 than 1
				   $block_start_end{$possible_block[0]}=$possible_block[$#possible_block];
				   @possible_block=();
				}else{
				   $ar_string[$s]=1;
				   while(($s+1-$margin) < 0){ $margin-- };
				   push(@possible_block, ($s+1-$margin) );
				   $margin=$margin_ori;
				}
			 }elsif($ar_string[$s] <= $cut_line){
				$ar_string[$s]=0;
				@possible_block=();
			 }
		  }
		  #print "\n", @ar_string,"\n" if $debug==1;
		  #~~~~~~~~~~~~  Cutting the tops The core algorythm #######
	   }
	}
	#print "\n@ar_string\n";
	#&show_hash(\%block_start_end);
	&show_hash(\%block_start_end) if($char_opt=~/v/);

	#~~~~~~~~~~~~  Connecting blocks ~~~~~~~~~~~~~~~~~~~~~~
	if($connect_opt=~/c/){
	   my @keys=sort numerically keys %block_start_end;
	   sub numerically{  $a <=> $b;  }

	   ### sorting the %block_start_end
	   for($i=0; $i< @keys; $i++){
		  push(@block_s_e, $keys[$i], $block_start_end{$keys[$i]});
	   }

	   for($i=1; $i< $#block_s_e; $i++){  ## must be $#block_s_e to stop
		  $first_end   =$block_s_e[$i];   ## before it removes everything
		  $second_start=$block_s_e[$i+1];

		  #""""""" if gap is smaller than connection_gap given """"""""
		  if($connection_gap > ($second_start-$first_end) ){
			 splice(@block_s_e, $i, 2);
			 $i--;
		  }else{
		     $i++; # to skip to the next start correctly
		  }
	   }
	   %block_start_end=@block_s_e;
	}
	print "\n# Blocks start and end after connection(gap was $connection_gap)\n"  if($char_opt=~/v/);

	&show_hash(\%block_start_end)  if($char_opt=~/v/);

	#~~~~~~~~~~~~ Getting the largest ~~~~~~~~~~~~~~~~~~~~~~~
	if($char_opt=~/b/){
	   print "\n# Getting the largest block only to get all the blocks use a opt\n";
	   my @keys=keys %block_start_end;
	   my ($largest, %largest,$range_size, $largest_key);
	   for($i=0; $i< @keys; $i++){
		  $range_size = $block_start_end{$keys[$i]}-$keys[$i];
		  if($range_size > $largest){
			 $largest=$range_size;
			 $largest_key=$keys[$i];
			 #print "\n $largest_key \n";
		  }
	   }
	   $largest{$largest_key}=$block_start_end{$largest_key};
	   %block_start_end=%largest;
	   print "\n# The best block chosen (from to) \n"  if($char_opt=~/v/);
	   &show_hash(\%block_start_end) if($char_opt=~/v/);
	}
	return(\%block_start_end);
}






#______________________________________________________________
# Title     : delbut
# Usage     : delbut *.zip  (delete files except xxxx.zip)
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#--------------------------------------------------------------
sub delbut{
    my $i;
    @save_files=@{$_[0]} || @_;
    for(@save_files){
       unless(-e $_){
         print "\n\n \"$_\" does not exist, so nothing is deleted\n\n";
         print chr(7);
         exit;
       }
    }
    my @files=@{&read_file_names_only('.')};
    my @del_files=@{&subtract_array(\@files, \@save_files)};

    for($i=0; $i< @del_files; $i++){
       system("rm -f $del_files[$i]");
    }
    print "\n\n Subdirs are never deleted \n\n";
}

#______________________________________________________________
# Title     : get_msp_range
# Usage     : @range=@{&get_msp_range($seqlet)};
#             @temp=&get_msp_range($seqlet);
#
# Function  :
# Example   :
# Warning   :
# Keywords  : get_msp_file_ranges
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.5
#--------------------------------------------------------------
sub get_msp_range{
	 my $lines1=${$_[0]} || $_[0];
	 my ($SEQ, $num_seq, $matched_SEQ, @Ranges);
	 if($lines1 =~/^ *\d+ +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+)/){
	  $SEQ        =$3;
	  $matched_SEQ=$6;
	  if($SEQ eq $matched_SEQ){ ## skipping self match
		  $num_seq++;
	  }else{
		  @Ranges=($1, $2, $4, $5);  ## <-- example. (10-20, 30-45)
	  }
	 }
	 return wantarray ? (\@Ranges, \$SEQ, \$matched_SEQ): \@Ranges;
}
#______________________________________________________________
# Title     : get_msp_enquiry_sequence
# Usage     :
# Function  : gets the name of sequence used as enquiry(target)
# Example   :
# Warning   :
# Keywords  : get_msp_target_sequence, get_msp_enquiry_sequence_name
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_msp_enquiry_sequence{
	 my $lines1=${$_[0]} || $_[0];
	 my ($SEQ, $matched_SEQ);
	 if($lines1 =~/^ *\d+ +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+)/){
	  $SEQ        =$3;
	  $matched_SEQ=$6;
	 }
	 return \$SEQ;
}

#______________________________________________________________
# Title     : get_msp_matched_sequence
# Usage     :
# Function  : gets the name of sequence used as enquiry(target)
# Example   :
# Warning   :
# Keywords  : get_msp_matched_sequence_name
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_msp_matched_sequence{
	 my $lines1=${$_[0]} || $_[0];
	 my ($SEQ, $matched_SEQ);
	 if($lines1 =~/^ *\d+ +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\S+) +(\d+) +(\d+) +(\S+)/){
	  $SEQ        =$3;
	  $matched_SEQ=$6;
	 }
	 return \$matched_SEQ;
}


#______________________________________________________________
# Title     : get_linked_sequence
# Function  : opens msp file and links the sequences according
#             to the matches.
# Usage     :
# Example   : seq1 ------------------------------
#                            |||||||||||
#             seq2        --------------------------------
#             OUT  000000000011111111111000000000000000000
#
# Warning   :
# Keywords  : link_sequence_from_msp_file, linked_sequenced_length
#             get_clustered_sequence_length, get_annexed_sequence_length
#             connect_sequences, merge_sequences, combine_sequences
# Options   : _  for debugging.
#             #  for debugging.
# Returns   : A ref. of an array
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_linked_sequence{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 my ($final_leng, $start_diff,@MSP, %seq_sizes, $final_leng);
         my ($max_head_overhang, $head_diff, $tail_diff,
		 $off_set, $max_tail_overhang, @LINKED, $LINKED);
	 my $Threshold=40;
	 for($i=0; $i< @file; $i++){
	  my($input_file) = ${$file[$i]} || $file[$i];
	  if($debug eq 1){ print "\n inputfile is $input_file\n" };
	  unless (-e $input_file){
		  print chr(7);
		  print "\n\n\t This is sub open_msp_files in $0  \n\n";
		  print "\t Fatal: The input file $input_file is not in the directory \n";
	  }
	  my %seq_sizes=%{&open_msp_files(\$input_file, '-s')};
	  my @NAmes=keys %seq_sizes;
	  for($s=0; $s< @NAmes; $s++){ # making '000000000000000.....';
		 my $len=$seq_sizes{$NAmes[$s]};
		 for($t=0; $t< $len; $t++){
			${"$NAmes[$s]"}[$t]=0;
		 }
	  }
	  open(FILE_1,"$input_file");
	  my @MSP=<FILE_1>;
	  close(FILE_1);
	  for($j=0; $j<@MSP; $j++){
		 if($MSP[$j]=~/^ *(\d+) +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\w+) +(\d+) +(\d+) +(\w+) +(.+)/i){
			if(($1 >= $Threshold)&& ($4 eq $7)){
			   push(@matched_members, $4);
			}elsif(($1 >= $Threshold)&& ($4 ne $7)){
			   $matched_segment_count++;
			   if($match_name ne $7){  push(@matched_members, $7);  }
			   $query_start=$2-1;  $query_end  =$3-1;
			   $query_seq  =$4;    $match_start=$5-1;
			   $match_end  =$6-1;  $desc       =$8;
			   $match_name =$7;
			   for($x=$query_start; $x<= $query_end; $x++){
				  ${"$query_name"}[$x]++;
			   }
			   for($y=$match_start; $y<= $match_end; $y++){
				  ${"$match_name"}[$y]++;
			   }
			}
		 }
	  }
	  for($j=0; $j<@MSP; $j++){
		 #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		 #                 $1                $2     $3    $4      $5     $6    $7     $8
		 #                 171     41.18      6      73  HI1690    9      76  HI0736 sodium...
		 #,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
		 if($MSP[$j]=~/^ *(\d+) +\d+\.?[e\-\d]* +(\d+) +(\d+) +(\w+) +(\d+) +(\d+) +(\w+) +(.+)/i){
			if(($1 >= $Threshold)&& ($4 eq $7)){
			   $query_name=$4;   $query_leng=$3;
			   push(@matched_members, $4);
			}elsif(($1 >= $Threshold)&& ($4 ne $7)){
			   $matched_segment_count++;
			   if($match_name ne $7){  push(@matched_members, $7);  }
			   $query_start=$2-1;  $query_end  =$3-1;
			   $query_seq  =$4;    $match_start=$5-1;
			   $match_end  =$6-1;  $desc       =$8;
			   $match_name =$7;
			}
			@matched_seq_array= @{"$match_name"};
			$matched_seq_array=join('', @matched_seq_array);
			my $start_diff= $query_start - $match_start;
			if($start_diff >= 0){
			   my $tail_diff= $start_diff + $seq_sizes{$match_name} - $seq_sizes{$query_name};
			   if($tail_diff > 0){
				  $max_tail_overhang = $tail_diff if $tail_diff > $max_tail_overhang;
				  for($x=0; $x< $tail_diff; $x++){
					 $tail_start=$seq_sizes{$match_name}-$tail_diff + $x;
					 @matched_seq_array=split(//,$matched_seq_array);
					 $tail_array[$x] +=$matched_seq_array[$tail_start];
				  }
			   }
			}elsif($start_diff < 0){
			   $head_diff = abs($start_diff);
			   $max_head_overhang=$head_diff if $head_diff > $max_head_overhang;
			   for($z=0; $z< $head_diff; $z++){
				  $head_array[$z] += ${"$match_name"}[$z];
			   }
			}
		 }
	  }
	 }
	 @LINKED=( @{"$match_name"}[0..($max_head_overhang-1)], @{"$query_name"}, @tail_array);
	 $LINKED=join('', @LINKED);
	 if($debug eq 1){
	 print __LINE__, " In open_msp_files \%sequence is", %sequence ,"\n";
	 }
	 $final_offset=$extened_number_line - $query_leng;
	 return(\@LINKED);
}

#______________________________________________________________________________
# Title     : get_averaged_prediction
# Usage     : %av_for_back_pred=%{&get_averaged_prediction(\%sec1, \%sec_rv)};
# Function  : The content of out %average is
#               $averaged{$position}=[$residue1, $sec_str2, $dif_reliability];
# Example   :
# Keywords  : get_average_predator_prediction, average_predator_prediction
#             get_averaged_sec_prediction, get_average_prediction
# Options   :
#   $reverse_order_of_one_hash=r by r
#   $give_weight_with_good_match=w by w # this is to give preference to well
#   $weight_factor= by w=
#                                        matching sec. str. I add '0.1'
# Author    : jong@salt2.med.harvard.edu sat@mrc-lmb.cam.ac.uk
# Version   : 1.2
#------------------------------------------------------------------------------
sub get_averaged_prediction{
     #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
     my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
     my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
     my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
     my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
     my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
     if($debug==1){print "\n\t\@hash=\"@hash\"
          \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
          \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
     #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

     my($position, $residue1, $residue2, %averaged, %hash1, %hash2, $sec_str1,
        $sec_str2, $sum_reliability, $average_reliability, $dif_reliability,
        $reverse_order_of_one_hash, $give_weight_with_good_match, $weight_factor);

     if($char_opt=~/r/){  $reverse_order_of_one_hash='r' }
     if($char_opt=~/w/){  $give_weight_with_good_match='w';     $weight_factor=0.05 }
     if($vars{'w'}=~/(\S+)/){ $give_weight_with_good_match='w'; $weight_factor=$1 }

     for($i=0; $i< @hash; $i++){
         unless(%averaged > 2){
              %hash1=%{$hash[$i]};
              %hash2=%{$hash[$i+1]};
              $i++;
         }elsif( %averaged ){
              %hash1=%averaged;
              %hash2=%{$hash[$i]};
         }

         @keys=sort { $a <=> $b} keys %hash1;
         for($k=0; $k< @keys; $k++){
              $position      =$keys[$k]; ## in case of predator prediction, key is the position of residue
              $rev_posi      =@keys-$position+1;
              unless($reverse_order_of_one_hash){
                  $rev_posi=$position;
              }
              $residue1      =$hash1{$position}->[0];
              $residue2      =$hash2{$rev_posi}->[0];

              if($residue1=~/$residue2/i){ # if they are the same aa
                   $sec_str1      =$hash1{$position}->[1]; ## usually one of 'c h e'
                   $sec_str2      =$hash2{$rev_posi}->[1]; ## usually one of 'c h e'
                   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                   # If both predictions are matching eg)  H->H
                   #________________________________________________
                   if($sec_str1=~/$sec_str2/i){
                       $sum_reliability += ($hash1{$position}->[2] + $hash2{$rev_posi}->[2]);
                       $average_reliability=$sum_reliability/2 + $weight_factor;  # usually 0.1
                       $averaged{$position}=[$residue1, $sec_str1, $average_reliability];
                       $average_reliability=$sum_reliability='';
                   }
                   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                   # If both predictions are NOT matching eg)  E->H
                   #________________________________________________
                   else{
                       if($hash1{$position}->[2] > $hash2{$rev_posi}->[2]){
                            $dif_reliability = ( $hash1{$position}->[2] - $hash2{$rev_posi}->[2]);
                            $averaged{$position}=[$residue1, $sec_str1, $dif_reliability];
                       }else{
                            $dif_reliability = ( $hash2{$rev_posi}->[2] - $hash1{$position}->[2]);
                            $averaged{$position}=[$residue1, $sec_str2, $dif_reliability];
                       }
                   }
              }
         }
     }
     return(\%averaged);
}





#______________________________________________________________
# Title     : get_average_sequence_size
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  : get_av_sequence_size, get_average_seq_size
#             get_av_seq_size, average_seq_size, av_seq_size
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub get_average_sequence_size{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 my( @OUT_AV, $size, @size, @names, %hash, $sum, $av);
	 for($i=0; $i<@hash; $i++){
	 %hash=%{$hash[$i]};
	 @names=keys %hash;
	 for($j=0; $j<@names; $j++){
		$size=length($hash{$names[$j]});
		push(@size, $size);
	 }
	 for($j=0; $j<@size; $j++){
		$sum+=$size[$j];
	 }
	 $av=int($sum/@names);
	 push(@OUT_AV, $av);
	 }
	 wantarray ? \@OUT_AV : \$OUT_AV[0];
}


#______________________________________________________________
# Title     : get_linux_kernel_version
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  : get_kernel_version, kernel_version,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_linux_kernel_version {
	my($image, $version, $i);
	for($i=0; $i< @_; $i++){
		 if(ref($_[$i]) eq 'SCALAR'){
				$image  = ${$_[$i]};
		 }else{
				$image  = $_[$i];
		 }
		 unless(defined($image)){
				if(-e '/vmlinuz'){
					 $image='/vmlinuz';
				}elsif(-e '/boot/vmlinuz'){
					 $image='/boot/vmlinuz';
				}elsif(-f '/boot/bvmlinux' ){
					 $image='/boot/bvmlinux';
				}
		 }
		 print "\n# The final chosn \$image is $image\n";
		 my($str)             = "phlogiston";
		 my($version_start)   = 0;
		 my($version_length)  = 10;

		 open(DATA, $image) or return(undef);
		 #seek(DATA, $version_start, 0);
		 while(<DATA>){
				if(/(\d+\.\d+\.\d+)/){
						$version=$1;
						push(@versions, $version);
						last;
				}
		 }
		 close(DATA);
	}
	if(@versions > 1){
			return(\@versions);
	}else{
			return(\$version)
	}
}



#______________________________________________________________
# Title     : load_mount_info
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub load_mount_info {
	undef %mounted;
	undef %fs_type;

	open(MTAB, "</etc/mtab") or die "Can't read /etc/mtab: $!\n";
	while (<MTAB>) {
	my($dev, $mp, $type) = split;
	next if $dev eq 'none';
	$mounted{$dev} = $mp;
	$mounted{$mp}  = $dev;
	$fs_type{$dev} = $type;
	}
	close(MTAB);
}



#______________________________________________________________
# Title     : plot_vertically
# Usage     : &plot_vertically(\@query);
# Function  : This is a sub used for plot_domains.pl for
#             genome_analysis
# Example   :
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub plot_vertically{
	@numbers=@{$_[0]};
	print "\n    |===================================================\>\n";
	for($i=0; $i<@numbers;$i++){
	 printf ("%-4d\|", $i);
	 print "\*"x$numbers[$i], "\n";
	}
	print "    |===================================================\>\n";
}

#______________________________________________________________
# Title     : plot_histogram_horizontally
# Usage     : &plot_horizontally(\@query);
# Function  :
# Example   :
#  Input: $input= '00001111111113333333333444444444111111111111111';
#
#  Output:
#   00001111111113333333333444444444111111111111111
#   1-------------------------------------------47
#  |
#  |
#  |                       *********
#  |             *******************
#  |             *******************
#  |    *******************************************
#  |-----------------------------------------------
#
# Warning   :
# Keywords  : plot_horizontally, plot_numbers_horizontally, plot,
#             plot_numbers,
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#--------------------------------------------------------------
sub plot_histogram_horizontally{
	my @numbers=@{$_[0]};
	my $leng= @numbers;
	my ($largest, @inversed, $m, $i);
	for($i=0; $i< @numbers; $i++){
	$largest=$numbers[$i] if $largest < $numbers[$i];
	}
	for($i=0; $i< @numbers; $i++){ # this inverse the digits
	$inversed[$i]=abs($numbers[$i]-$largest);
	}
	print "\n ", @numbers;
	print "\n 1", "\-"x($leng-4),$leng;
	print "\n\|";
	print "\n\|";

	for($m=0; $m< $largest; $m++){
	print "\n\|";
	for($i=0; $i<@inversed;$i++){
	   if($inversed[$i] > 0){
		 print " ";
		 $inversed[$i]--;
	   }else{
		 print "\*";
	   }
	}
	}
	print "\n\|", "\-"x@numbers;
	print "\n";
}




#______________________________________________________________
# Title     : condense_number_string
# Usage     :
# Function  : condenses the numbers by making an average with
#             given factor. If the factor is 2 on number seq
#              1334284425 , result will be 23543
#              133428442  ,                23541 <-- preserved end
#             Factor 3 =>
#              133428442  , (1+3+3)/3 = 2
#                           (4+2+8)/3 = 4,,,
# Example   : @output=@{&condense_number_string(\@input, $factor)};
#             with @input=qw(1 2 4 10 10 22 2 3 44 2 3); and $factor=3
# Warning   :
# Keywords  : compact_number_string, compact_digits, condense
#             condense_string
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub condense_number_string{
	my @ARRAY=@{$_[0]};
	my $factor = ${$_[1]} || $_[1];
	my ( $i, $j, @out );
	unless(defined($factor)){ $factor=1 }
	for($i=0; $i< @ARRAY; $i+=$factor){
	  my $temp_sum;
	  for($j=$i; $j < ($factor+$i); $j++){
		$temp_sum+=$ARRAY[$j]
	  }
	  push(@out, int($temp_sum/$factor) );
	}
	return(\@out);
}


#___________________________________________________________
# Title     : get_seq_fragments
# Usage     : @seq_frag=&get_seq_fragments(\%msf, @RANGE);
# Function  : gets sequence(string) segments with defined
#             ranges.
# Example   :
#  %test=('seq1', '1234AAAAAAAAAAAaaaaa', 'seq2', '1234BBBBBBB');
#  @range = ('1-4', '5-8');
#
#  %out = %{&get_seq_fragments(\%test, \@range)};
#  %out => (seq1_5-8   AAAAA
#           seq2_5-8   BBBBB
#           seq1_1-4    1234
#           seq2_1-4    1234 )
#
# Warning   :
# Keywords  : get_sequence_fragments,
# Options   : _  for debugging.
#             #  for debugging.
#             l=  for min seqlet length
#             r  for adding ranges in the seq names
#
# Returns   :
# Argument  :
# Category  :
# Version   : 1.8
#-------------------------------------------------------
sub get_seq_fragments{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 my $min_seqlet_size=10;
	 my @vars=keys %vars;
	 my $no_range_in_name=1;
	 for($i=0; $i< @vars; $i++){
	   if($vars[$i] eq 'l'){
		  $min_seqlet_size=$vars{$vars[$i]};
	   }
	 }
	 if($char_opt=~/v/){ print "\n \$char_opt is $char_opt  @char_opt\n"; }
	 if($char_opt=~/n/){ $no_range_in_name = 1 }
	 if($char_opt=~/r/){ $no_range_in_name = 0 }

	 print "\nget_seq_fragments \$no_range_in_name is $no_range_in_name \n";
	 for($i=0; $i< @hash; $i++){
	 my (%out_frag, $frag_name, $range_start, $range_end, @out_hash);
	 my %seqs = %{$hash[$i]};
	 my @names = keys %seqs;
	 if(@names==1){
	    for($j=0; $j < @names; $j++){
		   my $seq_name = $names[$j];
		   my $seq = $seqs{$seq_name};
		   for($k=0; $k< @range; $k++){
			  my $range = $range[$k];
			  if($no_range_in_name==1){
				 $frag_name = "$seq_name";
			  }else{
			     $frag_name = "$seq_name\_$range";
			  }
			  #if(length($frag_name)>14 ){
			  #	 $frag_name ='x'."${j}_${range}";
		      #}
			  ($range_start, $range_end)=$range=~/(\d+\.?\d*)\-(\d+\.?\d*)/;
			  my $frag_len = $range_end-$range_start+1;
			  if($frag_len < $min_seqlet_size){
			     next;
			  }
			  my $fragment = substr($seq, $range_start-1, $frag_len);
			  $out_frag{$frag_name}=$fragment;
		   }
		}
		push(@out_hash,  \%out_frag);
	 }elsif(@names > 1){
	    for($k=0; $k< @range; $k++){
		  my %out_frag=();
	      my $range=$range[$k];
		  ($range_start, $range_end)=$range=~/(\d+\.?\d*)\-(\d+\.?\d*)/;
	      my $frag_len = $range_end-$range_start+1;
		  if($frag_len < $min_seqlet_size){
		     next;
		  }
	      for($j=0; $j < @names; $j++){
	         my $seq_name=$names[$j];
			 my $seq = $seqs{$seq_name};
		     if($no_range_in_name==1){
				 $frag_name = "$seq_name";
			 }else{
			     $frag_name = "$seq_name\_$range";
			 }
			 #if(length($frag_name)>15 ){
			 #	$frag_name ='x'."${j}_${range}";
		     #}
			 if($range_start==0){ $range_start++; } ## This is a bugfix
			 my $fragment = substr($seq, $range_start-1, $frag_len);
			 $out_frag{$frag_name}=$fragment;
		  }
		  push(@out_hash, \%out_frag);
		}
	 }
	 }
	 if(@out_hash > 1){ return(@out_hash)
	 }elsif(@out_hash==1){ return($out_hash[0]) }
}




#________________________________________________________________________
# Title     : make_standalone_subroutines
# Usage     : &make_standalone_subroutines(@ARGV);
# Example   : &make_standalone_subroutines(@ARGV);
# Function  : Creates each subroutine derived xxx.pl file from Bio.pl or any
#             given library file. If there is a file for a sub already, it
#             skips.
# Class     : Utility
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------
sub make_standalone_subroutines{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my($each_sub, %out_subs, %left_out, @lib, $ver, $real_sub_entry_found);
	$|=1;
	for($i=0; $i < @file; $i++){
		open(LIB_FILE, "<$file[$i]")|| die  "\n $file[$i]  <- $! \n";
		my @lib =<LIB_FILE>;
		FOR: for($j=0; $j < @lib; $j++){
				  my (%out_subs, $each_sub);
				  my $title_found;
				  #"""" Taking the headbox """""""""""""
				  if( ($lib[$j]=~/^#+[_\-\*]{10,120} *$/)
					 &&($lib[$j+1]=~/^(# *title *: *([\w\-]+))[^\.pl]/i) ){
					  $each_sub=$2;
					  $title_found =1;
					  if( (-s "$each_sub\.pl") > 200 ){
						  print (-s "$each_sub\.pl"), "   ";
						  print "  $each_sub", " exists \n";
						  next FOR;
					  }elsif((-s "$each_sub\.pl") <= 200){
					     my $temp;
					     open (TEMP, "<$each_sub\.pl");
					     while(<TEMP>){
					        if(/^#[_\-\*]{10,120} *$/){ $temp++ }
					        elsif(/^# *title *: *[\w\-]+[^\.pl]/i ){
					           $temp++;
					        }elsif(/^# *\w+/){
					           $temp=$temp+0.5;
					        }
					     }
					     if($temp >2){
					        next FOR;
					     }
					  }

					  $out_subs{"$each_sub"}.="$lib[$j]$1\n";
					  $j+=2;
					  until( ($lib[$j]=~/^sub *\w+ *\{/)||($lib[$j]=~/^#---+ *$/) ||
							 ($lib[$j]=~/^#_____+ *$/) || ($lib[$j]=~/^#\*\*+ *$/) ){
							 $lib[$j]=~s/( *)$//;  #<-- removing ending space
							 $out_subs{"$each_sub"}.="$lib[$j]";
							 $j++;
					  }
					  $out_subs{"$each_sub"}.="$lib[$j]";
					  $j++;    ## essential to remove #------------- line
				  }

				  #"""""""" Reading sub {  } """""""
				  if( ($title_found==1)&&($lib[$j]=~/^sub +([\w\-]+) *\{/) ){
				     $each_sub=$1;
					  $out_subs{"$each_sub"}.="$lib[$j]";
					  if($lib[$j]=~/^sub +([\w\-]+) *\{.+\}/){
						  goto WRITE;
					  }
					  $j++;
					  until($lib[$j]=~/^\}/){
					     $out_subs{"$each_sub"}.="$lib[$j]";  $j++;
					  }
					  $out_subs{"$each_sub"}.="$lib[$j]";  ## to fetch '}'
					  $j++;

					  WRITE:
				     open (EACH_FILE, ">$each_sub\.pl");
				     print EACH_FILE  "#\!\/perl\n";
				     print EACH_FILE  "# Made by $0 at: ", `date`, "\n";
				     print EACH_FILE $out_subs{$each_sub};
		           close EACH_FILE;
		           %out_subs=();
		           #chmod

				  }
			  }
	}#""""""""""""" end of for (@file)
	close LIB_FILE;
}



#___________________________________________________________
# Title     : is_html
# Usage     :
# Function  : Checks if it is an html file.
# Example   : $html=&is_html(\@test);
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#-------------------------------------------------------
sub is_html{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my $html=0;
	if( @string >0 ){
	 for($i=0; $i<@string; $i++){
		if($string[$i]=~/^[.\n]{0, 100}\< *HTML *\>/i){
			$html_head=1;
		}if($string[$i]=~/[.\n]+\< *\/HTML *\>[\n.]{0,100}$/i){
			$html_end=1;
		}
	 }
	 if( ($html_head eq $html_end)&&($html_end=1)){
		$html=1;
	 }
	 if($debug==1){ print "\n \@string is @string\n"; }
	}elsif(@file>0){
	 for($i=0; $i< @file; $i++){
		my $all_lines;
		open(F, "$file[$i]");
		while(<F>){
		  $all_lines.=$_;
		}
		print "\n All the lines of $file[$i] is $all_lines\n" if $debug==1;
		if($all_lines =~/\< *HTML *\>/i){
		  if($all_lines=~/\< *\/ *HTML *\>/i){
			  $html=1;
			  print "\n html matched $html\n" if $debug ==1;
		  }
		}
	 }
	}elsif( @array>0 ){
	 @arr = @{$array[$i]};
	 for($i=0; $i< @arr; $i++){
		 if($arr[$i]=~/^[.\n]{0, 100}\< *HTML *\>/i){
			$html_head=1;
		 }if($arr[$i]=~/[.\n]+\< *\/HTML *\>[\n.]{0,100}$/i){
			$html_end=1;
		 }
	 }
	 if( ($html_head eq $html_end)&&($html_end=1) ){
		$html=1;
	 }
	}elsif(@hash>0){
	 for($i=0; $i< @hash; $i++){
		@hash=%{$hash[$i]};
		for($i=0; $i< @hash; $i++){
			if($hash[$i]=~/^[.\n]{0, 100}\< *HTML *\>/i){
			  $html_head=1;
			}if($hash[$i]=~/[.\n]+\< *\/HTML *\>[\n.]{0,100}$/i){
			  $html_end=1;
			}
		}

	 }
	}
	return($html);
}

#___________________________________________________________________
# Title     : get_column
# Usage     : &get_column(\@ar, 1,2 ,3);
#             &get_column(\%ha, 1,2 ,3);
#             &get_column(@ARGV);
#             # where prompt is like: column.pl temp.txt 1 2 3 4
# Function  : Prints any specified columns, can change order of them,
#             can filter values of columns to filter (max or min value)
#             Skipps blank line.
# Example   : For getting only necessary columns
#             Input: %Hash=(1, 'col1 col2 col3',
#                           2, 'col1 col2 col3',
#                           3, 'col1 col2 col3');
#             input format: &get_column(\%Hash, 3,2,1, 'k'); # k is opt
#             Ouput format: STDOUT as
#
#                1     col3 col2 col1
#                2     col3 col2 col1
#                3     col3 col2 col1
#
# Keywords  : columns, column.pl, column, get_columns, take_columns,
# Options   : #  for debugging.
#             _  for debugging.
#             k  for Key print when hash input is given.
#             n  for no first line display(Handy when you have title line
#                                          and wanna remove it)
#             ?max?=xxx for filtering column numbers by maximum of xxx
#             ?min?=yyy for filtering column numbers by minimum of yyy
#                      (eg, min4=100000 means 4th column minimum is 100000)
#                      (eg, 1min4=10, 2min3=10, means get 4th column values
#                           below 10 as the first output column. Get 3rd
#                           column values below 10 as the second out column.
#
# $combine = 1 by -c c   # c is for combining columns in different files
# $ignore  = 1 by -i i   # i is for ignoring leng diff in columns over 1 input
#
# Returns   : Ref of
# Argument  : Ref of Hash, Array or just filename, and wanted column numbers.
# Category  :
# Version   : 1.6
#---------------------------------------------------------------
sub get_column{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (%whole, $previous, @out, @columns);
	my $len =4;
	my @v_keys= sort keys %vars; ## To be able to exchange order of column.
	if($char_opt=~/i/i){
	    $ignore =1;
	}elsif($char_opt=~/c/i){
	    $combine=1;
	}

	if(@v_keys > 0){
	   for($i=0; $i< @v_keys; $i++){
	      if($v_keys[$i]=~/\d*(m..)(\d+)/i){
		 $M=$1.$2;
		 ${"$1$2"}= $vars{$&};
	         push(@columns, $2);
	      }
	   }
	}else{
	   @columns=@num_opt;
	}
	my $troubled_column ;

	if((@num_opt==0)&&(@file>1)){ $combine=1 }; # when no column num. is given assume $combine

	#""""""""""" When combine option is set """"""""""""""""""""""
	if(($combine==1)&&(@file > 1)){
	   for ($f=0; $f<@file; $f++){
		open(IN, "$file[$f]"); # real showing is now.
		my @all_lines=<IN>;
		if((@all_lines != $previous)&&($ignore !=1)){
		    print "\n The column lengths do not match in the inputs\n";
		    print "\n you can use -i option \n";
		    exit;
		}
		$previous=@all_lines;
		for($w=0; $w< @all_lines; $w++){
		    if($all_lines[$w]=~/^[\t ]*$/){ next }  # skipping blank line
		    chomp($all_lines[$w]);
		    $out[$w].="$all_lines[$w] ";
		}
		close IN;
	   }
	   push(@array, \@out);
	}

	###### File is given as input #######""""""""""""""""""""""""""""""""""""""
	if((@file >=1)&&(@array < 1)){
  	   my $file;
	   for $file(@file){
		 my ($line_num, $line_read);
		 my $change=0.1;
		 open(IN, "$file");
		 my @all_lines=<IN>;
                 for($q=0; $q < @all_lines; $q++){ # This open is only for getting largest column width size
		     $line_read++;
                     @splited=split(/ +/, $all_lines[$q]);
		     $l=${&get_longest_str_size(\@splited)};
		     if($l>$len){ $len=$l; $change++ }
		     if( ($line_read/$change) > 50 ){ last } # this is to check the column
		 }                                          # consistency and stops after some
		 if($debug==1){
		     print "\n$line_read lines read to get right column size\n";
		 }
		 close(IN);
		 my $line_counter;
		 for($x=0; $x < @all_lines; $x++){
                    if($all_lines[$x]=~/^[\t \#]*$/){ next }  # skipping blank line and comment lines
                    if($all_lines[$x]=~/^#/){ next }  # skipping  comment lines
		    $line_counter++;
		    if(($char_opt=~/n/i)&&($line_counter==1)){ next } ## NO title #
		    if(@M=$all_lines[$x]=~/(\S+)/g){
			  if(@columns < 1){
			    for($n=0; $n< @M; $n++){
			      $columns[$n]=$n+1;
			    }
			  }

	          for($r=0; $r< @columns; $r++){ # columns is from num_opt which is given at prompt (like 3 2 1)
				  $col =$columns[$r]-1;  #
				  if( defined( ${"max$col"} ) &&    # when max or min is defined
						defined( ${"min$col"} ) ){
						if( ( ${"max$col"}  > $M[$col])&&
	                   ( ${"min$col"}  < $M[$col]) ){
							 printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
						}else{
							 printf "%-${len}s ";
						}
				  }elsif( defined(${"max$col"}) ){ #--- When max and min are not defined.----#
						if(  ${"max$col"} > $M[$col] ){
							 printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
						}else{
							 printf "%-${len}s ";
						}
				  }elsif( defined(${"min$col"}) ){
						if(  ${"min$col"} < $M[$col] ){
							printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
						}else{
							printf "%-${len}s ";
						}
				  }else{
						printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
				  }
	          }
	          print "\n";
			}
		 } #for

	  }
	}    ###### Array is given as input #######""""""""""""""""""""""""""""""""""
	elsif(@array>0){  # if input is ('x y xx y','k t yy zz',,,,)
	  for($t=0; $t<@array; $t++){
             my @arr=@{$array[$t]};
	     print "\n Array input\n" if $debug ==1;
		 for($i=0; $i<@arr;$i++){
		   @splited=split(/ +/,$arr[$i]);
		   $l=${&get_longest_str_size(\@splited)};
  		   $len=$l if $l>$len;
	    }
	    for($i=0; $i< @arr; $i++){
			if($arr[$i]=~/^[\t ]*$/){ splice(@arr, $i, 1); $i--; next }
			if(($char_opt=~/n/i)&&($i==0)){ next } # skipping the first line
			if(@M=$arr[$i]=~/(\S+)/g){
			   if(@columns < 1){
				  for($n=0; $n< @M; $n++){
					 $columns[$n]=$n+1;
				  }
			   }
	           for($j=0; $j< @columns; $j++){
				  $col =$columns[$j]-1;  #

				  if( defined( ${"max$col"} ) &&    # when max or min is defined
						defined( ${"min$col"} ) ){
						if( ( ${"max$col"}  > $M["$col"] )&&
	                   ( ${"min$col"}  < $M[$col]) ){
							 printf "%-${len}s ",$M["$col"]  unless($M["$col"]  eq '');
						}else{
							 printf "%-${len}s ";
						}
				  }elsif( defined(${"max$col"}) ){ #--- When max and min are not defined.----#
						if(  ${"max$col"} > $M["$col"]  ){
							 printf "%-${len}s ",$M["$col"]  unless($M["$col"]  eq '');
						}else{
							 printf "%-${len}s ";
						}
				  }elsif( defined(${"min$col"}) ){
						if(  ${"min$col"} < $M["$col"]  ){
							printf "%-${len}s ",$M["$col"]  unless($M["$col"] eq '');
						}else{
							printf "%-${len}s ";
						}
				  }else{
						printf "%-${len}s ",$M["$col"] unless($M["$col"] eq '');
				  }

	           }
	           print "\n";
			}
	    }
	  }
	}  ##### Hash is given as input #######""""""""""""""""""""""""""""""""""
	elsif(@hash>0){
	  my @arr;
	  for($h=0; $h<@hash; $h++){
		  my @array=values %{$hash[$h]};
		  my @keys =keys %{$hash[$h]};
		  for($i=0; $i< @array; $i++){ # getting the longest str size
			 @arr=split(/ +/,$array[$i]);
			 $l=${&get_longest_str_size(\@arr)};
			 $len=$l if $l>$len;
		  }
		  for($i=0; $i< @array; $i++){
		    if($array[$i]=~/^[\t ]*$/){ splice(@array, $i, 1); $i--; next }
		    if(($char_opt=~/n/i)&&($i==0)){ next } #  skipping the first line
		    printf "%-10s", $keys[$i] if($char_opt=~/k/i); ## Option for key printing
		 	 if(@M=$array[$i]=~/(\S+)/g){
			   if(@columns < 1){
				  for($n=0; $n< @M; $n++){
					 $columns[$n]=$n+1;
				  }
			   }
	           for($j=0; $j< @columns; $j++){

	              $col =$columns[$j]-1;

				  if( defined( ${"max$col"} ) &&    # when max or min is defined
						defined( ${"min$col"} ) ){
						if( ( ${"max$col"}  > $M[$col])&&
	                   ( ${"min$col"}  < $M[$col]) ){
							 printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
						}else{
							 printf "%-${len}s ";
						}
				  }elsif( defined(${"max$col"}) ){ #--- When max and min are not defined.----#
						if(  ${"max$col"} > $M[$col] ){
							 printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
						}else{
							 printf "%-${len}s ";
						}
				  }elsif( defined(${"min$col"}) ){
						if(  ${"min$col"} < $M[$col] ){
							printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
						}else{
							printf "%-${len}s ";
						}
				  }else{
						printf "%-${len}s ",$M[$col] unless($M[$col] eq '');
				  }

	           }
	           print "\n";
			}
		  }
	  }
	}
}






#____________________________________________________________________
# Title    : write_sdb_file
# Function : gets a hash ref. and writes the SDB file with 'sprintf'
# Usage    : @out=@{&write_sdb_file(\%seq)};
# Example  : @out=@{&write_sdb_file(\%seq, 'v')};  ## for STDOUT as well
#    ___________________________________________________________________________
#    Title      : EST_YEAST.sdb
#    Full Name  : Telomerase_yeast_699aa
#    Nicknames  :
#    EMBL       :
#    PDB        :
#    Swissprot  :
#
# Argument : \%ref_of_seq
# Keywords : write_sdb
# Returns  :
# Options  : v  for verbose representation. This will print boxes on STDOUT
#            n  for no '#' leader.
#            e  for Endline( '-----------------------------..' )
# Version  : 1.1
#---------------------------------------------------------------
sub write_sdb_file{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my($Commont_Symbol, $delimiter, $Enclosed_came, $end_found, $end_line, $entry,
	  $length, $line,  $name, $name_found, $name_found, $num,
	  $original_dir, $output, $out_string, $pre, $pwd, $start_line, $string, $string1,
	  $temp, $title_found, $type_DSSP, @arg_output, @Final_out, @k, @keys, @names, @out, @out_hash,
	  @out_hash_final, @output_box, @outref, @read_files, @str1, @str2,  @string1,
	  %correct_head_box_entry, %Final_out, %hash, %input, %out_hash_final
	 );
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my($Entry_length) =9 ;
	my($VL) =60; ## key length( like in  # Title )  ## value length
	$num    =80;
	if($char_opt =~ /n/i){
	  $Commont_Symbol=' '; ## Comment symbol. For help display, you can change into ' '
	}else{
	  $Commont_Symbol='';   #  Comment symbol. Default head_box display.
	}
	for($x=0; $x < @hash; $x++){
		my(%input) = %{$hash[$x]};  my(@keys)= sort (keys %input); my(@out);

		#''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
		##  PUTTING an order in the printout entries. To make 'Title' come first
		#''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
		for($i=0; $i < @keys; $i++){
		  if($keys[$i]=~/^Title/i){
			  $temp=$keys[0];	   $keys[0]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Enclosed?/i){
			  $temp=$keys[$#keys];	   $keys[$#keys]=$keys[$i];   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Usage$/i){
			  $temp=$keys[1];	   $keys[1]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Function/i){
			  $temp=$keys[2];	   $keys[2]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Example/i){
			  $temp=$keys[3];	   $keys[3]=$keys[$i];	   $keys[$i]=$temp;
		  }elsif($keys[$i]=~/^Version/i){
			  $temp=$keys[$#keys-2];  $keys[$#keys-2]=$keys[$i];   $keys[$i]=$temp;
			  #### To make null version value to '1.0'
			  if($input{$keys[$#keys-2]}=~/^ *$/){ $input{$keys[$#keys-2]}='1.0'; }
		  }elsif($keys[$i]=~/^Warning/i){
			  $temp2=$keys[$#keys-1]; $keys[$#keys-1]=$keys[$i];   $keys[$i]=$temp2;
		  }
		}
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		##       Writing starting line
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my($start_line) = "$Commont_Symbol".'_'x"$num"."\n";
		if( $char_opt =~ /v/i){
			print $start_line;      } # Prints to STDOUT,

		my($Enclosed_came);  ## <<-- This should be HERE !
		$Entry_length=${&get_longest_str_size(\@keys)};
		for( $i =0; $i < @keys; $i++){  #### @keys has been sorted before.
		  my($Len) = length($input{$keys[$i]});
		  my $delimiter = ':';
		  my($entry) = $keys[$i];
		  $entry =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e; ## capitalizing word
		  if($entry=~/^Enclosed?$/i){ $Enclosed_came = 1; }
		  my(@input) = split(/\n+/, $input{$keys[$i]});
		  if(@input > 0){
			  for($j =0; $j < @input; $j++){
				 ## If NO entry name(blank) is given    ##
				 if($j > 0){  ## If the value is a multi line.
					  $entry = '';   $delimiter=' ';    }
				 if( $char_opt =~ /v/i){
					  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
					  ###  This is to reduce the entry length of Enclosed content lines   ##
					  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
					  if( ($Enclosed_came==1)&&($entry eq '') ){
						  $Entry_length=2; $VL=80; }
					  printf("$Commont_Symbol %-${Entry_length}s $delimiter %-${VL}s\n", $entry , $input[$j]);
				 }
				 if(($Enclosed_came==1)&&($entry eq '')){ $Entry_length=2; $VL=80; }
				 $out[$k++]=sprintf("$Commont_Symbol %-${Entry_length}s $delimiter %-${VL}s\n", $entry,$input[$j]);
				 if($entry=~/^Enclosed?/){ $Enclosed_came = 1; }   }}
		  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		  ##   If the entries have null descriptions, just print entries  ######
		  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		  elsif(@input ==0){
				 if( $char_opt =~ /v/i){
					 printf("$Commont_Symbol %-${Entry_length}s $delimiter %-${VL}s\n", $entry, ' ');   }
				 $out[$k++]=sprintf("$Commont_Symbol %-${Entry_length}s $delimiter %-${VL}s\n", $entry, ' ');
		  }
		}
		############################################################
		##       Writing  Ending  line                            ##
		############################################################
		$end_line = "$Commont_Symbol".'-'x"$num"."\n";
		if( ($char_opt =~ /v/i)&&($char_opt =~ /e/i) ){  print $end_line;  }
		if( $char_opt =~ /e/i){  push(@out, $end_line)   }
		unshift(@out, $start_line);
		push(@Final_out, \@out);
	}
	if(@Final_out > 1){ @Final_out; }
	elsif( @Final_out==1){ $Final_out[0] }
} #<--- END of write_sdb_file
#________________________________________________________________________
# Title     : push_if_not_already
# Usage     : @out=@{&push_if_not_already(\@mother_array, \@adding_array )};
#             @out=@{&push_if_not_already(\@mother_array, $adding_scalar)};
# Function  : returns ref. of an array for a list of non-repetitive entry.
# Example   :
# Warning   :
# Keywords  : add_if_not_already, add_element_if_not_already, if_not_already
#             add_element_if_not_already, push_element_if_not_already,
#             if_no_already_push, put_element_if_not_already, add_new_element
#             add_new_items_only, push_new_items_only, push_new_elements_only
#             put_if_not_already,
# Options   :
# Returns   : a ref. of an array.
# Argument  : two references. The first should be an array ref. The 2nd can be either
#             scalar or array reference.
# Category  :
# Version   : 1.3
#--------------------------------------------------------------------
sub push_if_not_already{
	my($already_in, $already, $i, @push_items_given);
	my(@out_array)=@{$_[0]};
	if(ref($_[0]) ne 'ARRAY'){ print "\n push_if_not_array need ref\n"; exit; }
	push(@push_items_given, ${$_[1]}) if(ref($_[1]) eq 'SCALAR');
	@push_items_given=@{$_[1]} if(ref($_[1]) eq 'ARRAY');
	for $already (@out_array){  ## This for is to remove repetitive
	  for ($i=0; $i< @push_items_given; $i++){
		 if($already eq $push_items_given[$i]){ splice(@push_items_given,$i, 1); $i--; }
	  }
	}
	push(@out_array, @push_items_given);
	return(\@out_array);
}

#_______________________________________________________________
# Title     : compare_sec_template_with_db
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  : sec structure mapping, map sec str, map_sec_structure
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#-----------------------------------------------------------
sub compare_sec_template_with_db{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	$ref_target_hash = shift @hash;
	%target = %{$ref_target_hash};
	@names = keys %target;
	$name_target = $names[0];
	if($name_target =~/\w+(\d+)/){ $ori_target_seq_len = length($1); }
	@target_frag = split(/ +/, $target{$name_target} );

	for($i =0; $i< @hash; $i ++){
	 %db=%{$hash[$i]};
	 @db_name = keys %db;
	 for($j=0; $j < @db_name; $j ++){
		$name = $db_name[$j];
		@db_frag = split( / +/, $db{$name} );
		for($k=0; $k < @target_frag; $k ++){
		  if( ($target_frag[$k]=~/H(\d+)/i)&&($db_frag[$k]=~/H(\d+)/i) ){
			 $simple_match_output{$name}++;
			 $leng_diff = abs($1 - $2)/15;
			 $simple_match_output{$name} = $simple_match_output{$name}- $leng_diff;
		  }elsif( ($target_frag[$k]=~/E(\d+)/i)&&($db_frag[$k]=~/H(\d+)/i) ){
			 $simple_match_output{$name}--;
			 #$leng_diff = abs($1 - $2)/10;
			 #$simple_match_output{$name} = $simple_match_output{$name}- $leng_diff;
		  }elsif( ($target_frag[$k]=~/H(\d+)/i)&&($db_frag[$k]=~/E(\d+)/i) ){
			 $simple_match_output{$name}--;
			 #$leng_diff = abs($1 - $2)/10;
			 #$simple_match_output{$name} = $simple_match_output{$name}- $leng_diff;
		  }elsif( ($target_frag[$k]=~/E(\d+)/i)&&($db_frag[$k]=~/E(\d+)/i) ){
			 $simple_match_output{$name}++;
			 $leng_diff = abs($1 - $2)/15;
			 $simple_match_output{$name} = $simple_match_output{$name}- $leng_diff;
		  }
		}
	 }
	}
	return(\%simple_match_output);
}



#___________________________________________________________________
# Title     : get_peptide_occurance
# Usage     :
# Function  : gets the number of occurances of peptide(with given size) for
#             any number of sequences given.
# Example   : %stat=%{&get_peptide_occurance(\%pro_sequence, $size)};
#              while %pro_sequence has one or more sequences like
#              seq1 AAAAAAAAAAAA, seq2 BBBBBBBBBBBBBB, ...
#              $size is number. For dipeptide=2, tripeptide=3, tetrapep=4...
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  : eg=> (\%ref_hash, 4)
# Category  :
# Version   : 1.2
#---------------------------------------------------------------
sub get_peptide_occurance{
	 my($k, $i, $s, $peptide, $pep_entry_num, @name, %stat);
	 %all=%{$_[0]};
	 $size=$_[1];
	 @name=keys %all;
	 for($k=0; $k<@name; $k++){
		if($all{$name[$k]}=~/[BZX]/i){ next;
		}else{
			$seq_leng += length($all{$name[$k]});
	      my @seq=split(//, $all{$name[$k]});
			my $all_occur_pep;
	      for($i=0; $i< (@seq-($size-1)); $i ++){
	        my $peptide;
	        for($s=0; $s < $size; $s++){
		       $peptide .= $seq[$i+$s];
	        }
	        $stat{$peptide}++;
			  $all_occur_pep ++;
	        print "\n$peptide  $stat{$peptide}" if $debug==1;
	        $pep_entry_num=keys %stat;
			  if( ($debug==1)&&($pep_entry_num%100 == 0 ) ){
				  print "\n Present peptide entries are:  $pep_entry_num  out of $all_occur_pep residues \n";
			  }
	      }
		 }
	 }
	 return(\%stat);
}


#___________________________________________________________________
# Title     : open_lottery_file
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#---------------------------------------------------------------
sub open_lottery_file{
	my ($i, @lines, @bin1, @bin2, @bin3, @bin4, @bin5, @bin6, @bonus, @allbins);
	open(F, "$_[0]");
	if($_[1] =~/\-a/i){ $single_array_return=1 };
	@lines = <F>;
	for($i=0; $i< @lines; $i++){
		if($lines[$i]=~/ {1,3}(\d\d) {1,3}(\d\d) {1,3}(\d\d) {1,3}(\d\d) {1,3}(\d\d) {1,3}(\d\d) {1,2}(\d\d)/){
	push(@bin1, $1);
	#print $1, "\n";
	push(@bin2, $2);
	push(@bin3, $3);
	push(@bin4, $4);
	push(@bin5, $5);
	push(@bin6, $6);
		  push(@bonus, $7);
		}
	}
	if($single_array_return == 1){ @allbins=( @bin1, @bin2, @bin3, @bin4, @bin5, @bin6, @bonus); }
	else{  return(\@bin1, \@bin2, \@bin3, \@bin4, \@bin5, \@bin6, \@bonus); }
}


#_________________________________________________________________
# Title     : get_probable_half
# Usage     :
# Function  : This produces a hash ref. which is supposed to be most probable
#             according to the given array. It divides array into halves
#             gets the more probable half until it gets one single number.
# Example   :
# Warning   :
# Keywords  : get_frequent_halves,
# Options   :
# Returns   :
# Argument  : \@array
# Category  :
# Version   : 1.0
#-----------------------------------------------------------------
sub get_probable_half{
	my (%hash, $i, @keys, @values);
	%hash=%{$_[0]};
	@keys=  sort {$a<=$b} keys   %hash;
	@values= values %hash;
	print "\n Hash is ",%hash,"\n";
	if(@keys == 1){
		 return(\%hash); last;
	}elsif(@keys >1){
		if((@keys % 2) != 0){ # make the number even
			 $keys[@keys]=$keys[$#keys];
		}
		@first_half=@keys[0..(@keys/2-1)];
		@second_half=@keys[(@keys/2)..$#keys];
		print "@first_half", "\n";
		print "@second_half", "\n";
		my($sum1, $sum2, %hash1, %hash2);
		for($i=0; $i<(@keys/2); $i++){
			 $sum1 +=$hash{$first_half[$i]};
			 $sum2 +=$hash{$second_half[$i]};
			 $hash1{$first_half[$i]} =$hash{$first_half[$i]};
			 $hash2{$second_half[$i]}=$hash{$second_half[$i]};
		}
		if($sum1 > $sum2){ &get_probable_half(\%hash1);}
		else{  &get_probable_half(\%hash2);}
	}
}

#_______________________________________________________________
# Title     : divide_array
# Usage     : &show_array(&divide_array(\@input, 6));
# Function  : divides any array to the denominator given.
#             If you give array of 100 elem, with 5, you will
#             get 5 arrays with 20 elem each.
# Example   :
# Warning   :
# Keywords  : split_array_into_pieces, split_array, chop_array,
#             fragment_array,
# Options   : s=  for dividing the array with sub array size
#                 eg) to get 20 elem length sub arrays from
#                     a big array
#                     @ar_ref=@{&divide_array(\@array, 's=20')};
# Returns   :
# Argument  :
# Category  :
# Version   : 1.4
#-----------------------------------------------------------
sub divide_array{
	 my ($size,$remaining, $size_div, $s);
	 my @array = @{$_[0]};
	 my  @final_array_ref=();
	 if(ref($_[1])){
		if(${$_[1]}=~/^\d+$/){
		   $denominator = ${$_[1]};
		}elsif(${$_[1]}=~/s=(\d+)$/i){
	       $size=$1;
		   $size_div=1;
	    }
	 }elsif($_[1]=~/^\d+$/){
	    $denominator = $_[1];
	 }elsif($_[1]=~/s=(\d+)$/i){
		$size=$1;
		$size_div=1;
	 }

	if((@_ ==1)&&($denominator == 0)){
	   print "\n Denominator is 0, error, setting to 1\n";
	   $denominator = 1;
	}

	if($size_div==1){
	   while(@array){
		  push(@final_array_ref, [splice(@array, 0, $size)]);
	   }
	}else{
	   my $frag_ar_size = int(@array/$denominator);
	   if($debug eq 1){ print "\n Frag arr size is :  $frag_ar_size \n" }
	   $remaining = @array % $denominator;
	   if($debug eq 1){ print "\n Remnant elem size is : $remaining \n" }
	   for($i=0; $i < $denominator; $i++){
		  if($remaining > 0){
			  push(@final_array_ref, [splice(@array, 0, ($frag_ar_size+1),)] );
		      $remaining --;
		  }elsif(($remaining == 0)&&(@array>0)){
			  push(@final_array_ref, [splice(@array, 0, ($frag_ar_size),)] );
		  }
	   }
	}
	return(\@final_array_ref);
}



#__________________________________________________________________________
# Title     : split_fasta_files
# Usage     : @names_of_single_files=@{&split_fasta_files(\@files)};
# Function  :
# Example   :
# Keywords  : divide_fasta_files, split_fasta_db_files, divide_fasta_db_files
#             make_single_fasta_files, write_single_fasta, write_single_fasta_files
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#----------------------------------------------------------------------------
sub split_fasta_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my (@each_single_seq_names, $out_single_fasta_file_name );
	for($i=0; $i< @file; $i++){
		open(FASTA, "$file[$i]");
		while(<FASTA>){
		   if(/\> *(\S+)/){
			   $out_single_fasta_file_name="$1\.fa";
			   open(OUT_SINGLE, ">$out_single_fasta_file_name");
			   print OUT_SINGLE $_;
			   push(@each_single_seq_names, $1);
		   }elsif(/\w+/){
			   print OUT_SINGLE $_;
		   }

		}
		close OUT_SINGLE;
	}
	return(\@each_single_seq_names);
}



#______________________________________________________________________________
# Title     : split_files
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
#  $division_factor= by d=
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub split_files{
     #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
     my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
     my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
     my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
     my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
     my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
     if($debug==1){print "\n\t\@hash=\"@hash\"
          \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
          \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
     #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

     my($division_factor, $base_name);
     $division_factor=2;
     if($vars{'d'}=~/(\d+)/){ $division_factor=$1 };

     for($i=0; $i< @file; $i++){
         open(FILE, $file[$i]);
         $base_name=${&get_base_names($file[$i])};
         $extension=${&get_extension_names($file[$i])};
         $total_line_num=@lines=<FILE>;
         $splited_frag_size=int(@lines/$division_factor);
         for($j=0; $j< $division_factor; $j++){
             #$frag_file="$base_name\_s${j}\.split${j}";
             $frag_file="$base_name\_s${j}\.$extension";
             push(@splited_files, $frag_file);
             open(FRAGMENT_FILE, ">$frag_file");
             for($k=0; $k<= $splited_frag_size; $k++){
                 print FRAGMENT_FILE $lines[$k];
             }
             splice(@lines, 0, $splited_frag_size);

         }
         print FRAGMENT_FILE @lines; # Writing the very last remaining lines
         close (FRAGMENT_FILE);
     }
     return(\@splited_files);
}


#_______________________________________________________________
# Title     : split_sequence
# Usage     : %out=%{&split_sequence(\%input, 2 )};
# Function  : divides any string to the denominator given.
# Example   : &show_array( &divide_string(\%input, 3) );
#              while  $input is 'seq', '12345789ABCDEFHIJKLMN'
#              The output will be 'seq_1_half', '1234578'
#                                 'seq_2_half', '9ABCDEF'
#                                 'seq_3_half', 'HIJKLMN'
# Warning   :
# Keywords  : divide_string, split_string, chop_string, divide_sequence
#             split_sequence(look at separate split_sequence sub),
# Options   :
#  $reverse_second_half=S by S -S
#  $reverse_first_half =F by F -F
#  $reverse_rest   =R by R -R  ## reversing all except the first
#  $reverse_all  =A by A -A # reverse all the fragments
# Returns   :
# Argument  :
# Category  :
# Version   : 1.3
#-----------------------------------------------------------
sub split_sequence{
		my(@string, $frag_str_size, $remaining, @hash, $i, $j,
			 @seq_names, $denominator, %input_seq, @final_hash_ref,
			 $frag, $reverse_second_half, $reverse_first_half,
			 $reverse_rest, $reverse_all);

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Checking arguments
		#________________________________________________
	for($i=0; $i< @_; $i++){
	   if(ref($_[$i]) eq 'HASH'){
					push(@hash, $_[$i]);
	   }elsif(ref($_[$i]) eq 'SCALAR'){
					 if(${$_[$i]}=~/^(\d+\.?\d*)$/){ # this can handle fraction number!!
							$denominator = int($1);            # like  9.5
					 }
			 }elsif($_[$i]=~/^(\d+\.?\d*)$/){ # this can handle fraction number!!
			   $denominator = int($1);            # like  9.5
			 }elsif($_[$i]=~/F/){ # this can handle fraction number!!
					 $reverse_first_half='F';
			 }elsif($_[$i]=~/S/){ # this can handle fraction number!!
					 $reverse_second_half='S';
			 }elsif($_[$i]=~/R/){ # this can handle fraction number!!
					 $reverse_rest='R'; $reverse_second_half='s';
			 }elsif($_[$i]=~/A/){ # this can handle fraction number!!
					 $reverse_all='A'; $reverse_rest='r'; $reverse_first_half='f';
					 $reverse_second_half='S';
	   }elsif($_[$i]=~/\S/){
	       print "\n# $0: split_sequence, You put some strange stuff to me!!\n\n";
	       exit;
	   }
	}

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
		# When s option is given while denominator is over 2, set r option
		#____________________________________________________________________
	if($denominator > 2 and $reverse_second_half){
	   $reverse_rest='r';
	}

	if($denominator == 0){
	   print "\n Denominator is 0, error, setting to 1\n";
	   $denominator = 1;
	}

		print "\n# split_sequence: \$denominator is $denominator, with \@hash\n";

	for($i=0; $i< @hash; $i++){
			 my %input_seq=%{$hash[$i]};
			 @seq_names=keys %input_seq;
			 my (%out_seq);

			 for($k=0; $k < @seq_names; $k++){
					 $seq_name=$seq_names[$k];

					 @string = split(//, $input_seq{ $seq_name });
					 $frag_str_size = int(@string/$denominator);
					 if($debug eq 1){ print "\n Frag str size is :  $frag_str_size \n" }
					 $remaining = @string % $denominator;

					 for($j=0; $j < $denominator; $j++){
								$frag_number=$j+1;

								if($remaining > 0){
										$frag=join('', splice(@string, 0, ($frag_str_size+1) ) );
										$remaining --;
										$seq_name_split="$seq_name\_$frag_number";

										#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
										# Before final, set the seq types
										#_________________________________________________________________
										if($frag_number==1 and $reverse_first_half){
												$frag=reverse($frag);
												$seq_name_split="$seq_name_split\_rv";
										}elsif($frag_number==2 and $reverse_second_half){
												$frag=reverse($frag);
												$seq_name_split="$seq_name_split\_rv";
										}elsif($frag_number >2 and $reverse_rest){
												$frag=reverse($frag);
												$seq_name_split="$seq_name_split\_rv";
										}elsif($reverse_all){
												$frag=reverse($frag);
												$seq_name_split="$seq_name_split\_rv";
										}
										$out_seq{$seq_name_split}=$frag;
								}elsif( $remaining == 0  and  @string>0){
										$seq_name_split="$seq_name\_$frag_number";
										$frag=join('', splice(@string, 0, $frag_str_size,) );

										#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
										# Before final, set the seq types
										#_________________________________________________________________
										if($frag_number==1 and $reverse_first_half){
												$frag=reverse($frag);
												$seq_name_split="$seq_name_split\_rv";
										}elsif($frag_number==2 and $reverse_second_half){
												$frag=reverse($frag);
												$seq_name_split="$seq_name_split\_rv";
										}elsif($frag_number >2 and $reverse_rest){
												$frag=reverse($frag);
												$seq_name_split="$seq_name_split\_rv";
										}elsif($reverse_all){
												$frag=reverse($frag);
												$seq_name_split="$seq_name_split\_rv";
										}
										$out_seq{$seq_name_split}=$frag;
								}
					 }
			 }
			 push(@final_hash_ref, \%out_seq);
	}
	wantarray? \@final_hash_ref : $final_hash_ref[0];
}




#_______________________________________________________________
# Title     : divide_string
# Usage     : &show_array(&divide_string(\$input, 6));
# Function  : divides any string to the denominator given.
# Example   : &show_array( &divide_string(\$input, 3) );
#              while  $input is '12345789ABCDEFHIJKLMN'
#              The output will be '1234578 9ABCDEF HIJKLMN'
# Warning   :
# Keywords  : divide_string, split_string, chop_string, divide_sequence
#             split_sequence(look at separate split_sequence sub),
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.4
#-----------------------------------------------------------
sub divide_string{
	my (@array, $i, $j, $denominator, @temp, @string, $frag_str_size,
	   $remaining, $frag);

	for($i=0; $i< @_; $i++){
	   if(ref($_[$i]) eq 'ARRAY'){
		  push(@array, @{$_[$i]});
	   }elsif(ref($_[$i]) eq 'SCALAR'){
		  if(${$_[$i]} =~/^(\d+)$/){
			 $denominator = $1;
		  }else{
			 push(@array, ${$_[$i]});
		  }
	   }elsif($_[$i]=~/^(\d+\.?\d*)$/){ # this can handle fraction number!!
		  $denominator = int($1);            # like  9.5
	   }else{
		  push(@array, $_[$i]);
	   }
	}
	if($denominator == 0){
	   print "\n Denominator is 0, error, setting to 1\n";
	   $denominator = 1;
	}
	for($i=0; $i< @array; $i++){
	   my @temp;
	   my @string = split(//, $array[$i]);
	   $frag_str_size = int(@string/$denominator);
	   if($debug eq 1){ print "\n Frag str size is :  $frag_str_size \n" }
	   $remaining = @string % $denominator;
	   for($j=0; $j < $denominator; $j++){
	 	    if($remaining > 0){
			    $frag=join('', splice(@string, 0, ($frag_str_size+1) ) );
	            push(@temp, $frag);
			    $remaining --;
		    }elsif( $remaining == 0  and  @string>0){
								$frag=join('', splice(@string, 0, $frag_str_size,) );
	            push(@temp, $frag);
		    }
	   }
	   push(@final_array_ref, \@temp);
	}
	wantarray? \@final_array_ref : $final_array_ref[0];
}

#____________________________________________________________
# Title     : write_html_headbox
# Usage     : &write_html_headbox($outfilename, \%entries);
# Function  : write html format headbox explanation with
#              given hashes of headbox content.
# Example   :
# Warning   : It takes off the last '/' when $URL has it
#
# Keywords  : write_headbox_html, write headbox in html,
#               write_headbox_files
# Options   : 'd' for date inclusion at the top of the page
#  f=   for default ftp dir name
#
# Category  :
# Version   : 1.7
#-----------------------------------------------------------
sub write_html_headbox{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(%in, $name, $name1);
		my $URL='ftp://cyrah.med.harvard.edu/pub/Perl/';

		if($vars{'f'}=~/(\S+)\/$/){ ## checking if 'f' opt is set(for default ftp dir name)
				$URL=$1;
		}

	for ($f=0; $f<@file; $f++){
				 $output_file=$file[$f];
				 open(FILE, ">$output_file");
				 print FILE "\<html\>\n";
	    if($char_opt =~/d/i){
						print FILE "\<H2  ALIGN=CENTER\>";
		    print FILE "\<FONT COLOR=\"#DC143C\"\>$file[$f]\<\/H2\>\<\/FONT\>\n";
		    print FILE "\<br\>\n";
						print FILE "\<H6 ALIGN=RIGHT\>", `date`, "Created by  \<A href=\"$0\.html\"\>$0\<\/A\>\<\/H6\> \<hr\>";
	    }

	    for($i=0; $i< @hash; $i++){
		    my %in =  %{$hash[$i]};
		    my @keys = sort keys %in;
		    $name= $in{'Title'};
		    if($name=~/(\S+)\.pl/){ ## to prevent making 'xxxx.pl.pl'
		       $name1=$1;
		    }else{
		       $name1=$name;
		    }
						print FILE "\<H3\>\<A href=\"$name1\.pl\.html\"\>$name1\<\/A\>\<\/H3\>";
						print FILE "Download \<A href=\"${URL}\/$name1\.pl\"\>$name1\.pl\<\/A\>\n";
		    print FILE "\<pre\>\n";
		    for($j = 0; $j < @keys; $j ++){
			    if($keys[$j]=~/(title)/i){
			    }elsif( ($keys[$j]=~/\w+/i)&&( $in{$keys[$j]}=~/\w+/) ){
				   chomp( $in{$keys[$j]} );
	            printf FILE ("\<b\>%-10s\<\/b\> %s\n", $keys[$j], $in{$keys[$j]});
			    }
		    }
		    print FILE "\<\/pre\>";
		    print FILE "\<hr\>\n\n";
	   }
	   print FILE "\<\/html\>\n";
	   close FILE;
	}
}


#________________________________________________________________________
# Title     : open_sdb_files
# Usage     : %entries = %{&open_sdb_files(\$file_to_read )};
# Function  :
# Example   : Output is something like
#             ('Title', 'read_head_box', 'Tips', 'Use to parse doc', ...)
# Warning   :
# Keywords  : read_sdb_files,read_sdb,
# Options   : 'b' for remove blank lines. This will remove all the entries
#             with no descriptions
# Returns   : A hash ref.
# Argument  : One or None. If you give an argu. it should be a ref. of an ARRAY
#              or a filename, or ref. of a filename.
#             If no arg is given, it reads SELF, ie. the program itself.
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------
sub open_sdb_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my($title_found, %Final_out, $variable_string, $TITLE, $title, @keys,
	  $end_found, $line, $entry, $entry_match, $End_line_num, $remove_blank,
	  $title_entry_null, $end_found, $Enclosed_entry, $Enclosed_var,
	  $blank_counter, $title_entry_exist, $entry_value, $temp_W, $Warning_part
	);

	for($r=0; $r<@file; $r++){
		open(SEQ_IN, "$file[$r]");
		my @whole_file =<SEQ_IN>;

		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		for($i=0; $i<@whole_file; $i++){
	$whole_file[$i] =~ tr/\t/ {7}/;  ## This is quite important to some parsing!!!
	#"""""""""""""""""""""""""""""""""""""""""""
	##  The first and second line of box 1 ##
	#"""""""""""""""""""""""""""""""""""""""""""
	if( ($whole_file[$i]=~/^[_\*\-\/]{55,}$/)&&    ##  '______' is discarded
	   ($whole_file[$i+1]=~/^ {0,4}([TitlNam]+e) {0,8}:? {0,20}(\S[\-\w\.:]*) *(Copyright.*)/i) ){
	   $TITLE = $1;      $title = "$2\n";   $Final_out{'Warning'}.="$3\n";
	   $entry_match=$TITLE; ## The very first $entry_match is set to 'Title' to prevent null entry
	   if($TITLE =~ /^Title|Name$/i){   #
	       if( ($title=~/^\s+$/)||( $title eq "\n") ){
		  $title_entry_null =1;  $title = '';  }    }
	   $Final_out{$TITLE}=$title;
	   $title_found ++ ;   $i++;  ## << this is essential to prevent reading the same line again.
	}

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	## The first and second line of box 2, #__________ or #**************
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&($whole_file[$i]=~/^[_\*]{20,}$/)&&
	   ($whole_file[$i+1]=~/^ *(\w{1,6}\s{0,2}\w+) {0,7}: {1,5}(.*) */i) ){
	   $title_found ++ ;        $i++;
	   $entry_match=$1;       $entry_value=$2;
	   $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;  ## Capitalize words
	   $Final_out{$entry_match}.= "$entry_value\n";
	   last if $title_found > 1;  next;   }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	##  With PROPER entry 2 : descriptins like. 'Ussage : ssssssxxjkk  kj'
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&($title_found==1)&&
	   ($whole_file[$i]=~ /^ {0,3}(\w{1,4}\s{0,2}\w{1,7}) {0,8}[:\)] {0,9}(\S.*) */i)){
	   $entry_match = $1;
			  $entry_value = $2;
	   $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
	   $Final_out{$entry_match}.= "$entry_value\n"; }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	#  With proper entry 3 : descriptins like. 'Ussage :', But blank description ##
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&($title_found==1)&&
	   ($whole_file[$i]=~ /^ {0,3}(\w{1,4}\s{0,2}\w{1,7}) {0,8}[:\)]( {0,})$/i)){
	   $entry_match = $1;
			  $entry_value = $2;
	   $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
	   $Final_out{$entry_match}.= " $entry_value\n"; }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	###  all space line matching                 ##
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&  ##<----- If blank line is matched. Take the line
	   ($title_found==1)&&($whole_file[$i]=~/^ {0, 110}$/) ){
	   $blank_counter++;
	   if($blank_counter > 2){ $blank_counter--; }
	   else{ $Final_out{$entry_match}.= " \n";  }     }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	###  Anything after 3 space to 14 positions eg: '#           HHHHHHHHH'
	###  To match 'examples' etc. INC. ':'
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&
	   ($title_found==1)&&($whole_file[$i]=~/^( {0,50})(\S.+)/) ){
	   $Final_out{$entry_match}.= "$2\n"; $blank_counter=0; }

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	###  Anything after 1 space to 11 positions  ##
	###  To match 'examples' etc. EXC. ':'       ##
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($end_found != 1)&&
	   ($title_found==1)&&($whole_file[$i]=~/^ {0,16}([^:.]+)/) ){
	   $Final_out{$entry_match}.= "$1\n"; $blank_counter=0;}

	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	###-------End of the read_box reading--------##
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	elsif( ($title_found==1)&&
	   ($whole_file[$i]=~ /^[\*\-_]{15,}/)){  ## to match '#-----..' or '#*******..'(Astrid's)
	   $End_line_num = $i;       $end_found++;
	   last;      }
		} ## < End of for loop


		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		### If title is not there at all     ####
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		@keys=keys %Final_out;
		for(@keys){
	 if(/^Title$/i){    ## No Entry of Title at all??
	    $TITLE =$&;
	    $title_entry_exist = 1;
	    if($Final_out{$_}=~/^ *$/){   ## if Title => Null or just space
	       $title_entry_null = 1;    }  }  }

		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		### When title entry is not there    ####
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		if( $title_entry_exist != 1){
	  for($s=$End_line_num+1; $s < $End_line_num+20; $s++){
	     if( $whole_file[$s] =~ /^sub {1,5}([\w\.]+) {0,6}\{/){
		$Final_out{'Title'} = "$1\n";   last;       }
	     elsif( $whole_file[$s] =~/^________________________________+/){
		#######################################
		## Uses running file name as titile  ##
		#######################################
		$Final_out{'Title'} = "$0";     last;
	     }else{
		#######################################
		## Uses running file name as titile  ##
		#######################################
		$Final_out{'Title'} = "$0";
	     }
	  }
		}
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		### When title is blank              ####
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		elsif($title_entry_null ==1){  ## It looks for 'sub xxxxx{ ' line to get title
	  ### $End_line_num is the last line read.
	  for($s = $End_line_num+1; $s < $End_line_num+20; $s++){
	     if( $whole_file[$s] =~ /^sub {1,5}(\w+\.*\w*) {0,7}{/){
		$Final_out{$TITLE} = "$1\n";    last;     }
	     elsif( $whole_file[$s] =~/^#________________________+/){
		#######################################
		## Uses running file name as titile  ##
		#######################################
		$Final_out{$TITLE} = "$0";     last;
	     }else{
		#######################################
		## Uses running file name as titile  ##
		#######################################
		$Final_out{$TITLE} = "$0";
	     }
	  }
		}
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		## Error handling, if no head box is found   ####
		#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		if($title_found < 1){ print "\nFatal: No headbox found by read_head_box2 sub.\n";  }
		push(@ref_of_seq_entry, \%Final_out);
	}
	if(@ref_of_seq_entry > 1){
		@ref_of_seq_entry;
	}else{ return (\%Final_out) }
}

#______________________________________________________________________________
# Title     : open_stride_dat_files
# Usage     : @out=@{&open_stride_dat_files(@ARGV)};
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu
# Category  :
# Version   : 1.2
#------------------------------------------------------------------------------
sub open_stride_dat_files{
     #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
     my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
     my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
     my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
     my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
     my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
     if($debug==1){print "\n\t\@hash=\"@hash\"
          \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
          \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
     #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
     my( @residues, @sec_str, $seq_name, $seq, %stride_sec_str_hash,
        $stride_dat_file, @stride_sec_str_hash_final);

     for($i=0; $i< @file; $i++){
         my $stride_dat_file=$file[$i];
         print "\n# (INFO) \$stride_dat_file $stride_dat_file is being opened\n";
         open(STRIDE_DAT_FILE, $stride_dat_file);
         while(<STRIDE_DAT_FILE>){
            if(/^NM[\t ]+(\S+)\.brk/){
                $seq_name=$1;
            }elsif(/^NM[\t ]+(\S+)/){
                $seq_name=$1;
            }elsif(/^ *SQ +(\S+)/){
                $seq=$1;
            }elsif(/^ *SS +(\S+)/){
                $sec_str=$1;
                my %stride_sec_str_hash;
                @residues=split(//, $seq);
                @sec_str=split(//, $sec_str);
                for($j=0; $j < @residues; $j++){
                    $stride_sec_str_hash{$j}=[$residues[$j], $sec_str[$j], '1.0',  "$seq_name"];
                }
                push(@stride_sec_str_hash_final, \%stride_sec_str_hash);
            }
         }
         close(STRIDE_DAT_FILE);
     }
     if(@stride_sec_str_hash_final > 1){
         return(\@stride_sec_str_hash_final);
     }elsif(@stride_sec_str_hash_final==1){
         return(\%stride_sec_str_hash);
     }
}

#_______________________________________________________________
# Title     : get_pdb_file_start_number
# Usage     :
# Function  :
# Example   :
# Warning   :
# Keywords  : start_number_of_pdb, startnumber, start number of PDB,
#             get_start_number_of_pdb_file,
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#-----------------------------------------------------------
sub get_pdb_file_start_number{
	my($start_number, $pdb_file);
	if( ref($_[0]) ){
	 $pdb_file = ${$_[0]};
	}else{
	 $pdb_file = $_[0];
	}
	open (PDB_FILE, "$pdb_file");
	while(<PDB_FILE>){
	 if(/^ATOM      1 +\w+ +\w+ +[\w]* +(\d+)  +/){
		 $start_number = $1; last;
	 }
	}
	\$start_number;
}
#_______________________________________________________________
# Title     : write_modeller_top_file
# Usage     : &write_modeller_top_file(\%hash, [v]);
# Function  : Writes Modeller command file format.
# Example   :
#     $modelname = 'gfct';
#     $template = '1ovt';
#     %hash=($modelname, $template);
#     &write_modeller_top_file(\%hash);
# Warning   :
# Keywords  :
# Options   : v  for verbose. You will get STDOUT of the result as well as file
# Returns   : a file of xxxx.top form.
# Argument  : 1 hash ref which has model name and template name -> (\%hash)
#             while %hash is (modelname, tempalatename)
# Category  :
# Version   : 1.0
#-----------------------------------------------------------
sub write_modeller_top_file{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
format MODELLER_TOP_FORMAT =
INCLUDE
SET ALNFILE = @<<<<<<<<<<<<<<<<
$ali_file
SET KNOWNS = @<<<<<<<<<<<<<<<<
$pdb_file
SET SEQUENCE = @<<<<<<<<<<<<<<<<
$model
SET ATOM_FILES_DIRECTORY = './:../atom_files'
SET STARTING_MODEL = 1
SET ENDING_MODEL = 1
CALL ROUTINE = 'model'
.
format STDOUT =
INCLUDE
SET ALNFILE = @<<<<<<<<<<<<<<<<
$ali_file
SET KNOWNS = @<<<<<<<<<<<<<<<<
$pdb_file
SET SEQUENCE = @<<<<<<<<<<<<<<<<
$model
SET ATOM_FILES_DIRECTORY = './:../atom_files'
SET STARTING_MODEL = 1
SET ENDING_MODEL = 1
CALL ROUTINE = 'model'
.
	########## Program starts ####################
	for($i=0; $i<@hash;$i++){
		($model, $pdb_file) = each %{$hash[$i]};
		$out_file = "$model.top";
		$ali_file = "$model.ali";
		open (MODELLER_TOP_FORMAT, ">$out_file");
		$pdb1 = "$ENV{'PDB'}\/$pdb_file.brk";
		$pdb2 = "$ENV{'PDB'}\/$pdb_file.pdb";
		if( !(-e $pdb1 ) && !( -e $pdb2 ) ){
			print "\n Error the file $pdb1  or  $pdb2\n";
		}
		$model ="\'$model\'";
		$pdb_file ="\'$pdb_file\'";
		$ali_file ="\'$ali_file\'";
		write MODELLER_TOP_FORMAT;
		if( $char_opt=~/v/i){ write STDOUT; }
	}
}
#_______________________________________________________________
# Title     : write_modeller_ali_file
# Usage     : &write_modeller_ali_file(\%model, \%template, [\$outfilename], [v]);
# Function  : Writes Modeller alignment format.
# Example   :
#             $out = 'test.ali';
#             %model =    qw(model AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAccccccccccc);
#             %template = qw(templ CCAAAAAAAACCCCCCCCCCCCCCCCCCCCCCCCCCCCC 3 42);
#             &write_modeller_ali_file(\%model, \%template, \$out);
# Warning   :
# Keywords  :
# Options   : You can put 2 numbers for the second set of key and element for
#             the second hash input as the starting and ending points of
#             template(i.e. pdb file seq). Unless I calculate the size of seq.
#             By default, it reads PDB file defined by ENV setting of 'PDB' and
#             gets the starting number of pdb. If starting number is defined
#             explicitly at input hash, the given starting number is used instead
#             of PDB's.
#             v  for verbose. You will get STDOUT of the result as well as file
# Returns   : a file of xxxx.ali form.
# Argument  : 2 ref. of hash for seq. and optional output.name and option(s).
#             If second input hash (for template) has 3rd and 4th element which are
#             numbers they are regarded as the starting and ending number of the
#             template(i.e. pdb file seq)
# Category  :
# Version   : 1.0
#-----------------------------------------------------------
sub write_modeller_ali_file{
	#""""""""""""""""""""""< handle_arguments{ head Ver 1.2 >""""""""""""""""""""""""""""""""
	my(@A ) = &handle_arguments( @_ ); my( $num_opt )=${$A[7]};my( $char_opt )=${$A[8]};
	my(@hash)  =@{$A[0]};my(@file)   =@{$A[4]};my(@dir   )  =@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};my(@raw_string)=@{$A[9]};
	my($i, $j, $c, $d, $e, $f, $g, $h, $k, $l, $p, $q, $r, $s, $t, $u, $v, $w, $x,$y,$z);
	if($debug==1){ print "   \@hash has \"@hash\"\n   \@raw_string has \"@raw_string\"
		\@array has \"@array\"\n   \@char_opt has \"@char_opt\"\n   \@file has \"@file\"\n"; }
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
format MODELLER_ALI_FORMAT =
>P1;@<<<<<<<<<<<<<<<
$name
structureX: @<<<<<<<<<<<<<<: @<<<: : @<<<<<: : @<<<<<<<<<<<<<: :
$name $start_seq $seq_leng $name
@*
$seq
*
.
format STDOUT =
>P1;@<<<<<<<<<<<<<<<
$name
structureX: @<<<<<<<<<<<<<<: @<<<: : @<<<<<: : @<<<<<<<<<<<<<: :
$name $start_seq $seq_leng $name
@*
$seq
*
.
	########## Program starts ####################
	if($file[0]){
		 open (MODELLER_ALI_FORMAT, ">$file[0]");
	}else{
		@seq  = %{$hash[0]};
		$name = $seq[0]; print "\n\$name is $name" if $debug ==1;
		open (MODELLER_ALI_FORMAT, ">$name.ali");
	}
	for($i=0; $i<@hash;$i++){
		@seq  = %{$hash[$i]};
		$name = $seq[0]; print "\n\$name is $name" if $debug ==1;
		$seq  = $seq[1]; print "\n\$seq  is $seq" if $debug ==1;
		$seq_leng = length($seq);
		## checking PDB entry of the template ##
		$pdb1 = "$ENV{'PDB'}\/$name.brk";
		$pdb2 = "$ENV{'PDB'}\/$name.pdb";
		if( (-e $pdb1 ) || ( -e $pdb2 ) ){
			$pdb_file = $pdb1;
			$start_seq = ${&get_pdb_file_start_number( $pdb_file )};
		}
		## Handling the starting and ending seq points of template
		if( defined($seq[2])&&($seq[2]=~/^\d+/) ){
		  $start_seq=$seq[2]; $seq_leng+=$start_seq; }
		elsif( defined($start_seq) ){
		  $seq_leng+=$start_seq; }
		else{ $start_seq =1; }
		if( (defined($seq[2])) &&($end_seq != $seq_leng ) ){
		  print "\n Your template seq length does not match with actual seq size
										\n I will put the calculated value \"$seq_leng\" as the template length\n\n";
		}
		print "\n\$seq_leng is $seq_leng\n" if $debug ==1;
		write MODELLER_ALI_FORMAT;
		if( $char_opt=~/v/i){ write STDOUT; }
	}
}

#_______________________________________________________________
# Title     : make_template_from_sec_str
# Usage     : %target   = %{&make_template_from_sec_str(\%seq)};
# Function  : makes template of sec. str. like: 'H5 E4 E2' out of '__HHHHH__EEEE__EE__'
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#-----------------------------------------------------------
sub make_template_from_sec_str{
	my(%out, @name, @fragments, %in, $name, $leng, $frag_seq, $name2);
	#""""""""""""""""""""""< handle_arguments{ head Ver 1.1 >""""""""""""""""""""""""""""""""
	my(@A ) = &handle_arguments( @_ ); my( $num_opt )=${$A[7]};my( $char_opt )=${$A[8]};
	my(@hash)  =@{$A[0]};my(@file)   =@{$A[4]};my(@dir   )  =@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};my(@raw_string)=@{$A[9]};
	my($i, $j, $c, $d, $e, $f, $g, $h, $k, $l, $p, $q, $r, $s, $t, $u, $v, $w, $x,$y,$z);
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	for($j = 0; $j < @hash ; $j ++){
	 %in = %{$hash[$j]};

	 @name = keys  %in;
	 print "\@name is @name\n", if($debug eq 1);

	 for($t=0; $t < @name; $t++){
		$name = $name[$t];
			print "\$name is $name\n", if($debug eq 1);
		$leng=length($in{$name});
			print "$leng\n", if($debug eq 1);
		$name2 ="$name"."$leng"; # to attach sequence length
		@fragments = split(/_+/, $in{$name});
			print "\@fragments is @fragments\n", if($debug eq 1);
		for($i = 0; $i < @fragments; $i++){
		  if($fragments[$i] =~/(\w)\w+/){
			  $fraglength = length($fragments[$i]);
			  $frag_seq .= "$1"."$fraglength "; # space is delimiter  'H5 E3 E5 E4'
			  print "\$frag_seq is $frag_seq\n", if($debug eq 1);
		  }
		  $out{$name2}=$frag_seq;
		}
	 }
	}
	return(\%out);
}



#_______________________________________________________________
# Title     : calculate_protein_volume
# Usage     : %volumes=%{&calculate_protein_volume(\%seq)}
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#-----------------------------------------------------------
sub calculate_protein_volume{
	my %final_volume;
	my %volume=("A","88.6" ,"C","108.5","D","111.1","E","138.4",
					"F","189.9","G","60.1" ,"H","153.2","I","166.7",
					"K","168.6","L","166.7","M","162.9","N","117.7",
					"P","122.7","Q","143.9","R","173.4","S","89.0",
					"T","116.1","V","140.0","W","227.8","Y","193.6",
					"a","88.6" ,"c","108.5","d","111.1","e","138.4",
					"f","189.9","g","60.1" ,"h","153.2","i","166.7",
					"k","168.6","l","166.7","m","162.9","n","117.7",
					"p","122.7","q","143.9","r","173.4","s","89.0",
					"t","116.1","v","140.0","w","227.8","y","193.6");
	#""""""""""""""""""""""< handle_arguments{ head Ver 1.1 >""""""""""""""""""""""""""""""""""""""
	my(@A ) = &handle_arguments( @_ ); my( $num_opt )=${$A[7]};my( $char_opt )=${$A[8]};
	my(@hash)  =@{$A[0]};my(@file)   =@{$A[4]};my(@dir   )  =@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};my(@raw_string)=@{$A[9]};
	my($i, $j, $c, $d, $e, $f, $g, $h, $k, $l, $p, $q, $r, $s, $t, $u, $v, $w, $x,$y,$z);
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my %seq_hash =  %{$hash[0]};
	my @names = keys %seq_hash;
	#if($debug == 1){
	  print "\n ",__LINE__, " Seq names are @names\n";
	# }

	for( $i=0 ; $i < @names; $i++){
	  my @seq=split(//, $seq_hash{$names[$i]});
	  for( $j=0; $j < @seq; $j ++){
		  $final_volume{$names[$i]} += $volume{$seq[$j]};
	  }
	}
	\%final_volume;
}




#_______________________________________________________________
# Title     : extract_words
# Usage     : @words = @{&extract_words(\$string)};
# Function  :
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#-----------------------------------------------------------
sub extract_words{
	#""""""""""""""""""""""< handle_arguments{ head Ver 1.1 >"""""""""""""""""""""""""""""""
	my(@A ) = &handle_arguments( @_ ); my( $num_opt )=${$A[7]}; my( $char_opt )=${$A[8]};
	my(@hash)  =@{$A[0]}; my(@file)   =@{$A[4]}; my(@dir   )  =@{$A[3]}; my(@array)=@{$A[1]};
	my(@string)=@{$A[2]}; my(@num_opt)=@{$A[5]}; my(@char_opt)=@{$A[6]}; my(@raw_string)=@{$A[9]};
	my($i, $j, $c, $d, $e, $f, $g, $h, $k, $l, $p, $q, $r, $s, $t, $u, $v, $w, $x,$y,$z);
	#"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	if($debug==1){
	  print __LINE__, " Args to extract_words are: \"@raw_string\"\n";
	}
	for($i=0; $i < @raw_string; $i ++){
	  if(ref($raw_string[$i]) eq 'SCALAR'){
		 $line = ${$raw_string[$i]};
	  }else{
		 $line = $raw_string[$i];
	  }
	  push( @words, split(/[\W\-\_]+/, $line) );
	}
	if($debug==1){
	  $num = @words;
	  print __LINE__, " Num of words are : \"$num\"\n";
	}
	\@words;
}

#________________________________________________________________________
# Title     : replace_subroutines
# Usage     :
# Function  : replaces subroutines of given file(s) with supplied subs.
#             Doesn't care version
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------
sub replace_subroutines{
	#"""""""""""""""""< handle_arguments{ head Ver 1.6 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my($i,$j,$c,$d,$e,$f,$g,$h,$k,
	$l,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my($each_sub, %out_subs, $VER, %left_out, @lib, $ver, $sub_name,
	  $real_sub_entry_found);
	my %hash=%{&merge_hash(@hash)};
	@array= keys %hash;
	my @values= values %hash;

	for($i=0; $i < @file; $i++){
		open(LIB_FILE, "<$file[$i]")|| die  "\n $file[$i]  <- $! \n";
		@lib =<LIB_FILE>;
		for($j=0; $j < @lib; $j++){
			  for($s=0; $s < @array; $s++){
				  if($array[$s] =~/^([_a-zA-Z\-]+)(\d*\.*\d*)$/){
					  $sub_name=$1;
					  $VER =$2;
				  }
				  #"""" Taking the headbox """""""""""""
				  if( ($lib[$j]=~/^#[_\-\*]{10,130} *$/)&&($lib[$j+1]=~/^(# *title *: *$sub_name)[^\.pl]/i) ){
					  $real_sub_entry_found=0;
					  $out_subs{"$sub_name"}.="$lib[$j]$1\n";
					  $j+=2;
					  until( ($lib[$j]=~/^sub *\w+ *\{/)||($lib[$j]=~/^#[\-_\*]{10,130} *$/) ){
							 $lib[$j]=~s/( *)$//;  #<-- removing ending space
							 #"""""""""""""""""""""""""""""""""""
							 #  Taking version no.
							 #"""""""""""""""""""""""""""""""""""
							 if( ($char_opt !~ /nv/i) && ($lib[$j]=~/^# *version *: *([\d+\.\d+]*) */i) ){
								  if( $1=~/^[ ]*$/){ $ver = '1.0'; }     ##  make null to 1.0
								  elsif( $1=~/^(\d+)$/){ $ver = "$1\.0"; } ### make  2   to 2.0
								  elsif($1=~/^([\d+\.\d+]+)$/){ $ver = $1; } ##  assign version
							 }
							 $out_subs{"$sub_name"}.="$lib[$j]";
							 $j++;
					  }
					  $out_subs{"$sub_name"}.="$lib[$j]";
					  $j++;  ## essential to remove #------------- line
				  }

				  #"""""""" Reading sub {  } """""""
				  if($lib[$j]=~/^sub +$sub_name *\{/){
					  $out_subs{"$sub_name"}.="$lib[$j]";
					  $j++;
					  until($lib[$j]=~/^\}/){
						  $out_subs{"$sub_name"}.="$lib[$j]";  $j++;
					  }
					  $out_subs{"$sub_name"}.="$lib[$j]";  ## to fetch '}'

					  $j++;

					  splice(@array, $s, 1); ## removing the subnames found
					  $s--;
					  unless(defined($ver)){ $ver = '1.0' }
					  unless($char_opt=~/nv/i){ ## if No version attachment option is set
						  $out_subs{"$sub_name$ver"}=$out_subs{$sub_name};
						  delete $out_subs{$sub_name};
					  }
				  }
			  }
			  $left_out{$file[$i]}.=$lib[$j]; ## Remnant file content of the operation
			                                  ## just in case you want the left out ones.
		}
		close LIB_FILE;
		open (LEFT_FILE, ">$file[$i]");
		for($h= 0; $h < @values; $h++){### appending the new subs.
			 $left_out{$file[$i]} .= $values[$h];
		}
		print LEFT_FILE $left_out{$file[$i]};
		close LEFT_FILE;

	}#""""""""""""" end of for (@file)

	@no_of_subs_fetched = keys %out_subs;
	if(@array>0){
		print chr(7);
		print "\n# Following subs are not found in \"", "@file","\"\n  ", "@array", "\n\n";
	}
	return( \%left_out ); # this has all the sub routines and other lines.
}

#______________________________________________________________________________
# Title     : write_subroutines
# Usage     : @out_file=@{&write_subroutines(\%head_box)};
# Function  : Writes subroutine file xxxx.psub with given headbox including
#              hash
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub write_subroutines{
     #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
     my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
     my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
     my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
     my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
     my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
     if($debug==1){print "\n\t\@hash=\"@hash\"
          \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
          \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
     #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
     my(@final_output_file, $sub_title, $subroutine_file);
     for($i=0; $i< @hash; $i++){
         my %hash=%{$hash[$i]};
         $sub_title=$hash{'Title'};
         $subroutine_file="$sub_title\.psub";
         open(SUBROUTINE_FILE, ">$subroutine_file");
         push(@final_output_file, $subroutine_file);
         #print SUBROUTINE_FILE "#\!\/usr\/bin\/perl\n";
         @keys=qw(Title Function Usage Example Keywords Options Author Category Version);
         print SUBROUTINE_FILE "#"."____"x20, "\n";
         for($j=0; $j < @keys; $j++){
             if($hash{$keys[$j]}){
                printf SUBROUTINE_FILE ("# %-9s : %s\n", $keys[$j], $hash{$keys[$j]});
             }
         }
         print SUBROUTINE_FILE "#"."----"x20, "\n";
         print SUBROUTINE_FILE $hash{'Content'}, "\n";
         close SUBROUTINE_FILE;
     }
     return(\@final_output_file);
}


#________________________________________________________________________
# Title     : read_subroutines
# Usage     : @out_subs=@{&read_subroutines(\@file, $separate_hash_entry_opt)}; or
#             %out_subs=%{&read_subroutines(\@file)};
# Function  : retunrns ALL subroutines with the keys as subroutine names
#             with version like ('show_array2.2' => 'subroutine in one string')
#             It reports the subroutines not found in searched file(s)
# Example   :
# Warning   :
# Keywords  :
# Options   : 'nv' for no version attachment in the keys of returning hash of subroutines
#             'r'  for getting remnant file content rather than the sub routines
#             't'  for leaving the original file without the sub routines taken.
#     $separate_hash_entry_opt=s by s
# Returns   :
# Argument  :
# Category  :
# Version   : 1.2
#--------------------------------------------------------------------
sub read_subroutines{
     #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
     my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
     my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
     my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
     my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
     my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
     if($debug==1){print "\n\t\@hash=\"@hash\"
          \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
          \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
     #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
     my($each_sub, %out_subs, %left_out, @lib, $ver, $real_sub_entry_found,
        %final_out_subs, %out_subs, $separate_hash_entry_opt, $long_subname,
        @final_separate_entry_out);
     if($char_opt=~/s/){ $separate_hash_entry_opt = 's';
        print "\n# (INFO) \$separate_hash_entry_opt opt is set"
     }

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # Parsing input files of perl programs
     #_____________________________________________
     for($i=0; $i < @file; $i++){
         print "\n# (INFO) Opening $file[$i] to read subroutines\n";
         open(LIB_FILE, "<$file[$i]")|| die  "\n $file[$i]  <- $! \n";
         @lib =<LIB_FILE>;

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
         #  This for loop does not allow return until each sub is finished
         #_____________________________________________________________________
         for($j=0; $j < @lib; $j++){
             my(%out_subs_separate);

             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
             # Reading the first delimiter line and 'Title' line altogether
             #_______________________________________________________________
             if( ($lib[$j]=~/^#+[_\-\*]{10,120} *$/)
                  and $lib[$j+1]=~/^(#+ *title *: *([\w\-\.]+))/i ){

                  $long_subname=$1;
                  $sub_name=$2;
                  if($sub_name=~/\.pl$/){ next } ## to avoid the very first headbox

                  if($separate_hash_entry_opt){
                       $out_subs_separate{'Title'}=$sub_name;
                  }else{
                       $out_subs{"$sub_name"}.="$lib[$j]$long_subname\n";
                  }
                  $j+=2;

                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  # Until I hit end of headbox or sub {} line
                  #________________________________________________
                  until( $lib[$j]=~/^#+[\-\*_]{10,160} *$/ or
                         $lib[$j]=~/^sub +\w[\w\.]+/ ){
                        $lib[$j]=~s/( *)$//;  #<-- removing ending space
                        #"""""""""""""""""""""""""""""""""""
                        #  Taking version no.
                        #___________________________________
                        if( ($char_opt !~ /nv/i) && ($lib[$j]=~/^# *version *: *([\d+\.\d+]*) */i) ){
                           if( $1=~/^[ ]*$/){ $ver = '1.0'; }     ##  make null to 1.0
                           elsif( $1=~/^(\d+)$/){ $ver = "$1\.0"; } ### make  2   to 2.0
                           elsif($1=~/^([\d+\.\d+]+)$/){ $ver = $1; } ##  assign version
                        }
                        if($separate_hash_entry_opt and $lib[$j]=~/^# *(\S\S\S+) *: *(.*)$/){
                           $sub_entry=$1;
                           $out_subs_separate{$sub_entry}=$2;
                           $j++;
                        }elsif($separate_hash_entry_opt and $lib[$j]=~/^#  +(.*)$/){
                           $out_subs_separate{$sub_entry}.="    $1\n";
                           $j++;
                        }else{
                           $out_subs{"$sub_name"}.="$lib[$j]";
                           $j++;
                        }
                   }
                   $out_subs{"$sub_name"}.="$lib[$j]";
                   $j++;    ## essential to remove #------------- line
               }

               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
               # Reading sub {   } part , the Content part
               #__________________________________________
               if($lib[$j]=~/^sub {1,9}([\w\-]+) *\{/){
                   print "\n# (INFO) Found sub $1\{  \tline ";
                   $sub_name=$1;
                   $out_subs{"$sub_name"}.="$lib[$j]";
                   $out_subs_separate{'Content'}.="$lib[$j]";
                   $j++;
                   until($lib[$j]=~/^\}/){
                       $out_subs{"$sub_name"}.="$lib[$j]";
                       $out_subs_separate{'Content'}.="$lib[$j]";
                       $j++;
                       print "\n# (INFO) reading in the content of sub" if $verbose;
                   }
                   $out_subs{"$sub_name"}.="$lib[$j]";  ## to fetch '}'
                   $out_subs_separate{'Content'}.="$lib[$j]";  ## to fetch '}'
                   $j++;
                   unless(defined($ver)){ $ver = '1.0' }
                   unless($char_opt=~/nv/i){ ## if No version attachment option is set
                       $final_out_subs{"$sub_name$ver"}=$out_subs{$sub_name};
                       $out_subs_separate{'Version'}=$ver;
                   }
                   print "\n# (INFO) Pushing the read lines for one sub to \@final_separate_entry_out";
                   push(@final_separate_entry_out, \%out_subs_separate);
               }
           }##------- for @lib loop

           if($char_opt =~/[rt]/i){
              $left_out{$file[$i]}.=$lib[$j]; ## Remnant file content of the operation
           }
	   close LIB_FILE;
	   if($char_opt =~/t/i){ ## t is for takeout(take the subroutine out of the original file!!
	       open (LIB_FILE, ">$file[$i]");
	       print LIB_FILE $left_out{$file[$i]};
	       close LIB_FILE;
	   }
           print $final_out_subs;

	}#""""""""""""" end of for (@file)

	if($char_opt =~ /r/i){
	   return( \%left_out ); # to get the files sans the subroutines.
	}else{
	   if($separate_hash_entry_opt){
              if(@final_separate_entry_out==1){
	          return( $final_separate_entry_out[0]);
	      }else{
	          print "\n\n# (INFO) returning multiple subroutine read";
	          return(\@final_separate_entry_out);
	      }
	   }else{
	      return( \%final_out_subs );
	   }
	}
}


#________________________________________________________________________
# Title     : fetch_subroutines
# Usage     :
# Function  : retunrns subroutines with the keys as subroutine names with version
#             like in the form( 'show_array2.2' => 'subroutine in one string')
#             It reports the subroutines not found in searched file(s). This
#             requires the names of sub you want while read_subroutines will
#             read any subroutines with their headbox to a hash.
# Example   :
# Warning   :
# Keywords  :
# Options   : 'nv' for no version attachment in the keys of returning hash of subroutines
#             'r'  for getting remnant file content rather than the sub routines
#             't'  for leaving the original file without the sub routines taken.
#             'h'  for headbox only output.
# Returns   :
# Argument  :
# Category  :
# Version   : 2.5
#--------------------------------------------------------------------
sub fetch_subroutines{
   #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
   my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
   my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
   my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
   my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
   my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
   if($debug==1){print "\n\t\@hash=\"@hash\"
      \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
      \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
   #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my($each_sub, %hash2, %out_subs, %left_out, @lib, $ver);

   @array=@{&remove_dup_in_array(\@string)};

   print "\n# $0: I am fetching @array from @file\n\n";

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # (1) Processing each file of sub lib file(yes, it can read many source lib
   #__________________________________________________________________________
   for($i=0; $i < @file; $i++){
        open(LIB_FILE, "<$file[$i]")|| die  "\n $file[$i]  <- $! \n";

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # (2) reading the whole lib. file into array
        #__________________________________________
	@lib =<LIB_FILE>;

	for($j=0; $j < @lib; $j++){
	     my $title_found;

	     for($s=0; $s < @array; $s++){ # array has subroutine names
	         $each_sub = $array[$s];

	         #"""" Taking the headbox """""""""""""
	         if( ($lib[$j]=~/^#+[_\-\*]{8,140} *$/)
		     and ($lib[$j+1]=~/^(#+ *Title[ \t]*\:[ \t]*$each_sub\b).*/i) ){

		     $out_subs{"$each_sub"} .="$lib[$j]$1\n";
		     $j+=2;
		     $title_found=1;
		     until( ($lib[$j]=~/^#+[\-_\*]{8,150} *$/) or
		           ($lib[$j]=~/^ {0,2}sub *$each_sub *\{ *[version]*\:?(\S*)/)  ){
                            $version_number=$1;
			    $lib[$j]=~s/ *$//;  #<-- removing ending space

			    #"""""""""""""""""""""""""""""""""""
			    #  Taking version no.
			    #"""""""""""""""""""""""""""""""""""
			    if( ($char_opt !~ /nv/i)
			        and ($lib[$j]=~/^# *Version *\: *([\d+\.\d+]*) */i) ){
				  if( $1=~/^[ ]*$/){ $ver = '1.0'; }     ##  make null to 1.0
				  elsif($1=~/^(\d+)$/){ $ver = "$1\.0"; } ### make  2   to 2.0
				  elsif($1=~/^([\d+\.\d+]+)$/){ $ver = $1; } ##  assign version
			    }
			    unless($ver){
			        $ver=$version_number;
			    }
			    $out_subs{"$each_sub"}.="$lib[$j]";
			    $j++;
		     }
		     $out_subs{"$each_sub"}.="$lib[$j]";
		     $j++;    ## essential to remove #------------- line
		 }
		 if($char_opt =~ /h/i){ # 'h'  for headbox only output.
		      goto SPLICE2;
		 }
	         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	         # Reading sub {  }, when headbox is included, for JONG's lib
	         #____________________________________________________________
                 if( $title_found==1  and  $lib[$j]=~/^sub +$each_sub\b *\{/ ){
                      $out_subs{"$each_sub"}.="$lib[$j]";
                      $j++; $title_found='';
                      until($lib[$j]=~/^\}/){
                           $out_subs{"$each_sub"}.="$lib[$j]";  $j++;
                      }
                      $out_subs{"$each_sub"}.="$lib[$j]";  ## to fetch '}'
                      $j++;
                      SPLICE2:
                      splice(@array, $s, 1); ## removing the subnames found
                      $s--;
                      unless(defined($ver)){ $ver = '1.0' }
                      unless($char_opt=~/nv/i){ ## if No version attachment option is set
                           $hash2{"$each_sub${ver}"}=$out_subs{$each_sub};
                           %out_subs=();
                      }else{
                           $hash2{"$each_sub"}=$out_subs{$each_sub};
                           %out_subs=();
                      }
		  }
                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
	          # Reading sub {  }, when headbox is NOT included
	          #_________________________________________________________
		  elsif($lib[$j]=~/^ {0,2}sub +$each_sub\b *\{[\t ]*#* *(.*)/ ){
                      print "\n# (WARN) $0: $file[$i] does not have headbox(jong\' lib style)\n";
                      print "\n#    I will assume you do NOT have it in your sub lib. All subs will have 1.0 version no.\n";
                      if($1=~/(\d+\.?\d*)/){
                           $version_number=$ver=$1;
                      }
                      $out_subs{"$each_sub"}.="$lib[$j]";
                      $j++; $title_found='';
                      until($lib[$j]=~/^\}/){
                           $out_subs{"$each_sub"}.="$lib[$j]";  $j++;
                      }
                      $out_subs{"$each_sub"}.="$lib[$j]";  ## to fetch '}'
                      $j++;
                      goto SPLICE2; # it is in a previous line
                  }
	     }
	     if($char_opt =~/[rt]/i){
		     $left_out{$file[$i]}.=$lib[$j]; ## Remnant file content of the operation
	     }
	}
	close LIB_FILE;
	if($char_opt =~/t/i){
             open (LIB_FILE, ">$file[$i]");
	     print LIB_FILE $left_out{$file[$i]};
	     close LIB_FILE;
	}

   }#""""""""""""" end of for (@file)

   $no_of_subs_fetched = keys %out_subs;
   if(@array>0){
       print chr(7), chr(7);
       print "\n# Following subs are not found in \"", "@file","\"\n  ", "@array", "\n\n";
   }

   if($char_opt =~ /r/i){
       return( \%left_out ); # to get the files sans the subroutines.
   }else{
       return( \%hash2 );
   }
}


#________________________________________________________________________
# Title     : update_subroutines
# Usage     : &update_subroutines(\@file, \%fetched_subs);
# Function  : replaces subroutines of given file(s) with supplied subs.
#             If the given subroutine versions are not higher than the
#             ones in the program, no upgrade would happen.
#             This can read version information from '# Version  : 1.0' line
#              or sub xxxxx{  # Version : 1.0   line
# Example   : &update_subroutines($file, \%fetched_subs);
# Warning   :
# Keywords  : upgrade_subroutines,
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 2.8
#--------------------------------------------------------------------
sub update_subroutines{
  #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
  my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
  my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
  my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
  my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
  my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
  if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # (1) %subs_from_lib is a default variable from &handle_arguments
  #______________________________________________________
  my %subs_from_lib=%{&merge_hash(@hash)};
  my @subs_from_lib = keys %subs_from_lib;            # @subs_from_lib are subroutine names

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
  # update_subroutines can handle many input perl files
  #__________________________________________________________________
  for($i=0; $i < @file; $i++){
        open(TARGET_FILE, "<$file[$i]") or die  "\n $file[$i]  <- $! \n";

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
        # (2) Reading in all the perl_file_lines into arrary
        #___________________________________________________
	my @perl_file_lines =<TARGET_FILE>;
	close TARGET_FILE;
        my (%temp, %temp_with_version_info, %final_out, %latest_sub_hash, $VER, $sub_name,$ver,
	    $first_line, @found_subs, $sub_name2);

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # (3) checking each line in @perl_file_lines of target file with already provided %subs_from_lib of subroutines
        #____________________________________________________________________________________________________
        for($j=0; $j < @perl_file_lines; $j++){
	      my ($loop_count, $title_found, $sub_name,
                  $title_found, $sub_found, $VER, $ver);
              if( $perl_file_lines[$j]=~/^(#\!\/\w+.+perl)/){ ## first line match
                  $final_out{$file[$i]}.=$perl_file_lines[$j];
		  $j++;
		  print "\n# (INFO) Good! I found the very first line, $1 !!\n";
              }
              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
              # (4) Putting update information  line-> # Last Update by ./update_subrout
              #______________________________________________________________________________
              if($perl_file_lines[$j]=~/^# *last *update */i){
                  $final_out{$file[$i]}.="# Last Update by $0: ".`date`;
                  $j++;
              }elsif($perl_file_lines[$j+1]=~/^# *last *update */i){
                  $final_out{$file[$i]}.="$perl_file_lines[$j]# Last Update by $0: ".`date`;
                  $j+=2;
              }elsif($j < 4 and (!$perl_file_lines[$j]=~/^# *last *update */i) ){
                  $final_out{$file[$i]}.="# Last Update by $0: ".`date`.$perl_file_lines[$j];
                  $j++;
              }

	      if($perl_file_lines[$j]=~/^__END__/){ last } ## this is to stop $0 reading in junk sub calls after __END__ line

              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~````
              # (5) this is to read non-subroutine related stuff
              #_______________________________________________________
              until( ($perl_file_lines[$j]=~/^#+[_\-\*\~]{8,160} *$/
                  and $perl_file_lines[$j+1]=~/^(#+ *Title *[:\;]* *\S+\b)[^\.]? */i )   ## NO 'xxxx.pl'
                  or ($perl_file_lines[$j]=~/^ {0,1}sub +[\w\-\.]+ *\{/)                # until we hit  sub xxxx{  line
                  or $j == $#perl_file_lines ){          ## when there is no headbox
                     $final_out{$file[$i]}.=$perl_file_lines[$j];  ## %final_out is the final output hash with perl_file_lines!!
                     $j++;                                         # %final_out stores only non-subroutine stuff until the end
              }
              print "\n# (INFO) \$final_out\{\$file\[\$i\]\} so far is: \n$final_out{$file[$i]}\n" if $verbose;

              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
              # (6) NOW we found the headbox and let's go through all @subs_from_lib entry to match
              #     Going through all sub names. Now I am reading sub routine parts. @subs_from_lib has the names of subs to fetch
              #__________________________________________________________________________________________________________
              SUBS: for($s=0; $s < @subs_from_lib; $s++){
                  if($subs_from_lib[$s] =~/^([_a-zA-Z\-\d]+)(\d+\.*\d*) *$/){
                       $sub_name=$1;
                       if($2){  $VER =$2; }else{ $VER='1.0' } ## <--- this line is redundant
		  }elsif($subs_from_lib[$s] =~/^(\S+) *$/){  #### This is critically necessary
		       $sub_name=$1;
		       print "\n# (WARN) $0: \$subs_from_lib\[\$s\] doesnt seem to have ver info. Is this O.K?\n";
                       $VER='1.0';
		  }

                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  # (7) matching #______________ and # Title :,  IE, matching HEADBOX region
                  #___________________________________________________________________________
                  if( ($perl_file_lines[$j]=~/^#+[_\-\*\~]{8,160} *$/)
                        && ($perl_file_lines[$j+1]=~/^(#+ *Title *[\:]* *$sub_name)[^\.]? */i) ){

                        $temp{"$sub_name"}.="$perl_file_lines[$j]$1\n";

                        $j+=2;
                        $title_found=1;
                        until( $perl_file_lines[$j]=~/^#+[\-_\*]{10,180} *$/ ){
                           $perl_file_lines[$j]=~s/ *$//;
                           if($perl_file_lines[$j] =~ /^# *Version *[\:\;]? *([\d*\.*\d*]*) */i){
                               if( $1 =~/^ *$/){ $ver = '1.0';  }     ##  make null to 1.0
                               elsif($1 =~/^(\d+)$/){ $ver = "$1\.0"; } ### make  1   to 1.0
                               elsif($1 =~/^(\d+\.\d+)$/){ $ver = $1; }
                           }
                           $temp{"$sub_name"}.="$perl_file_lines[$j]";
                           $j++;
                        }
                        $temp{"$sub_name"}.="$perl_file_lines[$j]";
                        $j++;
		  }

                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  # (8) matching sub xxxxx{
                  #_____________________________________________
	          if($perl_file_lines[$j]=~/^sub +$sub_name *\{ *(.*)/){
                       $possible_version_info=$1;
                       if($possible_version_info=~/(\d+\.?\d*)/){
                           $ver=$1;
                       }
		       $sub_found =1;
                       $temp{"$sub_name"}.="$perl_file_lines[$j]";

		       $j++;
		       until( $perl_file_lines[$j] =~/^\}/){
                            $temp{"$sub_name"}.="$perl_file_lines[$j]";
                            $j++;

                            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`````
                            # following is out of paranoia
                            #___________________________________________
                            if( $perl_file_lines[$j] =~/^#[_\-\*]{8,150} *$/
                                and $perl_file_lines[$j+1]=~/^(# *Title *[:]* *$sub_name)[^\.pl]/i ){
                                $temp{"$sub_name"}.="\n\}\n"; $j++; # fixes missing '}' in the read sub hash.
                                goto SPLICE;
                            }elsif($perl_file_lines[$j+1]=~/^sub +[\w\-\.]+ *\{/){
                                $temp{"$sub_name"}.="\n\}\n";
                                goto SPLICE;
                            }
                            $loop_count++;
                            if($loop_count > 10000){
                                $final_out{$file[$i]} .="\n\}\n";    # fixes missing '}' in the input file
                                $temp{"$sub_name"}.="\n\}\n";   # fixes missing '}' in the read sub hash.
                                goto SPLICE;
                            }
		       }
		       if($perl_file_lines[$j]=~/^\}/){ $temp{"$sub_name"}.=$perl_file_lines[$j];  }

		       SPLICE:
                       push(@found_subs, splice(@subs_from_lib, $s, 1) );
		       $s--;
		       unless(defined($ver)){ $ver = '1.0' }
                       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                       # Changing  $temp{$sub_name}  to  $temp{"$sub_name$ver"}
                       #_________________________________________________________
                       if(!$temp_with_version_info{"$sub_name$ver"}){
                           $temp_with_version_info{"$sub_name$ver"}=$temp{$sub_name};
                       }
                       %temp=();
                       last;
		  }else{ # end of  if ($perl_file_lines[$j]=~/sub xxxxxx/)
                       next; # next to for(@subs_from_lib)
                  }
              }## END of for(@subs_from_lib)

              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
              # (9) putting %temp hash to %latest_sub_hash
              #____________________________________________________
              if( $title_found and $sub_found and  $VER < $ver ){
                    print "\nTitle found, SUB found for $sub_name VER $sub_name${VER} \< ver $ver";
                    unless($latest_sub_hash{ "$sub_name$ver" }){
                        $latest_sub_hash{ "$sub_name$ver" }= $temp_with_version_info{ "$sub_name$ver" };
                    }
                    delete($subs_from_lib{"$sub_name$VER"} );
                    $VER=$ver=$title_found=$sub_found='';
              }elsif( $title_found and $sub_found and  $VER >=$ver ){
                    print "\nTitle found,  SUB found for $sub_name  VER \>= ver ";
                    unless($latest_sub_hash{ "$sub_name$VER" }){
                        $latest_sub_hash{ "$sub_name$VER" }= $subs_from_lib{"$sub_name$VER"};
                    }
                    delete($temp_with_version_info{"$sub_name$ver"});
                    delete($subs_from_lib{"$sub_name$VER"});
                    $VER=$ver=$title_found=$sub_found='';
              }elsif( !$title_found  and $sub_found ){
                    print "\nTitle Not found, SUB found for $sub_name";
                    unless($latest_sub_hash{ "$sub_name$VER" }){
                        $latest_sub_hash{ "$sub_name$VER" }= $subs_from_lib{"$sub_name$VER"};
                    }
                    delete($subs_from_lib{"$sub_name$VER"});
                    delete($temp_with_version_info{"$sub_name$ver"});
                    $VER=$ver=$title_found=$sub_found='';

              }elsif( $title_found  and !$sub_found   ){
                    print "\nTitle found, SUB not found for $sub_name";
                    $VER=$ver=$title_found=$sub_found='';
                    next;
              }elsif( !$title_found and !$sub_found  ){
                    $final_out{$file[$i]}.=$perl_file_lines[$j];
              }
        } # for (@perl_file_lines)

        %merged_final_hash=%{&merge_hash(\%subs_from_lib, \%latest_sub_hash)};
        @values= values %merged_final_hash;

        open (LEFT_FILE, ">$file[$i]");
        for($h= 0; $h < @values; $h++){ ### appending the new subs.
             $final_out{$file[$i]} .= $values[$h];
        }
        print LEFT_FILE $final_out{$file[$i]};
        close LEFT_FILE;
    }#""""""""""""" end of for (@file)
    return( \%final_out ); # this has all the sub routines and other lines.
}






#________________________________________________________________________
# Title     : takeout_subroutines
# Usage     :
# Function  : retunrns subroutines with the keys as subroutine names with version
#             like in the form( 'show_array2.2' => 'subroutine in one string')
#             It reports the subroutines not found in searched file(s)
#             fetch_subroutines  also has this feature.
# Example   :
# Warning   : If there is no headbox and version no. It thinks the version
#             is 1.0
# Keywords  : take_out_subroutines, take_subroutines, cut_subroutines,
#             cutout_subroutines, remove_subroutines
# Options   : 'nv' for no version attachment in the keys of returning hash of subroutines
#             'r'  for getting remnant file content rather than the sub routines
# Returns   :
# Argument  :
# Category  :
# Version   : 1.6
#--------------------------------------------------------------------
sub takeout_subroutines{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my($each_sub, %duplicate, %out_subs, %left_out, @lib, $ver, $real_sub_entry_found);
	@array = @ori_array = grep { ! $duplicate{$_}++ } @raw_string;  ## removing duplicates

	for($i=0; $i < @file; $i++){
		@array=@ori_array;
		open(LIB_FILE, "$file[$i]") or die  "\n $file[$i]  <- $! \n";
		@lib =<LIB_FILE>;
		for($j=0; $j < @lib; $j++){
			  for($s=0; $s < @array; $s++){
				  $each_sub = $array[$s];
				  #"""" Taking the headbox """""""""""""
				  if( ($lib[$j]=~/^#[_\-\*\~]{20,140} *$/)&&
				      ($lib[$j+1]=~/^(# *title *: *$each_sub)[^\.pl]/i) ){
					  $real_sub_entry_found=0;
					  $out_subs{"$each_sub"}.="$lib[$j]$1\n";
					  $j+=2;
					  until( ($lib[$j]=~/^sub *\w+ *\{/)||
					         ($lib[$j]=~/^#[\-_\*\~]{20,140} *$/) ){
							 $lib[$j]=~s/( *)$//;  #<-- removing ending space
							 #"""""""""""""""""""""""""""""""""""
							 #  Taking version no.
							 #"""""""""""""""""""""""""""""""""""
							 if( ($char_opt !~ /nv/i) && ($lib[$j]=~/^# *version *: *([\d+\.\d+]*) */i) ){
								  if( $1=~/^[ ]*$/){ $ver = '1.0'; }     ##  make null to 1.0
								  elsif( $1=~/^(\d+)$/){ $ver = "$1\.0"; } ### make  2   to 2.0
								  elsif($1=~/^([\d+\.\d+]+)$/){ $ver = $1; } ##  assign version
							 }
							 $out_subs{"$each_sub"}.="$lib[$j]";
							 $j++;
					  }
					  $out_subs{"$each_sub"}.="$lib[$j]";
					  $j++;  ##<< essential to remove #------------- line
				  }

				  #"""""""" Reading sub {  } """""""
				  if($lib[$j]=~/^sub +$each_sub *\{/){
					  $out_subs{"$each_sub"}.="$lib[$j]";
					  $j++;
					  until($lib[$j]=~/^\}/){
						  $out_subs{"$each_sub"}.="$lib[$j]";  $j++;
					  }
					  $out_subs{"$each_sub"}.="$lib[$j]";  ## to fetch '}'

					  $j++;

					  splice(@array, $s, 1); ## removing the subnames found
					  $s--;
					  unless(defined($ver)){ $ver = '1.0' }
					  unless($char_opt=~/nv/i){ ## if No version attachment option is set
						  $out_subs{"$each_sub$ver"}=$out_subs{$each_sub};
						  delete $out_subs{$each_sub};
					  }
				  }
			  }
			  $left_out{$file[$i]}.=$lib[$j]; ## Remnant file content of the operation
			                                  ## just in case you want the left out ones.
		}
		close LIB_FILE;
		open (LEFT_FILE, ">$file[$i]");
		print LEFT_FILE $left_out{$file[$i]};
		close LEFT_FILE;
	}#""""""""""""" end of for (@file)

	@no_of_subs_fetched = keys %out_subs;
	if(@array>0){
		print chr(7);
		print "\n# Following subs are not found in \"", "@file","\"\n  ", "@array", "\n\n";
	}
	if($char_opt =~ /r/i){
	   return( \%left_out ); # to get the files without the subroutines.
	}else{
	   return( \%out_subs );
	}
}

#________________________________________________________________________
# Title     : get_subroutine_calls
# Usage     : @sub_name_array= @{&get_subroutine_calls(\@AR))};
# Function  : gets all the subroutine calls( like &show_hash ) in the given
#             file name or array of lines which is the content of a file,
#             text etc. If there is no input arg, it reads the running
#             program as default input
# Example   :
# Keywords  : get_sub_names,get_subroutine_names, get_sub_calls,
#             get_subroutine_calls, find_sub_calls, find_subroutine_calls
# Options   :
# Category  :
# Version   : 2.2
#--------------------------------------------------------------------
sub get_subroutine_calls{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(@keywords)=qw(getc shift system warn undef abs cmp close fork caller
				   eval time chdir connect disconnect wait main);
	my($keywords)=join(' ', @keywords);
	my(@arr, @sub_names, @nondup, %duplicate, @sub_calls);
	if(@_== 0){  open(FILE, "$0"); @arr = <FILE>;  } # open self
	elsif( @file > 0){
           for($i=0; $i < @file; $i++){
                  open(FILE, "$file[$i]");
                  push(@arr, <FILE>);
           }
	}
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
	# When subroutine names are specified as input
	#_______________________________________________
	if(@array>0){
	   for($i=0; $i<@array; $i++){
		 push(@arr, @{$array[$i]} ) if ref($array[$i]) eq 'ARRAY';
	   }
	}
	if( @raw_string>0){ push(@arr, @raw_string) }

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Main pattern matching part for  &xxxxxx ;  line
	#__________________________________________________
	for($i=0; $i< @arr; $i ++){
            if($arr[$i]=~/^#+/){ next }
            $arr[$i]=~s/^(.+)(# ..+)$/$1/;  # I have to remove all COMMENTS as comments can have &xxx

            if($arr[$i]=~/\$\~ = *\S/){
                    next;
            }elsif($arr[$i]=~/^format *=/){
                    until($arr[$i]=~/^\./){  $i++ }
            }elsif(@sub_calls=$arr[$i]=~/\&([\w\.\-]+)/g ){  # for  $X = &round(&some(&some2(\$ssssss))); sort of thing
                    push(@sub_names, @sub_calls);
            }elsif($arr[$i]=~/\&[^\&]+\w+\:\:(\w[\w\-\.\:]+) {0,3}.*[\;\,]/){ ## to handle &main::ssssub::sussb
                    push(@sub_names, $1);
            }elsif($arr[$i]=~/\&(\w[\w\-\.\:]+) {0,3}.*[\;\,]/){
                    push(@sub_names, $1);
            }elsif($arr[$i]=~/= *([a-zA-Z][\w\-]+) {0,3}\;/){
                    push(@sub_names, $1) unless($keywords=~/\b$1\b/);
            }elsif($arr[$i]=~/= *[\@\%\$] *\{ *([a-zA-Z_\-]+[\d]*).+\} *\;/){ # for =${ xxxx }; or =${&xxxxx};
                    push(@sub_names, $1);
            }
	}
	@nondup = grep { ! $duplicate{$_}++ } @sub_names;  ## removing duplicates
	for($i=0; $i< @keywords; $i++){
            for($j=0; $j < @nondup; $j ++){
                    if($keywords[$i] eq $nondup[$j]){
                           splice(@nondup, $j, 1); $j--;
                    }
            }
	}
	return(\@nondup);
}



#________________________________________________________________________
# Title     : set_special_options   (derived from set_debug_option)
# Usage     : &set_special_options;
# Function  : If you put special chars like '#' or  '##', '###..' at the
#             prompt of any program which uses
#             this sub you will get verbose printouts for the program if
#             the program has a lot of comments.
# Example   : &set_special_options.pl  ##    <-- at prompt.
# Warning   :
# Keywords  :
# Options   : #   for 1st level of debugging printouts
#             ##  for even more debugging printouts
#             +   for more outputs(more calculations are shown, like statistics)
#             ++  even more outputs.(
#    $DEBUG    becomes 1 by '#'
#    $DEBUG2   becomes 1 by '##'
#    $VERBOSE  becomes 1 by '+'
#    $VERBOSE2 becomes 1 by '++'
#
# Returns   :  $debug, $verbose
# Argument  : Nothing in a program.
# Category  :
# Version   : 1.0
#             generalized debug var is added for more verbose printouts.
#--------------------------------------------------------------------
sub set_special_options{
	my($j, $i, $level, $key, %special_chars);
	%special_chars=('DEBUG'=>'#', 'VERBOSE'=>'+');

	for $key (keys %special_chars){
	 for($j=0; $j < @ARGV; $j ++){
		 if( $ARGV[$j] =~/([$special_chars{$key}]+)/){
			 print __LINE__," >>>>>>> Debug option is set by $1 <<<<<<<<<\n";
			 ${"$key"}=1; print chr(7);
			 print __LINE__," \$$key  is set to ", ${"$key"}, "\n";
			 splice(@ARGV,$j,1); $j-- ;
			 $level = length($1)+1;
			 for($i=0; $i < $level; $i++){
				 ${"$key$i"}=1;
				 print __LINE__," \$${key}${i} is set to ", ${"$key$i"}, "\n";
			 }
		 }
	 }
	}
}

#________________________________________________________________________
# Title     : set_debug
# Usage     : &set_debug;
# Function  : If you put '#' or  '##' at the prompt of any program which uses
#             this sub you will get verbose printouts for the program if the program
#             has a lot of comments.
# Example   : set_debug #    <-- at prompt.
# Warning   :
# Keywords  :
# Options   : #   for 1st level of verbose printouts
#             ##  for even more verbose printouts
# $debug  becomes 1 by '#'  or '_'
# $debug2 becomes 1 by '##'  or '__'
#
# Returns   :  $debug
# Argument  :
# Category  :
# Version   : 1.8
#             generalized debug var is added for more verbose printouts.
#--------------------------------------------------------------------
sub set_debug{
	my($j, $i, $level);
	unless( defined($debug) ){
	 for($j=0; $j < @ARGV; $j ++){
		 if( $ARGV[$j] =~/^(_+)$|^(#+)$/){ # in bash, '#' is a special var, so use '_'
			 print __LINE__," >>>>>>> Debug option is set by $1 <<<<<<<<<\n";
			 $debug=1;
				  print chr(7);
			 print __LINE__," \$debug  is set to ", $debug, "\n";
			 splice(@ARGV,$j,1); $j-- ;
			 $level = length($1)+1;
			 for($i=0; $i < $level; $i++){
				 ${"debug$i"}=1;
				 print __LINE__," \$debug${i} is set to ", ${"debug$i"}, "\n";
			 }
		 }
	 }
	}
}

#________________________________________________________________________
# Title     : open_self
# Usage     : @lines =  &open_self;
# Function  :
# Example   :
# Warning   :
# Keywords  : read self, read_self, open self, open itself
# Options   :
# Returns   : one array
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------------
sub open_self{
	open(SELF, "$0");
	my(@Line)=<SELF>;
	return( \@Line );
}



#________________________________________________________________________
# Title     : tell_seq_length
# Usage     : %hash_out = %{&tell_seq_length(\%hash_in)};
# Function  : tells the sequence sizes of given sequences
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------------
sub tell_seq_length{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
		\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
		\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my(@out_hash, %hash);
	for($i=0; $i < @hash; $i++){
	 %hash = %{$hash[$i]};
	 @keys = keys %hash;
	 for ($j=0; $j < @keys; $j ++){
		if($hash{$keys[$j]}=~/\,\S+\,/){ @string= split(/\,/, $hash{$keys[$j]});
		}else{ @string= split(//, $hash{$keys[$j]}); }
		$h -> {$keys[$j]} = @string;  ## $h is the ref. of the anonymous hash
	 }                               ## This is equivalent to "$h{$keys[$j]}= $length;"
	 push(@out_hash , $h ) ;
	}
	if(@out_hash == 1){ $out_hash[0]; }
	elsif(@out_hash < 1){ die "\nSomething is wrong at tell_seq_length\n"; }
	elsif(@out_hash > 1){ return(@out_hash); }
}

#________________________________________________________________________
# Title     : do_window_scan
# Usage     : @out_array = @{&do_window_scan(\@input_array, $win_size)};
#             Often, bioters(Bio Computer Scientists) need to scan a long sequences
#             of DNA or Protein like(ABADFAFASDFASFASDFDFA or 109384717817947) to
#             caculate something out of them.
#             This routine is providing such scanning
#             function in perl.
# Function  : This is the core part of any window (of sequences)
#             scanning function.
# Example   :
# Warning   :
# Keywords  : scan_sequence, scan_window
# Options   :
# Returns   :
# Argument  :
# Category  :
# Version   : 1.3
#--------------------------------------------------------------------
sub do_window_scan{
	my(@string1) = @{$_[0]};
	my($win_size) = ${$_[1]} || $_[1];
	my($w, $start, $end, $average1, @window_1, $w_abs, @out_string),

	$start = -($win_size - ($win_size%2))/2; ## If the window size is odd numbers (eg 5),
									 ## the starting average position is 0  from (-2,-1,0,1,2)
									 ## If it is even numbers (eg 4)
									 ## the starting position is 0 from (-2,-1,0,1)
	$end = @string1 - ($win_size - ($win_size%2))/2;
								 ## End point is also dependent on oddity of the numbers
								 ## for window size.
	 for ($w= $start; $w < $end ; $w ++){
		$moving_window = $w + $win_size - 1;
		@window_1= @string1[$w .. $moving_window ];
		if($w < 0){          #### This if is to prevent the circularization of the array
			$w_abs = abs($w); #### (like  6 7 1 2 3 4 5 , for  . . 1 2 3 4 5 from 1234567 )
			splice(@window_1, 0, $w_abs);  # $w_abs is the absolute value of $w
		}

		################ PUT YOUR calc HERE #####

		 $average1= ${average_of_array(\@window_1, 'int')};

		################ PUT YOUR calc HERE #####

		push(@out_string, $average1);
		print "\nWinSize:$win_size halfwin: $half_win_size str size: $string_size \(from offset: $offset to  mov_wind: $moving_window \) AV: $average1 of  win1:", @window_1, "\n";
	 }
	 \@out_string;
}

#________________________________________________________________________
# Title     : scan_window_and_calc_something
# Usage     :
# Function  : scans any given length window of sequence and computes something.
# Example   :
# Warning   :
# Keywords  :
# Options   : average for getting average of given window size.
#             sum for getting sum of given window size.
# Returns   :
# Argument  :
# Category  :
# Version   : 1
#--------------------------------------------------------------------
sub scan_window_and_calc_something{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{