/usr/local/CPAN/Bio-MCPrimers/Bio/Data/Plasmid/CloningVector.pm


package Bio::Data::Plasmid::CloningVector; 

our $VERSION = '2.5';

#
# Tim Wiggin, Stephen G. Lenk (C) 2006.
#
# This program is free software; you can redistribute it and/or 
# modify it under the same terms as Perl itself.
#
# Licensed under the Perl Artistic License.
#
# This software comes with no guarantee of usefulness. 
# Use at your own risk. Check any solutions you obtain. 
#
# Neither Stephen G. Lenk or Tim Wiggin assume responsibility 
# for the use of this software.
#
# Use: cloning_vector_data ( 
#          $vector_file_name,  # vector file name 
#          $re_ra,             # restriction enzyme site patterns
#          $re_name_rh,        # names of restriction enzymes
#          $ecut_loc_ra,       # absolute cut location in enzyme site
#          $vcut_loc_ra )      # frame cut location (0, 1, 2) in vector
#
# Returns: 0 = fails (file not found or bad data) 
#          1 = success
#
# File format: 
#
#    '#' at start of line is a comment
#    Blank lines with no characters (just <cr> are permitted)
#    Data lines are tab separated by item:
#
# Name  Sequence  Cut position in site sequence  Cut position in vector frame
#
# MCPRIMERS_DATA_DIR - if this environments variable is defined, 
#                      it is a file path prefix for the vector data file.

use strict;
use warnings;

sub cloning_vector_data {

    my ( $vector_file_name,  # vector file name 
         $re_ra,             # restriction enzyme site patterns
         $re_name_rh,        # names of restriction enzymes
         $ecut_loc_ra,       # absolute cut location in enzyme site 
         $vcut_loc_ra        # frame cut location (0, 1, 2) in vector 
       ) = @_;  

    # define name of text file
    if (defined $ENV{"MCPRIMERS_DATA_DIR"}) {

        if ($^O =~ /^MSW/) { 

            # Microsoft
            $vector_file_name = $ENV{"MCPRIMERS_DATA_DIR"} . "\\" . $vector_file_name;
        }
        else {

            # Other (OSX, Linux, Unix)
            $vector_file_name = $ENV{"MCPRIMERS_DATA_DIR"} . "/" . $vector_file_name;
        }
    } 

    # open text file
    open(IN_FILE, $vector_file_name) or return 0;
    my @fileArray = <IN_FILE>;
    close(IN_FILE);

    # extract data from file aray
    foreach my $row (@fileArray){
  
        if (substr($row,0,1) eq "#" or length $row == 1) {
        
            # skip coments and blank lines
            next;
        }
        elsif ($row =~ /(.+)\t([ATCGatcg]+)\t(\d+)\t(\d+)/) {
  
            # use valid data lines
            $re_name_rh->{$2} = $1;     # name keyed by base sequence
            push @{$re_ra}, $2;         # base sequence in site
            push @{$ecut_loc_ra}, $3;   # cut location in enzyme
            push @{$vcut_loc_ra}, $4;   # cut location
        }
        else {

            return 0;
        }

    }

    return 1;
}