Seeder::Background - Background object


Seeder documentation Contained in the Seeder distribution.

Index


Code Index:

NAME

Top

Seeder::Background - Background object

VERSION

Top

Version 0.01

DESCRIPTION

Top

This module provides the get_background method.

SYNOPSIS

Top

    use Seeder::Background;
    my $background = Seeder::Background->new(
        seed_width      => "6",
        hd_index_file   => "6.index",
        seq_file        => "seq.fasta",
        out_file        => "seq.bkgd",
        strand          => "forward",
    );
    $background -> get_background;

EXPORT

Top

None by default

FUNCTIONS

Top

new

 Title   : new
 Usage   : my $background = Seeder::Background->new(%args);
 Function: constructor for the Seeder::Background object
 Returns : new Seeder::Background object
 Args    :
     seed_width       # Seed width
     hd_index_file    # Index file
     seq_file         # Sequence file
     out_file         # Output file
     strand           # Strand (forward or revcom)

get_background

 Title   : get_background
 Usage   : $background -> get_background;
 Function: coordinate the collection of Hamming distances
 Args    : none

_generate_word

 Title   : _generate_word
 Usage   : $self->_generate_word;
 Function: generate all combinations of nucleotides (A, C, G, T)
 Returns : reference to an array of words
 Args    : none

_get_distance

 Title   : _get_distance
 Usage   : $self->_get_distance;
 Function: collect distance occurences for each oligo
 Returns : reference to a 2D array of occurences
 Args    : none

_output_bkgd

 Title   : _output_bkgd
 Usage   : $self->_output_bkgd;
 Function: writes background Hamming distances to output file
 Args    : none

AUTHOR

Top

François Fauteux, <ffauteux at cpan.org>

BUGS

Top

Please report any bugs or feature requests to bug-Seeder at rt.cpan.org, or at http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Seeder. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.

SUPPORT

Top

You can find documentation for this module with the perldoc command.

    perldoc Seeder

You can also look for information at:

* RT: CPAN's request tracker

http://rt.cpan.org/NoAuth/Bugs.html?Dist=Seeder

* AnnoCPAN: Annotated CPAN documentation

http://annocpan.org/dist/Seeder

* CPAN Ratings

http://cpanratings.perl.org/d/Seeder

* Search CPAN

http://search.cpan.org/dist/Seeder

ACKNOWLEDGEMENTS

Top

This algorithm was developed by François Fauteux, Mathieu Blanchette and Martina Strömvik. We thank the Perl Monks <http://www.perlmonks.org/> for their support.

COPYRIGHT & LICENSE

Top


Seeder documentation Contained in the Seeder distribution.
package Seeder::Background;

use 5.006;
use warnings;
use strict;
use Seeder qw(:all);
use Carp;
use Bio::SeqIO;
use Algorithm::Loops qw(NestedLoops);
use List::MoreUtils;

use base qw(Exporter);
our @EXPORT;
our %EXPORT_TAGS = (
    'all' => [
        qw(
            new
            get_background
            )
    ]
);
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our $VERSION = '0.01';

sub new {
    my ( $class, %args ) = @_;
    my $self;
    $self->{seed_width} =
        (      ( defined $args{seed_width} )
            && ( $args{seed_width} >= 6 )
            && ( $args{seed_width} <= 8 ) )
        ? $args{seed_width}
        : croak "Please define a seed width between 6 and 8!";
    $self->{seq_file} =
        defined $args{seq_file}
        ? $args{seq_file}
        : croak "Please define a promoter seq file!";
    $self->{hd_index_file} =
        defined $args{hd_index_file}
        ? $args{hd_index_file}
        : croak "Please define a hamming dist index file!";
    $self->{out_file} =
        defined $args{out_file}
        ? $args{out_file}
        : croak "Please define an output file!";
    $self->{strand} =
        defined $args{strand}
        ? $args{strand}
        : croak "Please define strand(s)!";
    bless( $self, $class );
    return $self;
}

sub get_background {
    my $self = shift;
    $self->read_hd_index;
    $self->generate_oligo;
    $self->_generate_word;
    $self->lookup_coord;
    $self->_get_distance;
    $self->_output_bkgd;
}

sub _generate_word {
    my $self = shift;
    my @position;
    for my $depth ( 0 .. $self->{seed_width} - 1 ) {
        push @position, [qw(A C G T)];
    }
    my @word = NestedLoops( \@position, sub { join( q{}, @_ ) } );
    $self->{word_ref} = \@word;
    return \@word;
}

sub _get_distance {
    my $self = shift;
    my @bkgd;
    $self->{nucleotide} = [qw ( 0 0 0 0 )];
    for my $oligo_indice ( 0 .. $#{ $self->{oligo_ref} } ) {
        push @bkgd, [ split( q{}, 0 x ( $self->{seed_width} + 1 ) ) ];
    }
    my $seqIO =
        Bio::SeqIO->new( '-format' => 'fasta', -file => $self->{seq_file} );
    while ( my $seq_obj = $seqIO->next_seq() ) {
        my @seq_oligo;
        my $seq = $seq_obj->seq;
        $seq = uc $seq;
        $seq =~ s{[^ACGT]}{X}g;
        $seq =~ tr{ACGT}{0123};
        my @seq = split( q{}, $seq );
        for my $n (@seq) {
            $self->{nucleotide}->[$n] += 1 if ( $n =~ m{\d} );
        }
        for my $x ( 0 .. ( length($seq) - $self->{seed_width} ) ) {
            my $subseq = substr( $seq, $x, $self->{seed_width} );
            if ( $subseq !~ m{X} ) {
                $subseq =~ tr{ACGT}{0123};
                my $oligo_indice = decode($subseq, 4);
                $seq_oligo[$oligo_indice] = 1;
            }
            else {
                ();
            }
        }
        if ( $self->{strand} eq "revcom" ) {
            my $revcom = reverse $seq;
            $revcom =~ tr{0123}{3210};
            for my $x ( 0 .. ( length($revcom) - $self->{seed_width} ) ) {
                my $subseq = substr( $revcom, $x, $self->{seed_width} );
                if ( $subseq !~ m{X} ) {
                    $subseq =~ tr{ACGT}{0123};
                    my $oligo_indice = decode($subseq, 4);
                    $seq_oligo[$oligo_indice] = 1;
                }
                else {
                    ();
                }
            }
        }
        else {
            ();
        }
        for my $oligo_indice ( 0 .. $#{ $self->{oligo_ref} } ) {
            if ( defined $seq_oligo[$oligo_indice] ) {
                $bkgd[$oligo_indice][0] += 1;
            }
            else {
                my $status;
            HD: for my $depth ( 1 .. 3 ) {
                    if (List::MoreUtils::any { defined $seq_oligo[$_]; }
                        @{ $self->{hd_index_ref}->[$oligo_indice] }[
                        $self->{from_ref}->[ $self->{seed_width} - 1 ][$depth]
                        .. $self->{to_ref}
                        ->[ $self->{seed_width} - 1 ][$depth]
                        ]
                        )
                    {
                        $status = 1;
                        $bkgd[$oligo_indice][$depth] += 1 and last HD;
                    }
                    else {
                        ();
                    }
                }
                if ( !defined $status ) {
                    my $live_hd_index = $self->generate_hd_index(
                        \@{ $self->{oligo_ref}->[$oligo_indice] } );
                HD: for my $depth ( 4 .. $self->{seed_width} ) {
                        if (List::MoreUtils::any { defined $seq_oligo[$_]; }
                            @$live_hd_index[
                            $self->{from_ref}
                            ->[ $self->{seed_width} - 1 ][$depth]
                            .. $self->{to_ref}
                            ->[ $self->{seed_width} - 1 ][$depth]
                            ]
                            )
                        {
                            $bkgd[$oligo_indice][$depth] += 1
                                and last HD;
                        }
                        else {
                            ();
                        }
                    }
                }
                else {
                    ();
                }
            }
        }
    }
    $self->{bkgd_ref} = \@bkgd;
    return \@bkgd;
}

sub _output_bkgd {
    my $self = shift;
    open( OUT, ">>$self->{out_file}" )
        or croak "Cannot open $self->{out_file}\n";
    print( OUT "A\t$self->{nucleotide}->[0]\n" );
    print( OUT "C\t$self->{nucleotide}->[1]\n" );
    print( OUT "G\t$self->{nucleotide}->[2]\n" );
    print( OUT "T\t$self->{nucleotide}->[3]\n" );
    for my $oligo_indice ( 0 .. $#{ $self->{bkgd_ref} } ) {
        print( OUT "$self->{word_ref}->[$oligo_indice]",
            "\t@{$self->{bkgd_ref}->[$oligo_indice]}\n"
        );
    }
    close OUT;
}

1;    # End of Seeder::Background