Seeder::Finder - Finder object


Seeder documentation Contained in the Seeder distribution.

Index


Code Index:

NAME

Top

Seeder::Finder - Finder object

VERSION

Top

Version 0.01

DESCRIPTION

Top

This module provides the find_motifs method.

SYNOPSIS

Top

    use Seeder::Finder;
    my $finder = Seeder::Finder->new(
        seed_width    => "6",
        n_motif       => "10",
        hd_index_file => "6.index",
        seq_file      => "seq.fasta",
        bkgd_file     => "seq.bkgd",
        out_file      => "motif.out",
        strand        => "forward",
    );
    $finder -> find_motifs;

EXPORT

Top

None by default

FUNCTIONS

Top

new

 Title   : new
 Usage   : my $finder = Seeder::Finder->new(%args);
 Function: constructor for the Seeder::Finder object
 Returns : a new Seeder::Finder object
 Args    :
     seed_width       # Seed width
     motif_width      # Motif width
     n_motif          # Number of motifs
     hd_index_file    # Index file
     seq_file         # Sequence file
     bkgd_file        # Background file
     out_file         # Output file
     strand           # Strand (forward or revcom), if the "revcom" option is
                        selected, the forward strand and the reverse
                        complement are included in the analysis




find_motifs

 Title   : find_motifs
 Usage   : $finder -> find_motifs;
 Function: coordination of the motif finding process
 Args    : none

_read_seq

 Title   : _read_seq
 Usage   : $self->_read_seq;
 Function: read the sequence file, count number of sequences
 Returns : reference to sequence tables
           ( $self->{n_seq} )
 Args    : none

_read_bkgd

 Title   : _read_bkgd
 Usage   : $self->_read_bkgd;
 Function: read the background Hamming distance file
 Returns : reference to a 2D array of background Hamming distances and 
           reference to an array of nucleotide frequencies
 Args    : none

_oligo_count

 Title   : _oligo_count
 Usage   : $self->_oligo_count;
 Function: count oligos in sequences
 Returns : reference to a 2D array of oligo counts
 Args    : none

_extent

 Title   : _extent
 Usage   : $self->_extent;
 Function: verify that motif extension width is even
 Returns : motif extension width
 Args    : none

_build_hd_matrix

 Title   : _build_hd_matrix
 Usage   : $self->_build_hd_matrix;
 Function: calculate Hamming distance between oligos and sequences
 Returns : reference to a 2D array of Hamming distances
 Args    : none

_pr_sum

 Title   : _pr_sum
 Usage   : my $distribution = _pr_sum( $n_seq, \@freq );
 Function: generate the probability distribution of a sum of i.i.d. random
           variables
 Returns : reference to an array of real numbers in the range from 0 to 1
 Args    : reference to oligo probability distribution, number of sequences

_convolution

 Title   : _convolution
 Usage   : my $p = _convolution($p, $f, $m);
 Function: convolution of two distributions
 Returns : reference to an array of real numbers in the range from 0 to 1
 Args    : reference to the distributions to be convoluted

_iupac

 Title   : _iupac
 Usage   : $self->_iupac;
 Function: set IUPAC degenerate symbol correspondence
 Returns : reference to a hash of IUPAC degenerate symbol
 Args    : none

_get_seed

 Title   : _get_seed
 Usage   : $self->_get_seed;
 Function: coordinate seed site selection
 Args    : none

_mtc

 Title   : _mtc
 Usage   : @q_value = _mtc(@p_value);
 Function: generate a list of q-values from a list of p-values
 Returns : array of q-values
 Args    : array of p-values
 Note    : This is an adaptation of the algorithm described in Storey, J.D.
           and Tibshirani, R. (2003) Statistical significance for genomewide
           studies, Proc Natl Acad Sci U S A, 100, 9440-9445

_frequency_matrix

 Title   : _frequency_matrix
 Usage   : $self->_frequency_matrix;
 Function: convert a set of instances into a frequency matrix, frequencies for
           sequences holding multiple instances are weighted proportionally
 Returns : reference to a 2D array of nucleotide frequencies
 Args    : none

_probability_matrix

 Title   : _probability_matrix
 Usage   : $self->_probability_matrix;
 Function: convert a frequency matrix into a probability matrix
 Returns : reference to a 2D array of probabilities
 Args    : none

_weight_matrix

 Title   : _weight_matrix
 Usage   : $self->_weight_matrix;
 Function: convert a probability matrix into a weight matrix
 Returns : reference to a 2D array of position weights
 Args    : none

_select_site

 Title   : _select_site
 Usage   : $self->_select_site;
 Function: select the best site among instances for each sequence given
           position weight matrix
 Returns : reference to a 2D array of sites
 Args    : none

_extend_motif

 Title   : _extend_motif
 Usage   : $self->_extend_motif;
 Function: extend seeds to motif width
 Returns : reference to a 2D array of sites
 Args    : none

_information_content

 Title   : _information_content
 Usage   : $self->_information_content;
 Function: calculate total information content
 Returns : total information content
 Args    : none

_mask_site

 Title   : _mask_site
 Usage   : $self->_mask_site;
 Function: mask the occurence in the sequence and in the count matrix
 Args    : none

_output_data

 Title   : _output_data
 Usage   : $self->_output_data;
 Function: writes predicted motif to the output file
 Args    : none

AUTHOR

Top

François Fauteux, <ffauteux at cpan.org>

BUGS

Top

Please report any bugs or feature requests to bug-motif at rt.cpan.org, or at http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Seeder. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.

SUPPORT

Top

You can find documentation for this module with the perldoc command.

    perldoc Seeder

You can also look for information at:

* RT: CPAN's request tracker

http://rt.cpan.org/NoAuth/Bugs.html?Dist=Seeder

* AnnoCPAN: Annotated CPAN documentation

http://annocpan.org/dist/Seeder

* CPAN Ratings

http://cpanratings.perl.org/d/Seeder

* Search CPAN

http://search.cpan.org/dist/Seeder

ACKNOWLEDGEMENTS

Top

This algorithm was developed by François Fauteux, Mathieu Blanchette and Martina Strömvik. We thank the Perl Monks <http://www.perlmonks.org/> for their support.

COPYRIGHT & LICENSE

Top


Seeder documentation Contained in the Seeder distribution.
package Seeder::Finder;

use 5.006;
use strict;
use warnings;
use Seeder qw(:all);
use Carp;
use POSIX;
use Bio::SeqIO;
use Bio::LiveSeq::DNA;
use Bio::LiveSeq::SeqI;
use List::Util;
use List::MoreUtils;
use Math::Spline;

use base qw(Exporter);
our @EXPORT;
our %EXPORT_TAGS = (
    'all' => [
        qw(
            new
            find_motifs
            )
    ]
);
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our $VERSION = '0.01';

sub new {
    my ( $class, %args ) = @_;
    my $self;
    $self->{seed_width} =
        (      ( defined $args{seed_width} )
            && ( $args{seed_width} >= 6 )
            && ( $args{seed_width} <= 8 ) )
        ? $args{seed_width}
        : croak "Please define a seed width between 6 and 8!";
    $self->{motif_width} =
        defined $args{motif_width}
        ? $args{motif_width}
        : croak "Please define a motif width!";
    $self->{n_motif} =
        defined $args{n_motif}
        ? $args{n_motif}
        : croak "Please define a number of motifs!";
    $self->{seq_file} =
        defined $args{seq_file}
        ? $args{seq_file}
        : croak "Please define a promoter seq file!";
    $self->{bkgd_file} =
        defined $args{bkgd_file}
        ? $args{bkgd_file}
        : croak "Please define a bkgd file!";
    $self->{hd_index_file} =
        defined $args{hd_index_file}
        ? $args{hd_index_file}
        : croak "Please define a hamming dist index file!";
    $self->{out_file} =
        defined $args{out_file}
        ? $args{out_file}
        : croak "Please define an output file!";
    $self->{strand} =
        defined $args{strand}
        ? $args{strand}
        : croak "Please define strand(s)!";
    bless( $self, $class );
    return $self;
}

sub find_motifs {
    my $self = shift;
    $self->_extent;
    $self->_read_seq;
    $self->_read_bkgd;
    $self->read_hd_index;
    $self->_oligo_count;
    $self->generate_oligo;
    $self->lookup_coord;

    for my $motif_no ( 1 .. $self->{n_motif} ) {
        $self->{launch_time} = time;
        $self->{iter}        = 0;
        $self->{motif_no}    = $motif_no;
        $self->_iupac;
        $self->_build_hd_matrix;
        $self->_get_seed;
        $self->_output_data;
    }
}

sub _read_seq {
    my $self = shift;
    my $seqIO =
        Bio::SeqIO->new( '-format' => 'fasta', -file => $self->{seq_file} );
    my ( @seq_tbl, @seq_mask, @revcom_tbl, @revcom_mask );
    while ( my $seq_obj = $seqIO->next_seq() ) {
        my $seq_id = $seq_obj->display_id;
        my $seq    = $seq_obj->seq;
        $seq = uc $seq;
        my @seq = ( $seq_id, $seq );
        push @seq_tbl,  [@seq];
        push @seq_mask, [@seq];
        if ( $self->{strand} eq "revcom" ) {
            my $revcom = reverse $seq;
            $revcom =~ tr{ACGTUMRWSYKVHDBXN}{TGCAUKYWSRMBDHVXN};
            my @revcom = ( $seq_id, $revcom );
            push @revcom_tbl,  [@revcom];
            push @revcom_mask, [@revcom];
        }
        else {
            ();
        }
    }
    $self->{seq_tbl_ref}     = \@seq_tbl;
    $self->{seq_mask_ref}    = \@seq_mask;
    $self->{revcom_tbl_ref}  = \@revcom_tbl;
    $self->{revcom_mask_ref} = \@revcom_mask;
    $self->{n_seq}           = $#seq_tbl + 1;
    return (\@seq_tbl, \@seq_mask, \@revcom_tbl, \@revcom_mask);
}

sub _read_bkgd {
    my $self = shift;
    my ( @bkgd, @nf );
    open( IN, "$self->{bkgd_file}" )
        or croak "Cannot open $self->{bkgd_file}\n";
    while (<IN>) {
        my @bkgd_line = split( m{\s}, $_ );
        shift @bkgd_line;
        push @bkgd, [@bkgd_line];
    }
    for my $nucleotide ( 0 .. 3 ) {
        $nf[$nucleotide] = $bkgd[0][0];
        shift @bkgd;
    }
    $self->{bkgd_ref} = \@bkgd;
    my $nf_sum = List::Util::sum(@nf);
    if ( $self->{strand} eq "revcom" ) {
        $nf[0] = ( $nf[0] + $nf[3] ) / 2;
        $nf[3] = $nf[0];
        $nf[1] = ( $nf[1] + $nf[2] ) / 2;
        $nf[2] = $nf[1];
    }
    else {
        ();
    }
    for my $nucleotide ( 0 .. 3 ) {
        $nf[$nucleotide] /= $nf_sum;
    }
    $self->{nf_ref} = \@nf;
    return (\@bkgd, \@nf);
}

sub _oligo_count {
    my $self = shift;
    my @count_matrix;
    for my $seq_indice ( 0 .. $#{ $self->{seq_tbl_ref} } ) {
        my @seq_count = split( q{}, 0 x ( 4**$self->{seed_width} ) );
        my $seq = $self->{seq_tbl_ref}->[$seq_indice][1];
        for my $x (
            $self->{extent} .. ( length($seq) - $self->{motif_width} ) )
        {
            my $subseq = substr( $seq, $x, $self->{seed_width} );
            $subseq =~ tr{ACGT}{0123};
            if ( $subseq !~ m{\D} ) {
                my $oligo_indice = decode( $subseq, 4 );
                $seq_count[$oligo_indice] += 1;
            }
            else {
                ();
            }
        }
        if ( $self->{strand} eq "revcom" ) {
            my $revcom = $self->{revcom_tbl_ref}->[$seq_indice][1];
            for my $x ( 0 .. ( length($revcom) - $self->{seed_width} ) ) {
                my $subseq = substr( $revcom, $x, $self->{seed_width} );
                $subseq =~ tr{ACGT}{0123};
                if ( $subseq !~ m{\D} ) {
                    my $oligo_indice = decode( $subseq, 4 );
                    $seq_count[$oligo_indice] += 1;
                }
                else {
                    ();
                }
            }
        }
        else {
            ();
        }
        push @count_matrix, [@seq_count];
    }
    $self->{count_matrix_ref} = \@count_matrix;
    return \@count_matrix;
}

sub _extent {
    my $self = shift;
    $self->{motif_width} =
        ( ( $self->{motif_width} - $self->{seed_width} ) / 2 ) =~ m{\.}
        ? $self->{motif_width} + 1
        : $self->{motif_width};
    my $extent = $self->{motif_width} - $self->{seed_width};
    $self->{extent} = $extent;
    return $extent;
}

sub _build_hd_matrix {
    my $self = shift;
    my @hd_matrix;
    for my $count_indice ( 0 .. $#{ $self->{count_matrix_ref} } ) {
        for my $oligo_indice ( 0 .. $#{ $self->{oligo_ref} } ) {
            if ( $self->{count_matrix_ref}->[$count_indice][$oligo_indice]
                > 0 )
            {
                $hd_matrix[$oligo_indice][$count_indice] = 0;
            }
            else {
                my $status;
            HD: for my $depth ( 1 .. 3 ) {
                    if (List::MoreUtils::any {
                            $self->{count_matrix_ref}->[$count_indice][$_]
                                > 0;
                        }
                        @{ $self->{hd_index_ref}->[$oligo_indice] }[
                        $self->{from_ref}->[ $self->{seed_width} - 1 ][$depth]
                        .. $self->{to_ref}
                        ->[ $self->{seed_width} - 1 ][$depth]
                        ]
                        )
                    {
                        $status = 1;
                        $hd_matrix[$oligo_indice][$count_indice] = $depth
                            and last HD;
                    }
                    else {
                        ();
                    }
                }
                if ( !defined $status ) {
                    my $live_hd_index = $self->generate_hd_index(
                        \@{ $self->{oligo_ref}->[$oligo_indice] } );
                HD: for my $depth ( 4 .. $self->{seed_width} ) {
                        if (List::MoreUtils::any {
                                $self->{count_matrix_ref}->[$count_indice][$_]
                                    > 0;
                            }
                            @$live_hd_index[
                            $self->{from_ref}
                            ->[ $self->{seed_width} - 1 ][$depth]
                            .. $self->{to_ref}
                            ->[ $self->{seed_width} - 1 ][$depth]
                            ]
                            )
                        {
                            $hd_matrix[$oligo_indice][$count_indice] = $depth
                                and last HD;
                        }
                        else {
                            ();
                        }
                    }
                }
                else {
                    ();
                }
            }
        }
    }
    $self->{hd_matrix_ref} = \@hd_matrix;
    return \@hd_matrix;
}

sub _pr_sum {
    my ( $n, $f ) = @_;
    my ( $p, $r ) = ( $f, [qw(1)] );
    while ( $n > 1 ) {
        ( $r, $n ) = $n & 1 ? ( _convolution( $r, $p ), $n - 1 ) : ( $r, $n );
        $n /= 2;
        $p = _convolution( $p, $p );
    }
    $r = _convolution( $r, $p );
    return $r;
}

sub _convolution {
    my ( $p, $f ) = @_;
    my $r;
    for my $i ( 0 .. $#{$p} ) {
        for my $j ( 0 .. $#{$f} ) {
            $r->[ $i + $j ] += $p->[$i] * $f->[$j];
        }
    }
    return $r;
}

sub _iupac {
    my $self = shift;
    $self->{iupac}->{M} = [qw( 0 1 )];
    $self->{iupac}->{R} = [qw( 0 2 )];
    $self->{iupac}->{W} = [qw( 0 3 )];
    $self->{iupac}->{S} = [qw( 1 2 )];
    $self->{iupac}->{Y} = [qw( 1 3 )];
    $self->{iupac}->{K} = [qw( 2 3 )];
    $self->{iupac}->{V} = [qw( 0 1 2 )];
    $self->{iupac}->{H} = [qw( 0 1 3 )];
    $self->{iupac}->{D} = [qw( 0 2 3 )];
    $self->{iupac}->{B} = [qw( 1 2 3 )];
    $self->{iupac}->{X} = [qw( 0 1 2 3 )];
    $self->{iupac}->{N} = [qw( 0 1 2 3 )];
    return $self->{iupac};
}

sub _get_seed {
    my $self       = shift;
    my $n_bkgd_seq = List::Util::sum( @{ $self->{bkgd_ref}->[0] } );
    my @hd_sum;
    for my $oligo_indice ( 0 .. $#{ $self->{oligo_ref} } ) {
        $hd_sum[$oligo_indice] =
            List::Util::sum( @{ $self->{hd_matrix_ref}->[$oligo_indice] } );
    }
    my @p_value;
    for my $hd_sum_indice ( 0 .. $#hd_sum ) {
        my $p_value = 0;
        my @freq     = @{ $self->{bkgd_ref}->[$hd_sum_indice] };
        for my $freq (@freq) {
            $freq = $freq / $n_bkgd_seq;
        }
        my $distribution = _pr_sum( $self->{n_seq}, \@freq );
        for my $live_hd_sum ( 0 .. $hd_sum[$hd_sum_indice] ) {
            $p_value += $distribution->[$live_hd_sum];
        }
        push @p_value, $p_value;
    }
    my $min = List::Util::min @p_value;
    my @sorted_indice = sort { $p_value[$a] <=> $p_value[$b] } 0 .. $#p_value;
    my $min_indice = $sorted_indice[0];
    my @q_value = _mtc(@p_value);
    $self->{p_value} = $min;
    $self->{q_value} = $q_value[$min_indice];
    $self->{hd_sum}      = $hd_sum[$min_indice];
    my @seed_oligo = encode( $min_indice, 4, $self->{seed_width} );
    $self->{seed_oligo_ref} = \@seed_oligo;
    my @hd_to_seq = @{ $self->{hd_matrix_ref}->[$min_indice] };
    my @seed_instance;
    my $seed_hd_index = $self->generate_hd_index( \@seed_oligo );

    for my $hd_indice ( 0 .. $#hd_to_seq ) {
        my @live_instance;
        my @oligo_indice =
            @$seed_hd_index[ $self->{from_ref}
            ->[ $self->{seed_width} - 1 ][ $hd_to_seq[$hd_indice] ]
            .. $self->{to_ref}
            ->[ $self->{seed_width} - 1 ][ $hd_to_seq[$hd_indice] ] ];
        for my $oligo_indice (@oligo_indice) {
            if ( $self->{count_matrix_ref}->[$hd_indice][$oligo_indice] > 0 )
            {
                push @live_instance,
                    join( q{}, @{ $self->{oligo_ref}->[$oligo_indice] } );
            }
            else {
                ();
            }
        }
        push @seed_instance, [ sort (@live_instance) ];
    }
    $self->{site_ref} = \@seed_instance;
    $self->_frequency_matrix;
    $self->_probability_matrix;
    $self->_weight_matrix;
    $self->_select_site;
    $self->_frequency_matrix;
    $self->_probability_matrix;
    $self->_weight_matrix;
    $self->_extend_motif;
}

sub _mtc {
    my @pvalue = @_;
    my @lambda =
        qw(0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60
            0.65 0.70 0.75 0.80 0.85 0.90 0.95);
    my @pi0;
    for my $i ( 0 .. $#lambda ) {
        for my $j ( 0 .. $#pvalue ) {
            if ( $pvalue[$j] >= $lambda[$i] ) {
                $pi0[$i]++;
            }
            else {
                ();
            }
        }
        $pi0[$i] /= ( ( $#pvalue + 1 ) * ( 1 - $lambda[$i] ) );
    }
    my @u = sort { $pvalue[$a] <=> $pvalue[$b] } 0 .. $#pvalue;
    my @v;
    for my $i ( 0 .. $#pvalue ) {
        for my $y ( 0 .. $#pvalue ) {
            if ( $pvalue[$y] <= $pvalue[$i] ) {
                $v[$i]++;
            }
            else {
                ();
            }
        }
    }
    my $spline = new Math::Spline( \@lambda, \@pi0 );
    my $pi0 = $spline->evaluate( List::Util::max @lambda );
    $pi0 = $pi0 > 1 ? 1 : $pi0;
    my @qvalue;
    for my $i ( 0 .. $#pvalue ) {
        $qvalue[$i] = ( $pi0 * ( $#pvalue + 1 ) * $pvalue[$i] ) / $v[$i];
    }
    $qvalue[$#qvalue] = $qvalue[$#qvalue] > 1 ? 1 : $qvalue[$#qvalue];
    for ( my $i = $#pvalue - 1; $i >= 0; $i-- ) {
        $qvalue[ $u[$i] ] =
            List::Util::min( $qvalue[ $u[$i] ], $qvalue[ $u[ $i + 1 ] ], 1 );
    }
    return @qvalue;
}

sub _frequency_matrix {
    my $self = shift;
    my $width =
        $self->{iter} > 0 ? $self->{motif_width} : $self->{seed_width};
    my @f_matrix;
    for my $row ( 0 .. 3 ) {
        @{ $f_matrix[$row] } = split( q{}, 0 x $width );
    }
    for my $instance_indice ( 0 .. $#{ $self->{site_ref} } ) {
        my $n_instance = scalar @{ $self->{site_ref}->[$instance_indice] };
        for my $live_instance ( @{ $self->{site_ref}->[$instance_indice] } ) {
            for my $depth ( 0 .. $width - 1 ) {
                my $nucleotide = substr $live_instance, $depth, 1;
                if ( $nucleotide =~ m{\d} ) {
                    $f_matrix[$nucleotide][$depth] += 1 / $n_instance;
                }
                else {
                    for my $iupac ( @{ $self->{iupac}->{$nucleotide} } ) {
                        $f_matrix[$iupac][$depth]
                            += 1
                            / ( $n_instance
                                * ( $#{ $self->{iupac}->{$nucleotide} } + 1 )
                            );
                    }
                }
            }
        }
    }
    $self->{f_matrix_ref} = \@f_matrix;
    return \@f_matrix;
}

sub _probability_matrix {
    my $self = shift;
    my $width =
        $self->{iter} > 0 ? $self->{motif_width} : $self->{seed_width};
    my @p_matrix;
    for my $row ( 0 .. 3 ) {
        @{ $p_matrix[$row] } = split( q{}, 0 x $width );
    }
    for my $nucleotide ( 0 .. 3 ) {
        for my $depth ( 0 .. $width - 1 ) {
            $p_matrix[$nucleotide][$depth] = (
                (         $self->{f_matrix_ref}->[$nucleotide][$depth]
                        + $self->{nf_ref}->[$nucleotide] *
                        sqrt( $self->{n_seq} )
                ) / ( $self->{n_seq} + sqrt( $self->{n_seq} ) )
            );
        }
    }
    $self->{p_matrix_ref} = \@p_matrix;
    return \@p_matrix;
}

sub _weight_matrix {
    my $self = shift;
    my $width =
        $self->{iter} > 0 ? $self->{motif_width} : $self->{seed_width};
    my @w_matrix;
    for my $row ( 0 .. 3 ) {
        @{ $w_matrix[$row] } = split( q{}, 0 x $width );
    }
    for my $nucleotide ( 0 .. 3 ) {
        for my $depth ( 0 .. $width - 1 ) {
            $w_matrix[$nucleotide][$depth] =
                log(  $self->{p_matrix_ref}->[$nucleotide][$depth]
                    / $self->{nf_ref}->[$nucleotide] )
                / log(2);
        }
    }
    $self->{w_matrix_ref} = \@w_matrix;
    return \@w_matrix;
}

sub _select_site {
    my $self = shift;
    my @site;
    for my $instance_indice ( 0 .. $#{ $self->{site_ref} } ) {
        my @score;
        for my $live_instance ( @{ $self->{site_ref}->[$instance_indice] } ) {
            my $live_score = 0;
            for my $depth ( 0 .. $self->{seed_width} - 1 ) {
                my $nucleotide = substr $live_instance, $depth, 1;
                $live_score += $self->{w_matrix_ref}->[$nucleotide][$depth];
            }
            push @score, $live_score;
        }
    SCORE:
        for my $score_indice ( 0 .. $#score ) {
            if ( $score[$score_indice] == List::Util::max @score ) {
                push @{ $site[$instance_indice] },
                    $self->{site_ref}->[$instance_indice][$score_indice]
                    and last SCORE;
            }
        }
    }
    $self->{site_ref} = \@site;
    return \@site;
}

sub _extend_motif {
    my $self         = shift;
    my $fm_extension = $self->{n_seq} / 4;
    $fm_extension .= ";";
    my $extent = $self->{extent} / 2;
    while ( $self->{iter} == 0 ) {
        for my $row ( 0 .. 3 ) {
            push @{ $self->{f_matrix_ref}->[$row] },
                split( q{;}, ($fm_extension) x ($extent) );
            unshift @{ $self->{f_matrix_ref}->[$row] },
                split( q{;}, ($fm_extension) x ($extent) );
        }
        $self->{iter}++;
    }
    $self->_probability_matrix;
    $self->_weight_matrix;
    my ( @site, @site_strand );
    while ( $self->{iter} < 10 ) {
        for my $seq_indice ( 0 .. $#{ $self->{seq_mask_ref} } ) {
            my ( @score, @rev_score );
            my $m_seq = $self->{seq_mask_ref}->[$seq_indice][1];
            for my $x ( 0 .. ( length($m_seq) - $self->{motif_width} ) ) {
                my $live_score = 0;
                my $m_subseq = substr( $m_seq, $x, $self->{motif_width} );
                $m_subseq =~ tr{ACGT}{0123};
                for my $depth ( 0 .. $self->{motif_width} - 1 ) {
                    my $nucleotide = substr $m_subseq, $depth, 1;
                    if ( $nucleotide =~ m{\d} ) {
                        $live_score
                            += $self->{w_matrix_ref}->[$nucleotide][$depth];
                    }
                    else {
                        for my $iupac ( @{ $self->{iupac}->{$nucleotide} } ) {
                            $live_score += (
                                $self->{w_matrix_ref}->[$iupac][$depth] / (
                                    $#{ $self->{iupac}->{$nucleotide} } + 1
                                )
                            );
                        }
                    }
                }
                push @score, $live_score;
            }
            if ( $self->{strand} eq "revcom" ) {
                my $m_seq = $self->{revcom_mask_ref}->[$seq_indice][1];
                for my $x ( 0 .. ( length($m_seq) - $self->{motif_width} ) ) {
                    my $live_score = 0;
                    my $m_subseq = substr( $m_seq, $x, $self->{motif_width} );
                    $m_subseq =~ tr{ACGT}{0123};
                    for my $depth ( 0 .. $self->{motif_width} - 1 ) {
                        my $nucleotide = substr $m_subseq, $depth, 1;
                        if ( $nucleotide =~ m{\d} ) {
                            $live_score += $self->{w_matrix_ref}
                                ->[$nucleotide][$depth];
                        }
                        else {
                            for my $iupac (
                                @{ $self->{iupac}->{$nucleotide} } )
                            {
                                $live_score += (
                                    $self->{w_matrix_ref}->[$iupac][$depth]
                                        / (
                                        $#{ $self->{iupac}->{$nucleotide} }
                                            + 1
                                        )
                                );
                            }
                        }
                    }
                    push @rev_score, $live_score;
                }
            }
            else {
                ();
            }
            push @score, @rev_score;
        SCORE:
            for my $score_indice ( 0 .. $#score ) {
                if ( $score[$score_indice] == List::Util::max @score ) {
                    if (   ( $self->{strand} eq "revcom" )
                        && ( ($score_indice) >= ( scalar @score / 2 ) ) )
                    {
                        my $revcom_indice =
                            ( $score_indice - scalar @score / 2 );
                        my $live_site =
                            substr( $self->{revcom_tbl_ref}->[$seq_indice][1],
                            $revcom_indice, $self->{motif_width} );
                        $live_site =~ tr{ACGT}{0123};
                        $site[$seq_indice][0] = $live_site;
                        $self->{site_position_ref}->[$seq_indice][1] = (
                            (   length $self->{revcom_tbl_ref}
                                    ->[$seq_indice][1]
                            ) - $revcom_indice
                        );
                        $self->{site_position_ref}->[$seq_indice][0] =
                            (     $self->{site_position_ref}->[$seq_indice][1]
                                - $self->{motif_width} 
                                + 1 );
                        $site_strand[$seq_indice] = "minus";
                    }
                    else {
                        my $live_site =
                            substr( $self->{seq_tbl_ref}->[$seq_indice][1],
                            $score_indice, $self->{motif_width} );
                        $live_site =~ tr{ACGT}{0123};
                        $site[$seq_indice][0] = $live_site;
                        $self->{site_position_ref}->[$seq_indice][0] =
                            ( $score_indice + 1 );
                        $self->{site_position_ref}->[$seq_indice][1] =
                            ( $score_indice + $self->{motif_width} );
                        $site_strand[$seq_indice] = "plus";
                    }
                    last SCORE;
                }
                else {
                    ();
                }
            }
        }
        $self->{site_ref}    = \@site;
        $self->{site_strand} = \@site_strand;
        $self->_frequency_matrix;
        $self->_probability_matrix;
        $self->_weight_matrix;
        $self->{iter}++;
    }
    $self->{site_ref}    = \@site;
    $self->{site_strand} = \@site_strand;
    $self->_information_content;
    $self->_mask_site;
    return \@site;
}

sub _information_content {
    my $self = shift;
    my $ic   = 2 * $self->{motif_width};
    for my $nucleotide ( 0 .. 3 ) {
        for my $depth ( 0 .. $self->{motif_width} - 1 ) {
            $ic += (
                $self->{p_matrix_ref}->[$nucleotide][$depth] * (
                    log( $self->{p_matrix_ref}->[$nucleotide][$depth] ) /
                        log(2)
                )
            );
        }
    }
    $self->{information_content} = $ic;
    return $ic;
}

sub _mask_site {
    my $self = shift;
    my $mask = "X" x $self->{motif_width};
    for my $seq_indice ( 0 .. $#{ $self->{seq_mask_ref} } ) {
        my $live_seq = Bio::LiveSeq::DNA->new(
            -seq => $self->{seq_mask_ref}->[$seq_indice][1] );
        my $m_seq = $live_seq->seq;
        for my $p (
            (     $self->{site_position_ref}->[$seq_indice][0]
                - $self->{seed_width}
            ) .. ( $self->{site_position_ref}->[$seq_indice][1] - 1 )
            )
        {
            eval {
                my $overlap = substr( $m_seq, $p, $self->{seed_width} );
                if (   ( $overlap !~ m{[^ACGT]} )
                    && ( length($overlap) == $self->{seed_width} ) )
                {
                    $overlap =~ tr{ACGT}{0123};
                    my $overlap_ind = decode( $overlap, 4 );
                    $self->{count_matrix_ref}->[$seq_indice][$overlap_ind]
                        -= 1;
                    if ( $self->{strand} eq "revcom" ) {
                        my $rev_overlap = reverse $overlap;
                        $rev_overlap =~ tr{0123}{3210};
                        my $rev_overlap_ind = decode( $rev_overlap, 4 );
                        $self->{count_matrix_ref}
                            ->[$seq_indice][$rev_overlap_ind] -= 1;
                    }
                    else {
                        ();
                    }
                }
                else {
                    ();
                }
            };
        }
        $live_seq->change( $mask,
            ( $self->{site_position_ref}->[$seq_indice][0] ),
            $self->{motif_width} );
        $self->{seq_mask_ref}->[$seq_indice][1] = $live_seq->seq;
        if ( $self->{strand} eq "revcom" ) {
            my $reverse = Bio::LiveSeq::DNA->new(
                -seq => $self->{revcom_mask_ref}->[$seq_indice][1] );
            $reverse->change(
                $mask,
                (   ( length $m_seq )
                    - $self->{site_position_ref}->[$seq_indice][1] + 1
                ),
                $self->{motif_width}
            );
            $self->{revcom_mask_ref}->[$seq_indice][1] = $reverse->seq;
        }
        else {
            ();
        }
    }
}

sub _output_data {
    my $self = shift;
    $self->{land_time} = time;
    my $seed = join( q{}, @{ $self->{seed_oligo_ref} } );
    $seed =~ tr{0123}{ACGT};

    open( OUT, ">>$self->{out_file}" )
        or croak "Cannot open $self->{out_file}\n";
    print( OUT "*" x 78, "\n" );
    print( OUT "Motif $self->{motif_no}, width = $self->{motif_width},",
        " seed width = $self->{seed_width},",
        " sites = $self->{n_seq}\n"
    );
    print( OUT "Sum of HD to $seed in $self->{n_seq} seqs: $self->{hd_sum}\n");
    print (OUT "P-value($seed) = ", sprintf "%.5e\n", $self->{p_value}, "\n");
    print (OUT "Q-value($seed) = ", sprintf "%.5e\n", $self->{q_value}, "\n");
    print( OUT "Information Content = ", sprintf "%.15g\n", $self->{information_content} );
    print( OUT "*" x 78, "\n\n" );

    print( OUT ">NFM $self->{motif_no}\n\n" );
    my @dna = qw(A C G T);
    print( OUT q{ } x 6 );
    for my $dna (@dna) {
        print( OUT sprintf "%-16s  ", $dna );
    }
    print( OUT "\n" );
    my @depth = ( 0 .. $self->{motif_width} - 1 );
    for my $depth (@depth) {
        print( OUT "P" );
        print( OUT sprintf "%-3d  ", $depth );
        for my $nuc ( 0 .. 3 ) {
            print( OUT sprintf "%-16d  ",
                $self->{f_matrix_ref}->[$nuc][$depth] );
        }
        print( OUT "\n" );
    }
    print( OUT "\n" );

    print( OUT ">PWM $self->{motif_no}\n\n" );
    print( OUT q{ } x 6 );
    for my $dna (@dna) {
        print( OUT sprintf "%-16s  ", $dna );
    }
    print( OUT "\n" );
    for my $depth (@depth) {
        print( OUT "P" );
        print( OUT sprintf "%-3d  ", $depth );
        for my $nuc ( 0 .. 3 ) {
            if ( $self->{w_matrix_ref}->[$nuc][$depth] > 0 ) {
                print( OUT sprintf sprintf "%.14f  ",
                    $self->{w_matrix_ref}->[$nuc][$depth]
                );
            }
            else {
                print( OUT sprintf sprintf "%.13f  ",
                    $self->{w_matrix_ref}->[$nuc][$depth]
                );
            }
        }
        print( OUT "\n" );
    }
    print( OUT "\n" );

    print( OUT sprintf "%-" . $self->{motif_width} . "s    ", "Site" );
    print( OUT "Position       " );
    print( OUT "Strand    " );
    print( OUT "Sequence\n" );
    print( OUT "-" x $self->{motif_width} );
    print( OUT '    ', "-" x 11 );
    print( OUT '    ', "-" x 6 );
    print( OUT '    ', "-" x ( 49 - $self->{motif_width} ), "\n" );

    for my $seq_indice ( 0 .. $#{ $self->{seq_tbl_ref} } ) {
        my $subseq = $self->{site_ref}->[$seq_indice][0];
        $subseq =~ tr{0123}{ACGT};
        print( OUT $subseq, '    ' );
        print( OUT sprintf "%-4d",
            $self->{site_position_ref}->[$seq_indice][0] );
        print( OUT " - " );
        print( OUT sprintf "%4d",
            $self->{site_position_ref}->[$seq_indice][1] );
        print( OUT sprintf "    %-6s", $self->{site_strand}->[$seq_indice] );
        print( OUT '    ',
            sprintf "%." . ( 49 - $self->{motif_width} ) . "s",
            $self->{seq_tbl_ref}->[$seq_indice][0]
        );
        print( OUT "\n" );
    }

    print( OUT "\n", $self->execution_time, "\n\n\n" );
    close(OUT);
}

1;    # End of Seeder::Finder