Bio::NEXUS::Matrix - Provides functions for handling blocks that have matrices


Bio-NEXUS documentation Contained in the Bio-NEXUS distribution.

Index


Code Index:

NAME

Top

Bio::NEXUS::Matrix - Provides functions for handling blocks that have matrices

SYNOPSIS

Top

This module is the super class of Characters, Unaligned, and Distances block classes, and indirectly it is a super-class of Data and History blocks, which are both sub-classes of Characters blocks. These sub-classes inherint the methods within this module. There is no constructor, as a Matrix should not exist that is not also one of the sub-class block types.

DESCRIPTION

Top

Provides functions used for handling blocks that have matrices.

COMMENTS

Top

FEEDBACK

Top

All feedback (bugs, feature enhancements, etc.) are greatly appreciated.

AUTHORS

Top

 Thomas Hladish (tjhladish at yahoo)

VERSION

Top

$Revision: 1.23 $

METHODS

Top

get_ntax

 Title   : get_ntax
 Usage   : $block->get_ntax();
 Function: Returns the number of taxa in the block
 Returns : # taxa
 Args    : none

set_nchar

 Title   : set_nchar
 Usage   : print $block->set_nchar();
 Function: Sets the value of Dimensions:nchar
 Returns : none
 Args    : number of char(scalar)

get_nchar

 Title   : get_nchar
 Usage   : $block->get_nchar();
 Function: Returns the number of characters in the block (Note: In Distances Blocks, this is the number of characters used to infer distances.)
 Returns : # taxa
 Args    : none

set_format

 Title   : set_format
 Usage   : $block->set_format(\%format);
 Function: set the format of the characters
 Returns : none
 Args    : hash of format values

get_format

 Title   : get_format
 Usage   : $block->get_format($attribute);
 Function: Returns the format of the characters
 Returns : hash of format values, or if $attribute (a string) is supplied, the value of that attribute in the hash
 Args    : none

add_taxlabels

 Title   : add_taxlabels
 Usage   : $block->add_taxlabels($new_taxlabels);
 Function: Adds new taxa to taxlabels if they aren't already there
 Returns : none
 Args    : taxa to be added


Bio-NEXUS documentation Contained in the Bio-NEXUS distribution.
#################################################################
# Matrix.pm
#################################################################
# Author: Thomas Hladish
# $Id: Matrix.pm,v 1.23 2007/09/21 23:09:09 rvos Exp $

#################### START POD DOCUMENTATION ##################

package Bio::NEXUS::Matrix;

use strict;
#use Data::Dumper; # XXX this is not used, might as well not import it!
#use Carp; # XXX this is not used, might as well not import it!
use Bio::NEXUS::Functions;
use Bio::NEXUS::Block;
use Bio::NEXUS::Util::Logger;
use Bio::NEXUS::Util::Exceptions;
use vars qw(@ISA $VERSION $AUTOLOAD);
use Bio::NEXUS; $VERSION = $Bio::NEXUS::VERSION;

@ISA = qw(Bio::NEXUS::Block);
my $logger = Bio::NEXUS::Util::Logger->new();

sub get_ntax {
    my $self = shift;

    if ( my $otuset = $self->get_otuset() ) {
        return $otuset->get_ntax();
    }
    elsif ( my $dimensions = $self->get_dimensions() ) {
        return $dimensions->{'ntax'};
    }
    else {
        return;
    }
}

sub set_nchar {
    my ( $self, $nchar ) = @_;
    $self->{'dimensions'}{'nchar'} = $nchar;
    return;
}

sub get_nchar {
    my $self = shift;

    if ( my $dimensions = $self->get_dimensions() ) {
        return $dimensions->{'nchar'};
    }
    else {
        return;
    }
}

sub _parse_format {
    my ( $self, $string ) = @_;

    my %format = ();

    my @format_tokens = @{ _parse_nexus_words($string) };
    while (@format_tokens) {

		# If the second thing in the list is a '=' (e.g. ('datatype', '=', 'standard') )
        if ( $format_tokens[1] && $format_tokens[1] eq '=' ) {
			if ( lc($format_tokens[0]) eq 'items' ) {
				# process items list 
	           	my ( $key, $equals, $val ) = splice( @format_tokens, 0, 3 );
             	$format{ 'items' } = $val;
             	if ( $val eq '(' ) {
					while ( $format{ 'items' } !~ /\)$/ ) {
						#print Dumper @format_tokens;
						$format{ 'items' } .= " " . shift( @format_tokens ); 
					}
				}
			}
			else {
            	#then set the first thing equal to the third
            	my ( $key, $equals, $val ) = splice( @format_tokens, 0, 3 );
            	$format{ lc $key } = $val;
        	}
      	}
        else {
            my $key = shift @format_tokens;

            # Otherwise, just set the first thing equal to TRUE
            $format{ lc $key } = 1;
        }
    }

    # Note: Treating flags and things with rvalues the same way is problematic--
    # how do you know whether a given format token has a count of 1, or if it
    # was merely present, and that's why it has a value of one.  One possible
    # way to make this more robust is to store flags in $format{'flags'},
    # e.g. $format{'flags'} = ['tokens', 'respectcase'];

    $self->set_format( \%format );
    return;
}

sub _validate_format {
    my ( $self, $format ) = @_;
    my $block_type = $self->get_type();

    # Currently, only Characters and Unaligned blocks are handled here--other
    # matrix-type blocks are treated as though their formatting is valid
    if ( $block_type !~ qr/^(?:characters|unaligned)$/i ) {
    	return $format;
    }

    $format->{'datatype'} ||= 'standard';   # 'standard' is the default datatype

	# tokens always true for continuous data (p. 601 of Maddison, et al, 1997)
    if ( $format->{'datatype'} =~ /^continuous$/i ) {
        if ( $format->{'notokens'} ) {
        	$logger->warn(
        		"notokens subcommand is incompatible with"
        		. "datatype=continuous subcommand in format statement"
        	);
        }     
        $format->{'tokens'} = 1;     
    }
        
    if ( $format->{'datatype'} =~ /^(?:dna|rna|nucleotide|protein|continuous)$/i ) {
        delete $format->{'respectcase'};
    }
    elsif ( $format->{'datatype'} eq 'standard' ) {
        if ( !$format->{'respectcase'} ) {
            for my $sub_cmd (qw/symbols missing gap matchar/) {
                $format->{$sub_cmd} = lc $format->{$sub_cmd}
                    if defined $format->{$sub_cmd};
            }
        }
    }
    else {
        $logger->warn(
        	"Unfamiliar datatype encountered in $block_type block: "
        	. "'$format->{'datatype'}' (continuing anyway)"
        );
    }

    return $format;
}

sub set_format {
    my ( $self, $format_hashref ) = @_;
    $self->{'format'} = $self->_validate_format($format_hashref);
}

sub get_format {
    my ( $self, $attribute ) = @_;
    $attribute
        ? return $self->{'format'}->{$attribute}
        : return $self->{'format'} || {};
}

sub add_taxlabels {
    my ( $self, $new_taxlabels ) = @_;
    my $current_taxlabels = $self->get_taxlabels();

    for my $new_label (@$new_taxlabels) {

        # Check to see if new_label is already in current_taxlabels
        if ( !defined first {/$new_label/} @$current_taxlabels ) {
            push @$current_taxlabels, $new_label;
        }
    }
    return;
}

sub _write_dimensions {
    my ( $self, $fh, $verbose ) = @_;
    $fh ||= \*STDOUT;

	# Arlin took out all ntax stuff, ntax only used in taxa block according to standard
	#
	# my $ntax  = $self->get_ntax();
    my $nchar = $self->get_nchar();

    return if !defined $nchar;# && !defined $ntax;

	# my $ntax_text  = $ntax  ? " ntax=$ntax"   : q{};
    my $nchar_text = $nchar ? " nchar=$nchar" : q{};

	# Tom: this code cannot be reached due to return above on !$nchar, right? -Arlin
	#
	if ( $self->get_type() eq 'characters' && !$nchar ) {
		Bio::NEXUS::Util::Exceptions::BadFormat->throw(
			'error' => "Characters blocks require that Dimensions:nchar be defined"
		);
	}

	# print $fh "\tDIMENSIONS$ntax_text$nchar_text;\n";
    print $fh "\tDIMENSIONS$nchar_text;\n";
    return;
}

sub _write_format {
    my ( $self, $fh, $verbose ) = @_;
    $fh ||= \*STDOUT;

    my %format_of = %{ $self->get_format() };
    if ( scalar keys %format_of ) {
        print $fh "\tFORMAT";

        print $fh " datatype=$format_of{'datatype'}"
            if defined $format_of{'datatype'};
        print $fh ' respectcase' if $format_of{'respectcase'};

        while ( my ( $key, $val ) = each %format_of ) {
            next if ( lc($key) eq 'interleave' ); 
            if ( !$val || ( $key =~ /(?:datatype|respectcase)/i ) ) { next; }
            elsif ( $val eq '1' ) {
                print $fh " $key";
            }
            else {
                print $fh " $key=$val";
            }
        }
        print $fh ";\n";
    }
    return;
}

sub AUTOLOAD {
    return if $AUTOLOAD =~ /DESTROY$/;
    my $package_name = __PACKAGE__ . '::';

    # The following methods are deprecated and are temporarily supported
    # via a warning and a redirection
    my %synonym_for = (

#        "${package_name}parse"      => "${package_name}_parse_tree",  # example
    );

    if ( defined $synonym_for{$AUTOLOAD} ) {
        $logger->warn("$AUTOLOAD() is deprecated; use $synonym_for{$AUTOLOAD}() instead");
        goto &{ $synonym_for{$AUTOLOAD} };
    }
    else {
        Bio::NEXUS::Util::Exceptions::UnknownMethod->throw(
        	'error' => "ERROR: Unknown method $AUTOLOAD called"
        );
    }
}

1;